Source code for impacts_estimation.impacts_estimation

""" Environmental impact estimation for Open Food Facts products  """

import warnings
import time
from random import uniform, shuffle, choice
from statistics import mean
import copy
import math

# ### FOR DEBUG PURPOSE ONLY ###
# import matplotlib.pyplot as plt
# import seaborn as sns
#
# sns.set()
# ##############################

import statsmodels.stats.api as sms
import numpy as np
from sklearn.neighbors import KernelDensity
from pyscipopt import Model

from impacts_estimation.utils import natural_bounds, nutritional_error_margin, \
    clear_ingredient_graph, define_subingredients_percentage_type, find_ingredients_graph_leaves, \
    flat_ingredients_list_BFS, individualize_ingredients, original_id, nutriments_from_recipe, \
    remove_percentage_from_product, confidence_score, UnknownIngredientsRemover, agribalyse_impact_name_i18n
from impacts_estimation.vars import NUTRIMENTS_CATEGORIES, QUALITY_DATA_WARNINGS, \
    TOP_LEVEL_NUTRIMENTS_CATEGORIES, MAX_ASH_CONTENT, FERMENTATION_AGENTS, FERMENTED_FOOD_CATEGORIES, \
    HIGH_WATER_LOSS_CATEGORIES, IMPACT_MASS_UNIT, AGRIBALYSE_IMPACT_UNITS, RESULTS_WARNINGS_NOT_RELIABLE, \
    ID_AGRIBALYSE_ISSUES, BUTTERS_FOOD_CATEGORIES
from settings import VERBOSITY, IMPACT_RELATIVE_INTERQUARTILE_WARNING_THRESHOLD, \
    UNCHARACTERIZED_INGREDIENTS_MASS_WARNING_THRESHOLD, \
    UNCHARACTERIZED_INGREDIENTS_RATIO_WARNING_THRESHOLD, MAX_CONSECUTIVE_RECIPE_CREATION_ERROR, \
    DECREASING_PROPORTION_ORDER_LIMIT, TOTAL_MASS_DISTRIBUTION_STEP, \
    MAX_CONSECUTIVE_NULL_IMPACT_CHARACTERIZED_INGREDIENTS_MASS, MINIMUM_TOTAL_MASS_FOR_UNBALANCED_RECIPES, \
    OFF_INGREDIENTS_FORMAT
from data import ref_ing_dist, ingredients_data, off_taxonomy
from impacts_estimation.exceptions import RecipeCreationError, NoKnownIngredientsError, SolverTimeoutError, \
    NoCharacterizedIngredientsError

# Distinct values of the `id` column of ref_ing_dist, i.e. the ingredients for which reference percentage data
# exists. Used by RandomRecipeCreator._pick_proportion to decide between a reference-based and a uniform sampling.
ing_with_ref_prct_dist = list(ref_ing_dist.id.unique())


class RecipeImpactCalculator:
    """
    Computes the impact of a recipe, and the share of each ingredient in it, for one impact category.

    Ingredients that have no data for the considered impact are given the average impact of the characterized
    ingredients of the recipe.
    """

    def __init__(self, recipe, impact_name, use_uncertainty=False):
        """
        Args:
            recipe (dict): Dict containing ingredients as keys and masses in grams as values
            impact_name (str): Name of the impact as in ingredients_data.json
            use_uncertainty (bool): Should the ingredients uncertainty data be used to pick a randomized impact
                value? If True, the result may vary from one call to another.
        """
        self.recipe = recipe
        self.impact_name = agribalyse_impact_name_i18n(impact_name)
        self.use_uncertainty = use_uncertainty
        self.ingredients_impacts = dict()
        self.known_ingredients_mass = None
        self.known_ingredients_impact = None
        # Results of the lazy computations, filled by _compute_impact() and _compute_impact_shares()
        self.total_mass = None
        self._recipe_impact = None
        self.ingredients_impacts_shares = dict()
        self.impact_computed = False
        self.impact_shares_computed = False

        self._define_ingredients_impacts()

    def _define_ingredients_impacts(self):
        """
        Getting the impact of each ingredient.

        If the ingredient has no uncertainty parameters or use_uncertainty is set to False, simply use the default
        value. Else pick a value using the uncertainty parameters. Ingredients with no data for the considered
        impact are skipped.

        Raises:
            ValueError: If an uncertainty distribution has an unknown type.
        """
        # A single random generator is enough for all the ingredients
        rng = np.random.default_rng()

        for ingredient in self.recipe:
            try:
                ingredient_impact_data = ingredients_data[ingredient]['impacts'][self.impact_name]
            except KeyError:
                continue

            if ('uncertainty_distributions' not in ingredient_impact_data) or (not self.use_uncertainty):
                self.ingredients_impacts[ingredient] = ingredient_impact_data['amount']
                continue

            # Pick a random uncertainty distribution
            uncertainty_distribution = choice(ingredient_impact_data['uncertainty_distributions'])
            distribution_type = uncertainty_distribution['distribution']

            if distribution_type == 'normal':
                self.ingredients_impacts[ingredient] = rng.normal(uncertainty_distribution['mean'],
                                                                  uncertainty_distribution['standard deviation'])
            elif distribution_type == 'lognormal':
                geometric_mean = uncertainty_distribution['geometric mean']
                # Numpy requires the mean and std of the underlying normal distribution, which are the logs of
                # the geometric mean and geometric std of the lognormal distribution.
                sigma = np.log(uncertainty_distribution['geometric standard deviation'])
                if geometric_mean >= 0:
                    self.ingredients_impacts[ingredient] = rng.lognormal(np.log(geometric_mean), sigma)
                elif geometric_mean < 0:
                    # If the geometric mean is negative, then simply take the opposite of the value generated
                    # with the opposite of the geometric mean
                    self.ingredients_impacts[ingredient] = -rng.lognormal(np.log(-geometric_mean), sigma)
            elif distribution_type == 'triangular':
                self.ingredients_impacts[ingredient] = rng.triangular(uncertainty_distribution['minimum'],
                                                                      uncertainty_distribution['mode'],
                                                                      uncertainty_distribution['maximum'])
            elif distribution_type == 'uniform':
                self.ingredients_impacts[ingredient] = rng.uniform(uncertainty_distribution['minimum'],
                                                                   uncertainty_distribution['maximum'])
            else:
                raise ValueError(f"Unknown distribution type {uncertainty_distribution['distribution']}"
                                 f" for ingredient {ingredient}")

    def _compute_impact(self):
        """
        Computing the impact of the recipe.

        The impact of the ingredients with a known impact is summed, then scaled up to the total mass of the
        recipe. The recipe impact is None if no ingredient mass is characterized.
        """
        self.known_ingredients_impact = 0
        self.known_ingredients_mass = 0
        self.total_mass = sum([float(x) for x in self.recipe.values()])

        # Looping on all ingredients that have a value for the considered impact
        for ingredient_name, ingredient_impact in self.ingredients_impacts.items():
            ingredient_mass = float(self.recipe[ingredient_name])

            # Adding the ingredient to the known ingredients mass
            self.known_ingredients_mass += ingredient_mass

            # Adding the impact to the result
            self.known_ingredients_impact += ingredient_mass * ingredient_impact / IMPACT_MASS_UNIT

        if self.known_ingredients_mass == 0:
            self._recipe_impact = None
        else:
            # Inflating the impact of the known ingredients to the impact of the total mass of these ingredients
            self._recipe_impact = self.known_ingredients_impact * self.total_mass / self.known_ingredients_mass

        self.impact_computed = True

    def _compute_impact_shares(self):
        """
        Computing the share of the recipe impact due to each ingredient.

        Characterized ingredients get their own impact divided by the recipe impact. Other ingredients get their
        mass share, as they are considered to have the average impact of the recipe. Nothing is computed if no
        ingredient mass is characterized.

        Raises:
            ZeroDivisionError: If the recipe impact is 0, as the shares of characterized ingredients are then
                undefined.
        """
        # Compute impact if it has not been done yet
        if not self.impact_computed:
            self._compute_impact()

        self.ingredients_impacts_shares = dict()
        if self.known_ingredients_mass != 0:
            for ingredient_name, ingredient_mass in self.recipe.items():
                # Masses are cast to float as in _compute_impact()
                ingredient_mass = float(ingredient_mass)
                if ingredient_name in self.ingredients_impacts:
                    ingredient_impact = self.ingredients_impacts[ingredient_name]
                    self.ingredients_impacts_shares[ingredient_name] = \
                        (ingredient_impact * ingredient_mass / IMPACT_MASS_UNIT) / self._recipe_impact
                else:
                    self.ingredients_impacts_shares[ingredient_name] = ingredient_mass / self.total_mass

        self.impact_shares_computed = True

    def get_recipe_impact(self):
        """
        Calculate the environmental impact from a product recipe.

        Warning:
            Any ingredients whose impact is unknown will be considered to have the average impact of the product.

        Returns:
            float: Impact of the product, or None if no ingredient of the recipe is characterized.
        """
        # Compute impact if it has not been done yet
        if not self.impact_computed:
            self._compute_impact()

        return self._recipe_impact

    def get_ingredient_impact_share(self, ingredient):
        """
        Returns the share of the recipe impact that is due to the given ingredient.

        Args:
            ingredient (str): Key of the ingredient in the recipe.

        Returns:
            float: Share of the recipe impact due to the ingredient, or None if the recipe impact is None.

        Raises:
            ValueError: If the ingredient is not present in the recipe.
        """
        if ingredient not in self.recipe:
            raise ValueError('The ingredient is not present in the recipe.')

        # Compute impact shares if it has not been done yet
        if not self.impact_shares_computed:
            self._compute_impact_shares()

        # If the recipe impact is None, then there is no result
        if self._recipe_impact is None:
            return None

        return self.ingredients_impacts_shares[ingredient]
def impact_from_recipe(recipe, impact_name, use_uncertainty=False):
    """
    Shortcut building a RecipeImpactCalculator and returning the impact of the recipe.

    Args:
        recipe (dict): Passed as is to RecipeImpactCalculator.
        impact_name (str): Passed as is to RecipeImpactCalculator.
        use_uncertainty (bool): Passed as is to RecipeImpactCalculator.

    Returns:
        Result of RecipeImpactCalculator.get_recipe_impact().
    """
    return RecipeImpactCalculator(
        recipe=recipe,
        impact_name=impact_name,
        use_uncertainty=use_uncertainty,
    ).get_recipe_impact()
[docs]class RandomRecipeCreator: def __init__(self, product, use_defined_prct=True, use_nutritional_info=True, const_relax_coef=0, maximum_evaporation=0.4, total_mass_used=None, min_prct_dist_size=30, dual_gap_type='absolute', dual_gap_limit=0.001, solver_time_limit=60, time_limit_dual_gap_limit=0.01, allow_unbalanced_recipe=False, confidence_score_weighting_factor=10): """ Args: product (dict): Dict containing an OpenFoodFact product. It must contain the keys "ingredients" and "nutriments" use_defined_prct (bool): Should ingredients percentages defined in the product be used? use_nutritional_info (bool): Should nutritional information be used to estimate recipe? const_relax_coef (float): Constraints relaxation coefficient. Allows to relax constraints on nutriments, water and mass balance to increase chances to get a result. maximum_evaporation (float): Upper bound of the evaporation coefficient [0-1[. I.e. maximum proportion of ingredients water that can evaporate. total_mass_used (float): Total mass of ingredient used in grams, if known. min_prct_dist_size (int): Minimum size of the ingredients percentage distribution that will be used to pick a proportion for an ingredient. If the distribution (adjusted to the possible value interval) has less data, uniform distribution will be used instead. dual_gap_type (str): 'absolute' or 'relative'. Determines the precision type of the variable optimization by the solver. dual_gap_limit (float): Determines the precision of the variable optimization by the solver. Relative or absolute according to dual_gap_type. solver_time_limit (float): Maximum time for the solver optimization (in seconds). Set to None or 0 to set no limit. time_limit_dual_gap_limit (float): Accepted precision of the solver in case of time limit hit. Relative or absolute according to dual_gap_type. allow_unbalanced_recipe (bool): If True, the total mass of ingredients used in the resulting recipe may be less than the final mass of the product. 
This is not physically possible but may be necessary to avoid systematical overestimation of the total mass of ingredients used. confidence_score_weighting_factor (float): Weighting factor used for the confidence score calculation. It corresponds to the weight of the nutritional distance against the absolute difference between the total mass and 100g/100g. """ self.product = product self.use_defined_prct = use_defined_prct self.use_nutritional_info = use_nutritional_info self.const_relax_coef = const_relax_coef self.min_dist_size = min_prct_dist_size self.total_mass_used = total_mass_used individualize_ingredients(self.product) self.top_level_ingredients = product['ingredients'] self.top_level_ingredients_names = [x['id'] for x in self.top_level_ingredients] self.leaf_ingredients = find_ingredients_graph_leaves(self.product) self.leaf_ingredients_names = [x['id'] for x in self.leaf_ingredients] self.all_ingredients = flat_ingredients_list_BFS(self.product) self.all_ingredients_names = [x['id'] for x in self.all_ingredients] self.decreasing_order_limit_rank = None self.dual_gap_type = dual_gap_type.lower() self.time_limit_dual_gap_limit = time_limit_dual_gap_limit self.allow_unbalanced_recipe = allow_unbalanced_recipe self.maximum_evaporation = maximum_evaporation self.confidence_score_weighting_factor = confidence_score_weighting_factor self.recipe = dict() # Defining a solver that will be used to define the range of possible recipes self.model = Model() if VERBOSITY < 3: self.model.hideOutput() if solver_time_limit: self.model.setParam('limits/time', solver_time_limit) if self.dual_gap_type == 'absolute': self.model.setParam('limits/absgap', dual_gap_limit) elif self.dual_gap_type == 'relative': self.model.setParam('limits/gap', dual_gap_limit) else: raise ValueError("The parameter dual_gap_type should be 'absolute' or 'relative'.") # Adding variables to the solver # Adding a variable for the total mass of ingredients used self.total_mass_var = 
self.model.addVar('total_mass_used', vtype='C', lb=MINIMUM_TOTAL_MASS_FOR_UNBALANCED_RECIPES if self.allow_unbalanced_recipe else 1) # If the total mass used is provided, add it as a constraint if self.total_mass_used is not None: self.model.addCons(self.total_mass_var == self.total_mass_used / 100) # The evaporation coefficient is one variable of the solver describing how much of the unprocessed ingredients # water is lost during food processing. It is not bounded to 1 to avoid infinite value of the total mass used. assert 0 <= maximum_evaporation < 1 self.evaporation_var = self.model.addVar('evaporation', vtype="C", lb=0, ub=self.maximum_evaporation) # INGREDIENTS VARIABLES # One variable per ingredient, corresponding to its proportion of the ingredients masses used self.ingredient_vars = dict() for ingredient_name in self.all_ingredients_names: self.ingredient_vars[ingredient_name] = self.model.addVar(ingredient_name, vtype="C", lb=0, ub=1) # If water is not present in the ingredient list, add it as water under 5% hasn't to be declared if 'en:water' not in self.top_level_ingredients_names: # Water may be in leaf ingredients but not in top level ingredients, in that case it must be individualized water_name = 'en:water' while water_name in self.leaf_ingredients_names: water_name += '*' self.ingredient_vars[water_name] = self.model.addVar(water_name, vtype="C", lb=0, ub=0.05) self.leaf_ingredients_names.append(water_name) # Creating a dict with ingredients nutritional data self.ingredients_data = dict() for ingredient_name in self.leaf_ingredients_names: if ingredient_name not in self.ingredients_data: self.ingredients_data[ingredient_name] = dict() for nutri_item in NUTRIMENTS_CATEGORIES + ['water', 'ash']: if (original_id(ingredient_name) in ingredients_data) \ and (nutri_item in ingredients_data[original_id(ingredient_name)].get('nutriments', [])): self.ingredients_data[ingredient_name][nutri_item] = \ 
ingredients_data[original_id(ingredient_name)]['nutriments'][nutri_item] else: # Giving default minimum and maximum nutriment/water content (0 and 100%) to unknown ingredients self.ingredients_data[ingredient_name][nutri_item] = {'min': 0, 'max': MAX_ASH_CONTENT if nutri_item == 'ash' else 100}
[docs] def _add_used_mass_constraint(self): """ Adding the constraint that the total used mass of ingredients is bounded by the evaporation coefficient. """ # Lower bound is already set in total_mass_var definition # Upper bound self.model.addCons(self.total_mass_var <= 1 / (1 - self.evaporation_var), name="Used mass bound")
[docs] def _add_total_leaves_percentage_constraint(self): """ The sum of the percentages of all leaf ingredients must be 100%. """ # Sum of the leaves self.model.addCons(sum([self.ingredient_vars[x] for x in self.leaf_ingredients_names]) == 1, name="Total percentage is 100%")
[docs] def _add_total_subingredients_percentages_constraint(self, ingredient): """ Recursive function to add for each compound ingredient the constraint that its percentage must equal the sum of the percentages of its subingredients. Args: ingredient (dict): Dict corresponding to a compound ingredient. """ if 'ingredients' in ingredient: # Adding the constraint self.model.addCons(sum([self.ingredient_vars[x['id']] for x in ingredient['ingredients']]) == self.ingredient_vars[ingredient['id']], name=f"Subingredients sum for {ingredient['id']}") # Recursive call for the subingredients for subingredient in ingredient['ingredients']: self._add_total_subingredients_percentages_constraint(subingredient)
[docs] def _add_mass_order_constraints(self, product): """ Recursive function to add the constraint that each (sub)ingredient must be in higher proportion than the next one of the same level. Args: product (dict): Dict corresponding to a product or a compound ingredient. """ if 'ingredients' in product: for i in range(len(product['ingredients']) - 1): ing_var = self.ingredient_vars[product['ingredients'][i]['id']] next_ing_var = self.ingredient_vars[product['ingredients'][i + 1]['id']] self.model.addCons(next_ing_var <= ing_var, name=f"{product['ingredients'][i]['id']}>=" f"{product['ingredients'][i + 1]['id']}") # Recursive call for the subingredients for ingredient in product['ingredients']: self._add_mass_order_constraints(ingredient)
[docs] def _add_evaporation_constraint(self): """ The product mass is bounded by: - Lower bound: The sum of the ingredients masses used multiplied by 1 minus the water they lost (evaporation coefficient multiplied by water content of the ingredient). Ingredients with unknown water content are supposed to have a water content of 1 for lower bound. - Upper bound: The sum of the ingredients masses used multiplied by 1 minus the water they lost (evaporation coefficient multiplied by water content of the ingredient) for ingredients with a known water content, plus the sum of ingredients with unknown water content masses (as they water content is supposed to be 0 for upper bound) """ # Lower bound self.model.addCons( self.total_mass_var * (1 - self.evaporation_var * ( sum([self.ingredient_vars[ing] * self.ingredients_data[ing]['water']['max'] / 100 for ing in self.ingredient_vars if ing in self.leaf_ingredients_names]) )) <= (1 + self.const_relax_coef), name="Product mass evaporation lower bound" ) # Upper bound self.product_mass_evaporation_upper_bound_constraint = \ self.model.addCons( self.total_mass_var * (1 - self.evaporation_var * ( sum([self.ingredient_vars[ing] * self.ingredients_data[ing]['water']['min'] / 100 for ing in self.ingredient_vars if ing in self.leaf_ingredients_names]) )) >= (1 - self.const_relax_coef), name="Product mass evaporation upper bound" )
[docs] def _add_product_mass_constraint(self): """ The product mass is bounded by the sum of all nutriments and the remaining water """ # Lower bound self.model.addCons( self.total_mass_var * ( sum([ self.ingredient_vars[ingredient] * (((1 - self.evaporation_var) * self.ingredients_data[ingredient]['water']['min'] / 100) + sum([self.ingredients_data[ingredient][nutriment]['min'] / 100 for nutriment in TOP_LEVEL_NUTRIMENTS_CATEGORIES + ['ash']])) for ingredient in self.leaf_ingredients_names ]) ) <= (1 + self.const_relax_coef), name="Product mass upper bound" ) # Upper bound self.model.addCons( self.total_mass_var * ( sum([self.ingredient_vars[ingredient] * (((1 - self.evaporation_var) * self.ingredients_data[ingredient]['water']['max'] / 100) + sum([self.ingredients_data[ingredient][nutriment]['max'] / 100 for nutriment in TOP_LEVEL_NUTRIMENTS_CATEGORIES + ['ash']])) for ingredient in self.leaf_ingredients_names]) ) >= (1 - self.const_relax_coef), name="Product mass upper bound" )
[docs] def _add_nutritional_constraints(self): """ Looping on all nutriments to add the constraint that the sum of the ingredients proportions weighted by their content in this nutriment must fit the nutritional content of the product. """ # Looping on nutrients for nutri_item in NUTRIMENTS_CATEGORIES: if nutri_item == 'other': continue # Checking that the product has no data quality warnings related to this nutrient if len([x for x in QUALITY_DATA_WARNINGS.get(nutri_item, []) if x in self.product.get('data_quality_tags', [])]) > 0: continue # Check if the product has this nutriment defined product_nutriment = self.product['nutriments'].get(nutri_item + '_100g') if product_nutriment is None: continue else: product_nutriment = float(product_nutriment) margins = nutritional_error_margin(nutriment=nutri_item, value=product_nutriment / 100) absolute_margin = margins['absolute'] relative_margin = margins['relative'] # Lower bound self.model.addCons( ((absolute_margin + (1 + relative_margin) * product_nutriment / 100) + self.const_relax_coef) >= (self.total_mass_var * sum([var * self.ingredients_data[name][nutri_item]['min'] / 100 for name, var in self.ingredient_vars.items() if name in self.leaf_ingredients_names])), name=f"Lower bound for {nutri_item}" ) # Upper bound self.model.addCons( ((-absolute_margin + (1 - relative_margin) * product_nutriment / 100) - self.const_relax_coef) <= (self.total_mass_var * (sum([var * self.ingredients_data[name][nutri_item]['max'] / 100 for name, var in self.ingredient_vars.items() if name in self.leaf_ingredients_names])) ), name=f"Upper bound for {nutri_item}" )
[docs] def _add_defined_percentage_constraints(self, product): """ Recursive function to add the constraints corresponding to the defined (sub)ingredients percentages. For top-level ingredients, defined percentages correspond to the percentage of the total mass of ingredients used before processing. For subingredients, the percentage corresponds either to the percentage of the parent ingredient or to the percentage of the product. This is determined by a preprocessing step made by ImpactEstimator._check_multilevel_ingredients. In cases where the percentage type is undefined, it is ignored. Args: product (dict): Dict corresponding to a product or a compound ingredient. """ for rank, ingredient in enumerate(product['ingredients']): if ingredient.get('percent'): # If the ingredient has a non null 'percent' field try: proportion = float(ingredient['percent']) / 100 if (product.get('percent-type') == 'product') \ or (product is self.product): # For top level ingredients self.model.addCons(self.ingredient_vars[ingredient['id']] == proportion, name=f"{ingredient['id']}: {ingredient['percent']}% of product") elif product.get('percent-type') == 'parent': self.model.addCons(self.ingredient_vars[ingredient['id']] == proportion * self.ingredient_vars[product['id']], name=f"{ingredient['id']}: {ingredient['percent']}% of parent") # Ingredients which percentage is lower than 2% does not need to be listed in decreasing # proportion order. If the percentage of the ingredient is lower than 2%, then replace the # decreasing proportion order constraint by a 2% maximum constraint for all following # ingredients. This is done only for top level ingredients. if (proportion <= DECREASING_PROPORTION_ORDER_LIMIT) \ and (product is self.product): self._remove_decreasing_order_constraint_from_rank(rank) except ValueError: # To pass errors in float casting pass # Recursive call for the subingredients if 'ingredients' in ingredient: self._add_defined_percentage_constraints(ingredient)
[docs] def _remove_decreasing_order_constraint_from_rank(self, rank): """ Removes the decreasing proportion order constraint for all ingredients from the given rank. If an ingredient is below a certain proportion (2% in EU regulation), it may not be indicated in decreasing proportion order. Args: rank (int): Rank of the ingredient from which the decreasing proportion order constraint shall be replaced by a maximum proportion constraint. """ if rank < (self.decreasing_order_limit_rank or len(self.top_level_ingredients)): # Removing constraints for r in range(rank, len(self.top_level_ingredients) - 1): constraint = [x for x in self.model.getConss() if x.name == f"{self.top_level_ingredients_names[r]}>=" f"{self.top_level_ingredients_names[r + 1]}"] # Checking if the constraint exists as the solver may have deleted it by itself if constraint: self.model.freeTransform() self.model.delCons(constraint[0]) # Adding maximum constraint : for r in range(rank + 1, len(self.top_level_ingredients)): self.model.freeTransform() self.model.addCons(self.ingredient_vars[self.top_level_ingredients_names[r]] <= DECREASING_PROPORTION_ORDER_LIMIT, name=f"{self.top_level_ingredients_names[r]}<=2%") self.decreasing_order_limit_rank = rank
[docs] def _optimize_variable(self, variable, direction='minimize'): """ Optimize the model and return the variable value. Args: variable (Variable): Variable to optimize direction (str): 'minimize' or 'maximize' Returns: float: Value of the optimized variable. """ if direction.lower() not in ('minimize', 'maximize'): raise ValueError self.model.freeTransform() self.model.setObjective(variable if direction.lower() == 'minimize' else -variable) self.model.optimize() if self.model.getStatus() not in ('optimal', 'gaplimit', 'timelimit'): raise RecipeCreationError # In case of time limit hit, check if the gap is higher than the gap tolerance for time limit if self.model.getStatus() == 'timelimit': gap = self.model.getGap() if self.dual_gap_type == 'absolute': if gap > self.time_limit_dual_gap_limit: raise SolverTimeoutError elif self.dual_gap_type == 'relative': if gap > self.time_limit_dual_gap_limit * self.model.getDualbound(): raise SolverTimeoutError return self.model.getVal(variable)
[docs] def _get_variable_bounds(self, variable): """ Use the solver to find the ingredient's lower and upper bound. Args: variable (Variable): Solver variable Returns: tuple: Tuple containing ingredient lower and upper bounds """ sup = self._optimize_variable(variable, direction='maximize') inf = self._optimize_variable(variable, direction='minimize') return inf, sup
[docs] def _pick_proportion(self, ingredient_name, inf, sup): """ Chooses a random proportion for this ingredient. Uses a reference percentage distribution if the distribution of this ingredient in this interval has enough data, else uses an uniform distribution. Args: ingredient_name (str): inf (float): Lower bound sup (float): Upper bound Returns: float: Proportion of this ingredient """ assert round(inf, 8) <= round(sup, 8) # Rounded values used to avoid precision errors # If the two bounds are the same, return it as percentage if round(inf, 8) == round(sup, 8): return inf # Converting from proportion to percentages (as reference distributions use percentages) inf, sup = inf * 100, sup * 100 # If the ingredient has a reference percentage distribution, use it, else use a uniform distribution if ingredient_name in ing_with_ref_prct_dist: # Getting the reference percentage distribution of this ingredient reference_distribution = ref_ing_dist[ref_ing_dist.id == ingredient_name] # Stripping the values outside of the interval of possible solutions reference_distribution = reference_distribution[inf <= reference_distribution.percent] reference_distribution = reference_distribution[reference_distribution.percent <= sup] # If the product has categories, looping on it from the most specific to the most general and # stopping the loop when there are enough data in the reference distribution of the ingredient for # this category. If no category has enough data, use the entire distribution. If the entire # distribution does not have enough data, use a uniform distribution. 
if self.product.get('categories_tags'): # If the product has a non empty category tags category_distribution = [] category_index = len(self.product['categories_tags']) while (len(category_distribution) < self.min_dist_size) and (category_index >= 0): category_index -= 1 category = self.product['categories_tags'][category_index] mask = reference_distribution.categories_tags.apply(lambda x: category in (x or [])) category_distribution = reference_distribution[mask] if len(category_distribution) >= self.min_dist_size: reference_distribution = category_distribution # If there are less values than required, use uniform distribution if len(reference_distribution) < self.min_dist_size: percent = uniform(inf, sup) else: bandwidth = (sup - inf) / 10 kde = KernelDensity(kernel='gaussian', bandwidth=bandwidth) kde.fit(reference_distribution.percent.values.reshape(-1, 1)) # Plotting the KDE for debug purpose, comment on production # x_plot = np.linspace(inf - 5 * bandwidth, sup + 5 * bandwidth, 1000)[:, np.newaxis] # y_plot = np.exp(kde.score_samples(x_plot)) # # fig, ax = plt.subplots() # # # Plotting distribution # ax.plot(x_plot, y_plot) # # Plotting data points # ax.scatter(reference_distribution.percent.values, np.zeros(len(reference_distribution)), # marker='+', alpha=0.2, color='darksalmon') # # Plotting the bounds # ax.axvline(sup, color="seagreen", linestyle="dashed", linewidth=4) # ax.axvline(inf, color="seagreen", linestyle="dashed", linewidth=4) # # ax.set_title( # f"{ingredient_name} - bw:{round(bandwidth, 2)} - density:{round(len(reference_distribution) / (sup - inf))}") # # plt.show() # Ensure the sampled value is between inf and sup percent = -1 while (percent < inf) or (percent > sup): percent = kde.sample()[0][0] else: percent = uniform(inf, sup) return percent / 100 # Converting back to proportion
[docs] @staticmethod def recipe_from_proportions(proportions, total_mass): """ Returns a recipe from ingredients proportions and a total mass. Sums masses of ingredients used multiple times. Args: proportions (dict): total_mass (float): Returns: dict: Examples: >>> RandomRecipeCreator.recipe_from_proportions({'en:egg':0.7, 'en:flour': 0.3}, 150) {'en:flour': 45.0, 'en:egg':105.0} """ ingredients_names = set() for name in proportions: ingredients_names.add(original_id(name)) return {name: sum([prop for name_2, prop in proportions.items() if original_id(name_2) == name]) * total_mass for name in ingredients_names}
[docs] def _pick_total_mass(self, proportions, use_nutritional_info): """ Choosing the total mass of ingredients used by maximizing the confidence score of the resulting recipe. Args: proportions (dict): Proportions of the ingredients. Returns: float: Total mass of ingredients used in g. """ if self.total_mass_used is not None: return self.total_mass_used elif not(use_nutritional_info) : return 100 # Getting the total mass variable bounds inf, sup = self._get_variable_bounds(self.total_mass_var) # If the difference between the two bounds is lower than the distribution step, simply return the mean value if (sup - inf) <= (TOTAL_MASS_DISTRIBUTION_STEP / 100): return 100 * (sup + inf) / 2 # Looping over the total masses ranges with a predefined step and computing the confidence score of each # resulting recipe max_conf_score = 0 result = inf # ### FOR DEBUG PURPOSE ONLY ### # total_masses = [] # conf_scores = [] # ############################## # Compute the total mass only if nutritional info are used and there is at least one top level category # nutriment in common recipe = self.recipe_from_proportions(proportions, inf * 100) recipe_nutriments = nutriments_from_recipe(recipe) if self.use_nutritional_info and any([f"{x}_100g" in self.product['nutriments'] for x in recipe_nutriments if x in TOP_LEVEL_NUTRIMENTS_CATEGORIES]): for total_mass in np.arange(inf, sup, TOTAL_MASS_DISTRIBUTION_STEP / 100): recipe = self.recipe_from_proportions(proportions, total_mass * 100) recipe_nutriments = nutriments_from_recipe(recipe) # In some cases, the total mass is too high and will give impossible nutritional composition, in that # case the confidence score calculation will raise a ValueError try: conf_score = confidence_score(nutri=recipe_nutriments, reference_nutri=self.product['nutriments'], total_mass=total_mass * 100, min_possible_mass=MINIMUM_TOTAL_MASS_FOR_UNBALANCED_RECIPES * 100 if self.allow_unbalanced_recipe else 100, max_possible_mass=100 / (1 - 
self.maximum_evaporation), weighting_factor=self.confidence_score_weighting_factor) except ValueError: continue # ### FOR DEBUG PURPOSE ONLY ### # total_masses.append(total_mass) # conf_scores.append(conf_score) # ############################## # If the conf score is higher than the max, update the result and the max if conf_score > max_conf_score: max_conf_score = conf_score result = total_mass # ### FOR DEBUG PURPOSE ONLY ### # plt.plot(total_masses, conf_scores) # plt.show() # ############################## result *= 100 return result
def random_recipe(self, use_nutritional_info=True):
    """
    Create a possible recipe of a product given its ingredient list and nutritional data.
    The recipe is given for 100g of final product.

    Notes:
        The recipe of the product is estimated randomly. To do this, a linear programming solver is defined with
        these constraints:
            - the sum of all ingredients percentage must be 100;
            - the ingredients percentages are given in decreasing order;
            - the nutritional composition of the product is the sum of the nutritional composition of its
              ingredients (with an error margin specified by nutritional_info_precision);
            - some ingredients may have a defined percentage.

        Once the solver has been set, the algorithm loops through each ingredient in random order, and computes
        its possible values interval using the solver. Once the possible values interval is defined, it chooses a
        random proportion value for this ingredient within this interval and adds this value as a new constraint
        for the solver.
        If the ingredient has a reference percentage distribution (computed from existing OFF data), the random
        value will be picked following this distribution. If not, it will use a uniform distribution.

        Once all ingredients proportions have been defined, the same operation is done on the total mass used
        variable, by maximizing the confidence score of the resulting recipe.

    Args:
        use_nutritional_info (bool): Forwarded to ``_pick_total_mass`` only. Whether the nutritional constraints
            are added to the solver is decided by ``self.use_nutritional_info``, not by this argument.

    Returns:
        dict: Dictionary containing a possible recipe with ingredients ids as keys and masses in g as values.
            The same dict is also stored in ``self.recipe``.
    """

    # Setting variables
    self.recipe = dict()  # Resetting the recipe

    # Removing previous constraints
    # NOTE(review): freeTransform() is called before every model modification in this method, presumably
    # because the solver does not accept constraint changes once a previous solve has transformed the problem.
    # Confirm against the PySCIPOpt documentation.
    self.model.freeTransform()
    for constraint in self.model.getConss():
        self.model.delCons(constraint)

    # Adding constraints to the solver
    self._add_used_mass_constraint()
    self._add_total_leaves_percentage_constraint()
    for ingredient in self.top_level_ingredients:
        self._add_total_subingredients_percentages_constraint(ingredient)
    self._add_mass_order_constraints(self.product)
    self._add_evaporation_constraint()
    # The handle is kept so that this bound can be removed again when unbalanced recipes are allowed
    total_mass_lower_bound_constraint = self.model.addCons(self.total_mass_var >= 0.99, 'Total mass lower bound')

    # Checking that the product has no global data quality warnings related to nutrition before adding nutritional
    # constraints
    global_dqw = [x for x in QUALITY_DATA_WARNINGS['global'] if x in self.product.get('data_quality_tags', [])]
    if self.use_nutritional_info and not global_dqw:
        self._add_nutritional_constraints()

    self._add_product_mass_constraint()
    if self.use_defined_prct:
        self._add_defined_percentage_constraints(self.product)

    # Shuffling the ingredients
    leaf_ingredients_names = self.leaf_ingredients_names.copy()  # Creating a copy to avoid messing with original
    shuffle(leaf_ingredients_names)

    # Looping over ingredients to pick a random proportion in their possible values interval
    proportions = dict()
    for ingredient_name in leaf_ingredients_names:
        inf, sup = self._get_variable_bounds(self.ingredient_vars[ingredient_name])

        # Now the possible values interval has been calculated,
        # choose a random proportion within it for this ingredient
        proportion = self._pick_proportion(ingredient_name, inf, sup)
        proportions[ingredient_name] = proportion

        # Adding the choice of this value as a constraint to the problem
        self.model.freeTransform()
        self.model.addCons(self.ingredient_vars[ingredient_name] == proportion,
                           name=f"{ingredient_name}: {proportion}")

        # When a top-level ingredient gets a proportion at or below DECREASING_PROPORTION_ORDER_LIMIT, the
        # decreasing order constraints are removed starting from its rank
        if (proportion <= DECREASING_PROPORTION_ORDER_LIMIT) \
                and (ingredient_name in self.top_level_ingredients_names):
            self._remove_decreasing_order_constraint_from_rank(
                self.top_level_ingredients_names.index(ingredient_name))

    # For unbalanced recipes, the lower bound on the total mass and the evaporation upper bound are dropped
    # before the total mass is picked.
    # NOTE(review): product_mass_evaporation_upper_bound_constraint is not set in this method; presumably one
    # of the _add_* helpers called above creates it. Confirm.
    if self.allow_unbalanced_recipe:
        self.model.freeTransform()
        self.model.delCons(total_mass_lower_bound_constraint)
        self.model.delCons(self.product_mass_evaporation_upper_bound_constraint)

    total_mass = self._pick_total_mass(proportions, use_nutritional_info)

    self.recipe = self.recipe_from_proportions(proportions, total_mass)

    if VERBOSITY >= 2:
        print(self.recipe)

    return self.recipe
class ImpactEstimator:
    def __init__(self, product, quantity=100, ignore_unknown_ingredients=True, use_defined_prct=True):
        """
        Prepare the Monte-Carlo estimation of the environmental impact of an Open Food Facts product.

        Notes:
            The estimation is a loop that computes the impacts of the product from its ingredients. Each run
            stores the impact values; the loop stops when the maximum number of runs is reached or when the
            geometric mean of each computed impact is stabilized within a given confidence interval.
            The impact of a recipe is the sum of the ingredients masses weighted by their own impact values
            acquired from external data.

            The constructor works on a deep copy of the product. It cleans the ingredients tree, runs the
            product checks and collects human readable messages in ``self.warnings``.

        Args:
            product (dict): Dict containing an Open Food Facts product. It must contain the key "ingredients".
            quantity (float): Quantity of product in grams for which the impact must be calculated.
                Default is 100g.
            ignore_unknown_ingredients (bool): Should ingredients absent of OFF taxonomy and without defined
                percentage be considered as parsing errors and ignored?
            use_defined_prct (bool): Should ingredients percentages defined in the product be used?

        Raises:
            AttributeError: If the product has no "ingredients" key.
            ValueError: If the ingredients list of the product is empty.
        """
        self.start_time = time.time()
        self.product = copy.deepcopy(product)

        # Plain storage of the arguments and of the values filled in by the checks below
        self.product_quantity = quantity
        self.ignore_unknown_ingredients = ignore_unknown_ingredients
        self.ignored_unknown_ingredients = []
        self.adjusted_maximum_evaporation_coefficient = None
        self.use_defined_prct_arg = use_defined_prct
        self.use_defined_prct = use_defined_prct

        # The product must come with a non empty ingredients list
        if 'ingredients' not in product:
            raise AttributeError("The product has no ingredients field.")
        if len(product['ingredients']) == 0:
            raise ValueError("The product ingredients list is empty.")

        # Text warnings characterizing the result in some special cases (too many unknown ingredients for example)
        self.warnings = []

        # Cleaning of the ingredients tree
        if 'allergens_tags' in self.product:
            self._remove_allergens()
        self._remove_double_parenthesis()

        # Checks related to the product type
        self._check_fermented_product()
        self._check_product_water_loss()
        self.check_butters_product()

        # Checks related to multilevel ingredients
        self._check_ingredients()

        self.leaf_ingredients = find_ingredients_graph_leaves(self.product)
        self.nb_ing = len(self.leaf_ingredients)

        # Checks on poorly informed nutriments for the categories listed in ID_AGRIBALYSE_ISSUES
        self._check_nutri_well_informed()

        def leaves_lacking(data_field):
            """ Leaf ingredients whose reference data has no `data_field` entry. """
            return [leaf for leaf in self.leaf_ingredients
                    if data_field not in ingredients_data.get(original_id(leaf['id']), [])]

        # Uncharacterized ingredients are the ones with no nutrition and/or impact data
        self.uncharacterized_ingredients = {
            'nutrition': leaves_lacking('nutriments'),
            'impact': leaves_lacking('impacts'),
        }
        self.uncharacterized_ingredients_ids = {
            kind: list(set([original_id(leaf['id']) for leaf in leaves]))
            for kind, leaves in self.uncharacterized_ingredients.items()
        }

        # Lists that will store the mass of uncharacterized ingredients for each recipe
        self.uncharacterized_ingredients_mass_distribution = {'nutrition': [], 'impact': []}

        self.uncharacterized_ingredients_ratio = {
            kind: len(leaves) / self.nb_ing
            for kind, leaves in self.uncharacterized_ingredients.items()
        }
        for kind, ratio in self.uncharacterized_ingredients_ratio.items():
            if ratio >= UNCHARACTERIZED_INGREDIENTS_RATIO_WARNING_THRESHOLD:
                self.warnings.append(
                    f"The product has a high number of {kind} uncharacterized ingredients: "
                    f"{ratio:.0%}")

        # Without any recognized nutriment, the nutritional information cannot be used whatever the caller asks
        self.use_nutritional_info_override = None
        declared_nutriments = self.product.get('nutriments', dict())
        has_recognized_nutriment = any(f"{x}_100g" in declared_nutriments for x in NUTRIMENTS_CATEGORIES)
        if ('nutriments' not in self.product) or not has_recognized_nutriment:
            self.use_nutritional_info_override = False
            self.warnings.append("The product has no recognized nutriment information.")

        # Checks on the percentages that may be defined for some ingredients
        self._check_defined_percentages()
def _remove_double_parenthesis(self, compound_ingredient=None):
    """
    Strip nested single-child chains of the form ingredient(ingredient1(ingredient2)).

    When a compound ingredient has exactly one subingredient, this subingredient is in the OFF taxonomy and
    itself has exactly one subingredient, the chain is considered as an information about the compound
    ingredient rather than a composition: the subingredients of the compound ingredient are removed and a
    warning is added to ``self.warnings``.

    Args:
        compound_ingredient (dict): Ingredient to inspect. If None, the whole product is walked.
    """
    top_level_ingredients = self.product['ingredients']
    product = compound_ingredient or self.product

    if compound_ingredient is not None \
            and len(product['ingredients']) == 1:
        only_child = product['ingredients'][0]
        ingredient_id = only_child['id']

        # The 'no_ingredients' default has a length different from 1, so a child without subingredients
        # never matches.
        if ingredient_id in off_taxonomy \
                and len(only_child.get('ingredients', 'no_ingredients')) == 1:
            # Deleting on the inspected ingredient itself (instead of looking it up by id among the top-level
            # ingredients) also works for nested compound ingredients and for duplicated ids.
            del product['ingredients']
            self.warnings.append(
                f"{ingredient_id} has been identified as the information of an ingredient ({product['id']}) and ignored.")

            # If the stripped ingredient is a top-level one and is not the last one, continue with the next
            # top-level ingredient. The strict comparison avoids reading past the end of the list.
            index_used = next((i for i, x in enumerate(top_level_ingredients) if x is product), None)
            if index_used is not None and len(top_level_ingredients) > index_used + 1:
                product = top_level_ingredients[index_used + 1]

    # Recursively call the method for the subingredients
    for ingredient in product.get('ingredients', []):
        if 'ingredients' in ingredient:
            self._remove_double_parenthesis(compound_ingredient=ingredient)
def _check_fermented_product(self):
    """
    Detect fermented products (alcohol or cheese for example) and drop their carbohydrates figures.

    A product is considered fermented when one of its ingredients is listed in FERMENTATION_AGENTS or when one
    of its categories is listed in FERMENTED_FOOD_CATEGORIES. Each detection adds its own warning. In both
    cases the "carbohydrates_100g" and "sugars_100g" nutriments are removed from the product, as the
    carbohydrates input of the ingredients may differ from the output in the product.
    """
    agents_found = []
    for node in flat_ingredients_list_BFS(self.product):
        if node['id'] in FERMENTATION_AGENTS:
            agents_found.append(node['id'])

    if agents_found:
        self.warnings.append(f"Fermentation agents are present in the product "
                             f"({', '.join(agents_found)}). "
                             f"Carbohydrates and sugars mass balance will not be considered to estimate potential "
                             f"recipes")

    fermented_categories = [tag for tag in self.product.get('categories_tags', [])
                            if tag in FERMENTED_FOOD_CATEGORIES]

    if fermented_categories:
        self.warnings.append(f"The product belongs to fermented products categories "
                             f"({', '.join(fermented_categories)}). "
                             f"Carbohydrates and sugars mass balance will not be considered to estimate potential "
                             f"recipes")

    # Nothing to remove for non fermented products
    if not (agents_found or fermented_categories):
        return

    for nutriment in ('carbohydrates', 'sugars'):
        try:
            del self.product['nutriments'][f"{nutriment}_100g"]
        except KeyError:
            # Either the product has no nutriments or this nutriment is not declared
            pass
def check_butters_product(self):
    """
    Detect butters and keep only their fat content as nutritional information.

    If one of the product categories is listed in BUTTERS_FOOD_CATEGORIES, a warning is added and the
    proteins, carbohydrates, sugars, fiber and salt nutriments are removed from the product, both under their
    "<name>_100g" and "<name>" keys.
    """
    matching_categories = [tag for tag in self.product.get('categories_tags', [])
                           if tag in BUTTERS_FOOD_CATEGORIES]
    if not matching_categories:
        return

    self.warnings.append(f"The product belongs to butter products categories "
                         f"({', '.join(matching_categories)}). "
                         f"Fat mass balance only will be considered to estimate potential "
                         f"recipes")

    for nutriment in ('proteins', 'carbohydrates', 'sugars', 'fiber', 'salt'):
        for key in (f"{nutriment}_100g", f"{nutriment}"):
            try:
                del self.product['nutriments'][key]
            except KeyError:
                # Either the product has no nutriments or this key is not declared
                pass
def _check_product_water_loss(self):
    """
    Adjust the maximum evaporation coefficient for products with a high water loss potential.

    Some products (cheeses or butters for example) may have a bigger water loss than others. If at least one
    category of the product is listed in HIGH_WATER_LOSS_CATEGORIES,
    ``self.adjusted_maximum_evaporation_coefficient`` is set to the highest coefficient of the matching
    categories and a warning naming the retained category is added. If no category matches, nothing is changed.
    """
    detected_high_water_loss_categories = [cat for cat in self.product.get('categories_tags', [])
                                           if cat in HIGH_WATER_LOSS_CATEGORIES]
    if not detected_high_water_loss_categories:
        return

    adjusted_coeff = 0
    warning_message = None
    for category in detected_high_water_loss_categories:
        # Strict comparison: in case of a tie, the first category is the one named in the warning
        if HIGH_WATER_LOSS_CATEGORIES[category] > adjusted_coeff:
            adjusted_coeff = HIGH_WATER_LOSS_CATEGORIES[category]
            warning_message = f"The category {category} may have an important water loss. " \
                              f"The maximum evaporation coefficient has been adjusted to {adjusted_coeff}."

    self.adjusted_maximum_evaporation_coefficient = adjusted_coeff

    # No message exists when none of the matching categories has a positive coefficient. Appending None to
    # the warnings would break the code doing substring searches in self.warnings.
    if warning_message is not None:
        self.warnings.append(warning_message)
def _check_nutri_well_informed(self):
    """
    Check the plausibility of the top level nutriments of the product.

    A warning is added if the sum of the top level nutriments (TOP_LEVEL_NUTRIMENTS_CATEGORIES, per 100g) is
    over 100g or under 1g.

    Moreover, if the product has a single leaf ingredient, if one of its Agribalyse food codes is listed in
    ID_AGRIBALYSE_ISSUES, and if its top level nutriments are all missing or sum to 10g or less, the top level
    nutriments of the product are overwritten by the reference values of its ingredient. Nutriments without
    reference value are removed from the product.

    Products without a "nutriments" field are left untouched.
    """
    nutriments = self.product.get('nutriments')
    if nutriments is None:
        # Nothing to check. Reading self.product['nutriments'] here would raise a KeyError.
        return

    dic_nutri = {x: nutriments.get(f"{x}_100g") for x in TOP_LEVEL_NUTRIMENTS_CATEGORIES}
    nutriments_sum = sum(nutriments.get(f"{x}_100g", 0) for x in TOP_LEVEL_NUTRIMENTS_CATEGORIES)

    if nutriments_sum > 100:
        self.warnings.append(f"The nutrients are not well informed : the mass of nutrients is over 100g")
    elif nutriments_sum < 1:
        self.warnings.append(f"The nutrients are not well informed : the mass of nutrients is less 1g")

    # The nutriments are only replaced for single ingredient products (100g of coffee for example) of the
    # categories known to be problematic
    categories_properties = self.product.get('categories_properties')
    if not categories_properties:
        return

    food_codes = (categories_properties.get('agribalyse_proxy_food_code:en'),
                  categories_properties.get('agribalyse_food_code:en'))
    if not any(code in ID_AGRIBALYSE_ISSUES for code in food_codes) or len(self.leaf_ingredients) != 1:
        return

    all_missing = all(value is None for value in dic_nutri.values())
    if not (all_missing or nutriments_sum <= 10):
        return

    self.warnings.append(f"The nutrients are not well informed.")

    # Reference nutriments of the only ingredient. An ingredient without reference data is handled as an
    # ingredient without any reference value.
    leaf_id = original_id(self.leaf_ingredients[0]['id'])
    reference_nutriments = (ingredients_data.get(leaf_id) or {}).get('nutriments', {})

    for nutriment in TOP_LEVEL_NUTRIMENTS_CATEGORIES:
        key = f"{nutriment}_100g"
        reference = reference_nutriments.get(nutriment)
        if reference is not None:
            try:
                nutriments[key] = reference['value']
            except KeyError:
                # Reference entry without 'value': the product value is kept as is
                pass
        else:
            nutriments.pop(key, None)
def _remove_allergens(self, compound_ingredient=None):
    """
    Drop allergens that were parsed as the only subingredient of an ingredient.

    When a compound ingredient has a single subingredient that is flagged with 'allergens' in the OFF taxonomy,
    its subingredients are removed and a warning is added. The tree is walked recursively.

    Args:
        compound_ingredient (dict): Ingredient to inspect. If None, the walk starts from the product itself,
            which is never stripped.
    """
    node = compound_ingredient or self.product

    # The product itself is skipped: only compound ingredients with exactly one child are candidates
    if compound_ingredient is not None and len(node['ingredients']) == 1:
        only_child_id = node['ingredients'][0]['id']
        child_is_allergen = only_child_id in off_taxonomy and 'allergens' in off_taxonomy[only_child_id]
        if child_is_allergen:
            del node['ingredients']
            self.warnings.append(f"{only_child_id} has been identified as an allergen and ignored.")

    # Walking down the remaining compound subingredients
    for child in node.get('ingredients', []):
        if 'ingredients' not in child:
            continue
        self._remove_allergens(compound_ingredient=child)
def _check_ingredients(self):
    """
    Clean the ingredients tree and check that it is usable.

    Raises:
        NoKnownIngredientsError: If no ingredient is left after the removal of the unknown ingredients, or if
            the top-level ingredients with an impact are none or only "en:water".
        NoCharacterizedIngredientsError: If no ingredient is left after the cleaning of the ingredients graph,
            or if no leaf ingredient has an impact.
    """
    # With the flat format, subingredients are also listed at top level: only ranked ingredients are kept there
    if OFF_INGREDIENTS_FORMAT == 'flat with rank':
        self.product['ingredients'] = [ing for ing in self.product['ingredients'] if 'rank' in ing]

    # Ingredients absent of the OFF taxonomy
    if self.ignore_unknown_ingredients:
        remover = UnknownIngredientsRemover()
        remover.remove_unknown_ingredients(self.product)
        self.ignored_unknown_ingredients = remover.removed_unknown_ingredients

    if 'ingredients' not in self.product:
        raise NoKnownIngredientsError

    # Subingredients with no nutrition or impact and without percentages defined
    clear_ingredient_graph(self.product)

    if 'ingredients' not in self.product:
        raise NoCharacterizedIngredientsError

    # At least one leaf must have an impact
    nb_leaves_with_impact = sum(1 for leaf in find_ingredients_graph_leaves(self.product)
                                if leaf['id'] in ingredients_data and 'impacts' in ingredients_data[leaf['id']])
    if nb_leaves_with_impact == 0:
        raise NoCharacterizedIngredientsError

    # Water cannot be the only top-level ingredient with an impact
    top_level_ids_with_impacts = [ing['id'] for ing in self.product['ingredients']
                                  if 'impacts' in ingredients_data.get(ing['id'], dict())]
    if top_level_ids_with_impacts == [] or top_level_ids_with_impacts == ['en:water']:
        raise NoKnownIngredientsError

    # Subingredients percentages can be relative to the parent ingredient or to the product (absolute)
    define_subingredients_percentage_type(self.product)

    # Ingredients whose percentage type could not be determined are reported
    nb_undefined_prct_ingredients = sum(1 for ing in flat_ingredients_list_BFS(self.product)
                                        if ing.get('percent-type') == 'undefined')
    if nb_undefined_prct_ingredients:
        self.warnings.append(
            f"{nb_undefined_prct_ingredients} compound ingredients whose percentage type is undefined.")
def reliability_score(self, const_relax_coef, uncharacterized_ingredients_mass_proportion):
    """
    Reliability level of the result, from 1 (most reliable) to 4 (least reliable).

    Three shares are considered: the share of ingredients ignored because they are unknown (ignored
    ingredients over ignored plus leaf ingredients) and the mass proportions of the ingredients without
    nutrition data and without impact data.

        - 1: The three shares are null and the constraints have not been relaxed.
        - 2: The three shares are at most 5% and the constraints have not been relaxed.
        - 3: The three shares are at most 25% and the constraints relaxation coefficient is at most 0.05.
        - 4: Any other case, in particular if the constraints relaxation coefficient is over 0.05 or if one of
          the warnings of the product contains an entry of RESULTS_WARNINGS_NOT_RELIABLE.

    Args:
        const_relax_coef (float): Constraints relaxation coefficient used to get the result.
        uncharacterized_ingredients_mass_proportion (dict): Mass proportions of the uncharacterized
            ingredients, under the keys 'nutrition' and 'impact'.

    Returns:
        int: Reliability level between 1 and 4.
    """
    nb_ignored = len(self.ignored_unknown_ingredients)
    ignored_ingredient_ratio = nb_ignored / (nb_ignored + len(self.leaf_ingredients))

    # If there is an important warning in the result, it cannot be reliable
    for blocking_warning in RESULTS_WARNINGS_NOT_RELIABLE:
        if any(blocking_warning in x for x in self.warnings):
            return 4

    if const_relax_coef > 0.05:
        return 4

    shares = (uncharacterized_ingredients_mass_proportion['nutrition'],
              uncharacterized_ingredients_mass_proportion['impact'],
              ignored_ingredient_ratio)
    constraints_not_relaxed = const_relax_coef == 0

    if constraints_not_relaxed and all(share == 0 for share in shares):
        return 1

    if constraints_not_relaxed and all(share <= 0.05 for share in shares):
        return 2

    # Relaxation coefficients over 0.05 have already been handled above
    if all(share <= 0.25 for share in shares):
        return 3

    return 4
def _check_defined_percentages(self):
    """
    Assert that the percentages that might be defined for some ingredients are valid.

    Only the top-level ingredients of the product are inspected. The method may:
        - delete the 'percent' of a top-level ingredient that is out of its natural bounds (with a warning);
        - set ``self.use_defined_prct`` to False if the remaining percentages are inconsistent;
        - call ``remove_percentage_from_product`` on the product (with a warning) if the caller asked to use
          the defined percentages but they turned out to be inconsistent.
    """

    # Checking that the defined percentages respect these constraints:
    # - Each ingredient percentage is within its "natural bounds" defined by its rank and the number of
    #   ingredients
    # - The defined percentages do not prevent the total sum to be 100%
    # - The defined percentages are in decreasing proportion order

    # If the percentage of a top-level ingredient is not in its natural bounds,
    # it is probably a parsing error and should not be used.
    for rank, ingredient in enumerate(self.product['ingredients'], 1):
        if ingredient.get('percent'):
            bounds = natural_bounds(rank, self.nb_ing)
            if not (bounds[0] <= float(ingredient['percent']) <= bounds[1]):
                self.warnings.append(f"Inconsistencies were found in the defined percentages of the ingredients. "
                                     f"Defined percentage of \"{ingredient['id']}\" ({ingredient['percent']}%)"
                                     f" has not been used.")
                del ingredient['percent']

    # If the remaining ingredients percentages are not in decreasing order, then at least one ingredient percentage
    # is incorrect but it is not possible to know which one, therefore none can be used.
    # Only the percentages over 2 are compared.
    # NOTE(review): presumably because small proportions are not expected to strictly follow the decreasing
    # order (see DECREASING_PROPORTION_ORDER_LIMIT) - confirm.
    defined_ingredients_percentages = [float(x['percent']) for x in self.product['ingredients']
                                       if ('percent' in x) and (float(x.get('percent', 0)) > 2)]
    if not all(x >= y for x, y in zip(defined_ingredients_percentages, defined_ingredients_percentages[1:])):
        self.use_defined_prct = False

    # If the minimum (resp. maximum) theoretical percentage sum of these ingredients is higher (resp lower)
    # than 100, then at least one ingredient percentage is incorrect but it is not possible to know which one,
    # therefore none can be used

    # Minimum
    minimum_sum = 0
    for rank, ingredient in enumerate(self.product['ingredients'], 1):
        # If the percentage of the ingredient is defined
        if 'percent' in ingredient:
            percentage = float(ingredient['percent'])
        # If the percentage is not defined, it is at least equal to the percentage of the first ingredient with a
        # defined percentage after the current one.
        # NOTE(review): min() keeps the lower of this percentage and the natural lower bound, which is a
        # looser bound than what the sentence above describes. Confirm whether max() was intended.
        else:
            next_ingredients_with_prct = [x for x in self.product['ingredients'][rank:] if 'percent' in x]
            if next_ingredients_with_prct:
                percentage = min(float(next_ingredients_with_prct[0]['percent']),
                                 natural_bounds(rank, self.nb_ing)[0])
            else:
                percentage = natural_bounds(rank, self.nb_ing)[0]

        # Adding the percentage to the sum
        minimum_sum += percentage

    # If the sum of the minimum percentages is higher than 100, then at least one ingredient percentage is incorrect.
    # The comparison is made against 105, i.e. with a tolerance of 5 points.
    if minimum_sum > 105:
        self.use_defined_prct = False

    # Maximum
    maximum_sum = 0
    for rank, ingredient in enumerate(self.product['ingredients'], 1):
        # If the percentage of the ingredient is defined
        if 'percent' in ingredient:
            percentage = float(ingredient['percent'])
        # If the percentage is not defined, it is at most equal to the percentage of the first ingredient with a
        # defined percentage before the current one.
        # Despite its name, next_ingredients_with_prct holds here the ingredients located BEFORE the current
        # one; the closest one is the last element of the list.
        # NOTE(review): max() keeps the higher of this percentage and the natural upper bound, which is a
        # looser bound than what the sentence above describes. Confirm whether min() was intended.
        else:
            next_ingredients_with_prct = [x for x in self.product['ingredients'][:rank - 1] if 'percent' in x]
            if next_ingredients_with_prct:
                percentage = max(float(next_ingredients_with_prct[-1]['percent']),
                                 natural_bounds(rank, self.nb_ing)[1])
            else:
                percentage = natural_bounds(rank, self.nb_ing)[1]

        # Adding the percentage to the sum
        maximum_sum += percentage

    # If the sum of the maximum percentages is lower than 100, then at least one ingredient percentage is incorrect.
    # The comparison is made against 95, i.e. with a tolerance of 5 points.
    if maximum_sum < 95:
        self.use_defined_prct = False

    # Removing percentage value from the ingredients if use_defined_prct is False. This is only a security to avoid
    # to accidentally use ingredients defined percentages.
    # The warning is only added if the caller asked for the defined percentages to be used.
    if self.use_defined_prct_arg and not self.use_defined_prct:
        self.warnings.append("Inconsistencies were found in the defined percentages of the ingredients. "
                             "Defined percentages were not used for estimating the impact.")
        remove_percentage_from_product(self.product)
[docs] def estimate_impacts(self, impact_names, min_run_nb=30, max_run_nb=1000, forced_run_nb=None, confidence_interval_width=0.05, confidence_level=0.95, use_nutritional_info=True, const_relax_coef=0, maximum_evaporation=0.4, total_mass_used=None, min_prct_dist_size=30, dual_gap_type='absolute', dual_gap_limit=0.001, solver_time_limit=60, time_limit_dual_gap_limit=0.01, confidence_weighting=True, use_ingredients_impact_uncertainty=True, quantiles_points=('0.05', '0.25', '0.5', '0.75', '0.95'), distributions_as_result=False, confidence_score_weighting_factor=10): """ Looping by calculating a new random recipe at each loop and stopping when the geometric mean of recipes impacts values are stabilized within a given confidence interval. The convergence of the values is detected when the arithmetic mean of the log of the impact of the n-th first recipes has a normal distribution with a small enough confidence interval. Then the exponential of the values is taken to switch back to linear space and obtain the geometric mean of the impacts (geometric mean is the arithmetic mean of the log of the values). Args: impact_names (str or list): Iterable containing impacts names or single impact name. min_run_nb (int): Minimum number of run for the Monte-Carlo loop A too small number may result in a falsely converging value max_run_nb (int): Maximum number of run for the Monte-Carlo loop forced_run_nb (int): Used to bypass natural Monte-Carlo stopping criteria and force the number of runs confidence_interval_width (float): Width of the confidence interval that will determine the convergence detection. confidence_level (float): Confidence level of the confidence interval. use_nutritional_info (bool): Should nutritional information be used to estimate recipe? const_relax_coef (float): Constraints relaxation coefficient. Allows to relax constraints on nutriments, water and mass balance to increase chances to get a result. 
maximum_evaporation (float): Upper bound of the evaporation coefficient [0-1[. I.e. maximum proportion of ingredients water that can evaporate. total_mass_used (float): Total mass of ingredient used in grams, if known. min_prct_dist_size (int): Minimum size of the ingredients percentage distribution that will be used to pick a proportion for an ingredient. If the distribution (adjusted to the possible value interval) has less data, uniform distribution will be used instead. dual_gap_type (str): 'absolute' or 'relative'. Determines the precision type of the variable optimization by the solver. dual_gap_limit (float): Determines the precision of the variable optimization by the solver. Relative or absolute according to dual_gap_type. solver_time_limit (float): Maximum time for the solver optimization (in seconds). Set to None or 0 to set no limit. time_limit_dual_gap_limit (float): Accepted precision of the solver in case of time limit hit. Relative or absolute according to dual_gap_type. use_ingredients_impact_uncertainty (bool): Should ingredients impacts uncertainty data be used? confidence_weighting (bool): Should the recipes be weighted by their confidence score (deviation of the recipes nutritional composition to the reference product). quantiles_points (iterable): List of impacts quantiles cutting points to return in the result. distributions_as_result (bool): Should the recipes, the distributions of the impact, the mean confidence interval and the confidence score be added to the result? confidence_score_weighting_factor (float): Weighting factor used for the confidence score calculation. It corresponds to the weight of the nutritional distance against the absolute difference between the total mass and 100g/100g. 
Returns: dict: Dictionary containing the result (the average impacts of all computed recipes) as well as other attributes such as the standard deviation of the impacts of all computed recipes, the list of unknown ingredients contained in the product, the average mass percentage of unknown ingredients. """ if self.adjusted_maximum_evaporation_coefficient: maximum_evaporation = self.adjusted_maximum_evaporation_coefficient if ('nutriments' not in self.product) and use_nutritional_info: raise AttributeError("The product has no nutriments field. Set use_nutritional_info=False to force a " "result.") if (len(self.product['nutriments']) == 0) and use_nutritional_info: raise ValueError("The product nutriments list is empty. Set use_nutritional_info=False to force a result.") # Setting variables if forced_run_nb is not None: min_run_nb = 2 max_run_nb = forced_run_nb + 1 confidence_interval_width = 0 use_nutritional_info = use_nutritional_info if self.use_nutritional_info_override is None \ else self.use_nutritional_info_override # The use of allow_unbalanced_recipe=True is necessary to avoid overestimation of the ingredients total used # mass and thus the of the product impacts. 
recipe_creator = RandomRecipeCreator(product=self.product, use_defined_prct=self.use_defined_prct, use_nutritional_info=use_nutritional_info, const_relax_coef=const_relax_coef, maximum_evaporation=maximum_evaporation, total_mass_used=total_mass_used, min_prct_dist_size=min_prct_dist_size, dual_gap_type=dual_gap_type, dual_gap_limit=dual_gap_limit, solver_time_limit=solver_time_limit, time_limit_dual_gap_limit=time_limit_dual_gap_limit, allow_unbalanced_recipe=True, confidence_score_weighting_factor=confidence_score_weighting_factor) run = 0 recipes = [] impact_names = [impact_names] if type(impact_names) is str else impact_names impact_distributions = {impact_name: [] for impact_name in impact_names} impact_log_distributions = {impact_name: [] for impact_name in impact_names} confidence_score_distribution = [] total_used_mass_distribution = [] log_means = {impact_name: [] for impact_name in impact_names} mean_confidence_interval_distribution = {impact_name: [] for impact_name in impact_names} impact_sign = {impact_name: None for impact_name in impact_names} ingredients_impacts_share = {impact: dict() for impact in impact_names} convergence_reached = {impact_name: False for impact_name in impact_names} impacts_units = dict() impacts_quantiles = dict() impacts_relative_interquartile = dict() # Used to handle impacts that are skipped skipped_impacts = [] def skip_impact(impact_name): skipped_impacts.append(impact_name) del impact_distributions[impact_name] del impact_log_distributions[impact_name] del log_means[impact_name] del mean_confidence_interval_distribution[impact_name] del impact_sign[impact_name] del ingredients_impacts_share[impact_name] del convergence_reached[impact_name] consecutive_null_impact_characterized_ingredients_mass = 0 # Starting a loop that will end when the convergence is reached for all impacts while True: # Increment the run counter run += 1 break_main_loop = False # Getting a random recipe and adding its impacts to the distributions # To 
ensure there is no possible recipe, wait for several recipe creation errors before to raise an # exception. consecutive_recipe_creation_error = 0 while consecutive_recipe_creation_error < MAX_CONSECUTIVE_RECIPE_CREATION_ERROR: try: recipe_100g = recipe_creator.random_recipe(use_nutritional_info = use_nutritional_info) # RandomRecipeCreator.random_recipe() gives a result for 100g of final product. # Adapting the recipe to the product quantity recipe = {k: v * self.product_quantity / 100 for k, v in recipe_100g.items()} break except RecipeCreationError: consecutive_recipe_creation_error += 1 if VERBOSITY >= 1: print(f'Consecutive recipe creation error: {consecutive_recipe_creation_error}') if consecutive_recipe_creation_error >= MAX_CONSECUTIVE_RECIPE_CREATION_ERROR: raise RecipeCreationError # Computing the confidence score of the recipe # Compute the confidence score only if nutritional info are used and there is at least one top level # category nutriment in common between the computed recipe and the product's nutritional composition recipe_nutriments = nutriments_from_recipe(recipe_100g) if use_nutritional_info and confidence_weighting and any([f"{x}_100g" in self.product['nutriments'] for x in recipe_nutriments if x in TOP_LEVEL_NUTRIMENTS_CATEGORIES]): conf_score = confidence_score(nutri=recipe_nutriments, reference_nutri=self.product['nutriments'], total_mass=sum([x for x in recipe_100g.values()]), min_possible_mass=MINIMUM_TOTAL_MASS_FOR_UNBALANCED_RECIPES * 100, max_possible_mass=100 / (1 - maximum_evaporation)) else: # If the nutritional information is not used, all recipes are supposed to have the same confidence # level. 
conf_score = 1 confidence_score_distribution.append(conf_score) # Computing the mass of unknown ingredients and adding it to the distribution total_mass = sum([float(x) for x in recipe.values()]) total_used_mass_distribution.append(total_mass) for characterization in 'nutrition', 'impact': uncharacterized_ingredients_mass = \ sum([float(recipe[x]) / total_mass for x in self.uncharacterized_ingredients_ids[characterization]]) self.uncharacterized_ingredients_mass_distribution[characterization].append( uncharacterized_ingredients_mass) # Adding the recipe to the distribution recipes.append(recipe) # Computing the impact of the recipe for all impact categories for impact_name in impact_names: recipe_impact_calculator = RecipeImpactCalculator(recipe, impact_name, use_uncertainty=use_ingredients_impact_uncertainty) recipe_impact = recipe_impact_calculator.get_recipe_impact() if recipe_impact == 0: # In case of null impact values, the geometric approach is not applicable # TODO: In that case use a linear approach skip_impact(impact_name) self.warnings.append(f'Geometric mean could not be calculated for impact: {impact_name}.\n' f'This impact has been ignored.') continue # In some cases, the recipe impact is None (for ex: if all the ingredients with a characterized impact # have a null mass). 
In that case, rollback this loop run and continue if recipe_impact is None: # Rolling back changes run -= 1 recipes.pop() confidence_score_distribution.pop() impacts_to_rollback = impact_names[:impact_names.index(impact_name)] for impact_to_rollback in impacts_to_rollback: impact_distributions[impact_to_rollback].pop() impact_log_distributions[impact_to_rollback].pop() consecutive_null_impact_characterized_ingredients_mass += 1 if consecutive_null_impact_characterized_ingredients_mass >= \ MAX_CONSECUTIVE_NULL_IMPACT_CHARACTERIZED_INGREDIENTS_MASS: raise NoCharacterizedIngredientsError break # Breaking impact loop else: consecutive_null_impact_characterized_ingredients_mass = 0 recipe_impact_log = math.log(abs(recipe_impact)) # Switching to log space impact_distributions[impact_name].append(recipe_impact) impact_log_distributions[impact_name].append(recipe_impact_log) # Getting the sign of the recipe impact. # If it has changed from the previous loop, then a geometric mean cannot be computed # (both positive and negative values to aggregate) if impact_sign[impact_name] is None: impact_sign[impact_name] = recipe_impact / abs(recipe_impact) elif impact_sign[impact_name] != recipe_impact / abs(recipe_impact): # If there are both positive and negative values, do not calculate this impact and add a warning # TODO: In that case, instead of not calculating the impact, use a linear approach, by considering # the distribution of the impacts normal and looking for the impact convergence (not the impact # logs). 
skip_impact(impact_name) self.warnings.append(f'Geometric mean could not be calculated for impact: {impact_name}.\n' f'This impact has been ignored.') continue # Computing the average share of impact due to each ingredient for ingredient in [x for x in recipe if x not in self.ignored_unknown_ingredients]: try: ingredient_impact_share = recipe_impact_calculator.get_ingredient_impact_share(ingredient) if impact_name not in impacts_units: impacts_units[impact_name] = \ AGRIBALYSE_IMPACT_UNITS[agribalyse_impact_name_i18n(impact_name)] if run == 1: ingredients_impacts_share[impact_name][ingredient] = ingredient_impact_share else: if ingredient_impact_share is not None: # Iterative weighted arithmetic mean of the ingredient impact share ingredients_impacts_share[impact_name][ingredient] = \ ((sum(confidence_score_distribution[:- 1]) * ingredients_impacts_share[impact_name][ ingredient]) + (confidence_score_distribution[-1] * ingredient_impact_share)) / sum( confidence_score_distribution) except KeyError: ingredients_impacts_share[impact_name][ingredient] = None # Adding the weighted mean of the impacts logs distribution to the list of means log_means[impact_name].append(float(sms.DescrStatsW(data=impact_log_distributions[impact_name], weights=confidence_score_distribution if confidence_weighting else None).mean)) if run >= min_run_nb: # Estimating confidence interval using a Student distribution as the variance is unknown confidence_interval = sms.DescrStatsW(log_means[impact_name]) \ .tconfint_mean(alpha=1 - confidence_level) # Converting the confidence interval back to linear space confidence_interval = math.exp(confidence_interval[0]), math.exp(confidence_interval[1]) mean_confidence_interval_distribution[impact_name].append((confidence_interval[0], confidence_interval[1])) if ((confidence_interval[1] - confidence_interval[0]) / mean([confidence_interval[1], confidence_interval[0]])) < confidence_interval_width: convergence_reached[impact_name] = True # If the 
convergence has been reached for all impacts, ends the main while loop if all(convergence_reached.values()): break_main_loop = True # break if run >= max_run_nb: break_main_loop = True for impact_name_conv, conv in convergence_reached.items(): if not conv: self.warnings.append(f'Maximum run number has been reached before convergence ' f'of impact "{impact_name_conv}"') break if run == forced_run_nb: break_main_loop = True break # Once the loop is over, impacts_names can be edited impact_names = [x for x in impact_names if x not in skipped_impacts] if break_main_loop: break # Compute and return the result if no exception are raised uncharacterized_ingredients_mass_proportion = dict() for characterization in 'nutrition', 'impact': uncharacterized_ingredients_mass_proportion[characterization] = \ mean(self.uncharacterized_ingredients_mass_distribution[characterization]) if uncharacterized_ingredients_mass_proportion[characterization] \ > UNCHARACTERIZED_INGREDIENTS_MASS_WARNING_THRESHOLD: self.warnings.append(f"The estimated mass of {characterization} uncharacterized" f" ingredients in the product is high: " f"{uncharacterized_ingredients_mass_proportion[characterization]:.0%}") if self.ignored_unknown_ingredients: self.warnings.append(f"{len(self.ignored_unknown_ingredients)} ingredients have been ignored because they " "are absent of OFF ingredients taxonomy.") # Exponential used to switch back to linear space as the geometric mean is the exponential of the arithmetic # mean of the logs impacts_geom_means = { impact: impact_sign[impact] * math.exp( sms.DescrStatsW(data=impact_log_distributions[impact], weights=confidence_score_distribution if confidence_weighting else None).mean) for impact in impact_distributions} # The geometric stdev is the exponential of the square root of the variance of the log of the data impacts_geom_stdevs = {impact: math.exp(math.sqrt(sms.DescrStatsW(data=impact_log_distributions[impact], weights=confidence_score_distribution if 
confidence_weighting else None).var)) for impact in impact_distributions} # Computing the weighted quantiles of the impacts for impact_name in impact_names: quantiles = sms.DescrStatsW(data=impact_distributions[impact_name], weights=confidence_score_distribution if confidence_weighting else None).quantile([float(x) for x in quantiles_points]) impacts_quantiles[impact_name] = {str(quantiles_points[index]): value for index, value in enumerate(quantiles)} # Relative interquartile if '0.25' in quantiles: first_quartile = impacts_quantiles[impact_name]['0.25'] else: first_quartile = float(sms.DescrStatsW(data=impact_distributions[impact_name], weights=confidence_score_distribution if confidence_weighting else None).quantile(0.25)) if '0.75' in quantiles: third_quartile = impacts_quantiles[impact_name]['0.75'] else: third_quartile = float(sms.DescrStatsW(data=impact_distributions[impact_name], weights=confidence_score_distribution if confidence_weighting else None).quantile(0.75)) if '0.5' in quantiles: median = impacts_quantiles[impact_name]['0.5'] else: median = float(sms.DescrStatsW(data=impact_distributions[impact_name], weights=confidence_score_distribution if confidence_weighting else None).quantile(0.5)) impacts_relative_interquartile[impact_name] = (third_quartile - first_quartile) / median if impacts_relative_interquartile[impact_name] > IMPACT_RELATIVE_INTERQUARTILE_WARNING_THRESHOLD: self.warnings.append( f"The impact relative interquartile is high for {impact_name}" f" ({impacts_relative_interquartile[impact_name]:.0%})") # Computing the average total used mass average_total_used_mass = sms.DescrStatsW(data=total_used_mass_distribution, weights=confidence_score_distribution if confidence_weighting else None).mean # Computing the weighted average mass share of each ingredient # Poorly optimized... 
average_mass_shares = dict() for ingredient in recipes[0]: mass_shares = [x[ingredient] / sum(x.values()) for x in recipes] average_mass_shares[ingredient] = sms.DescrStatsW(data=mass_shares, weights=confidence_score_distribution if confidence_weighting else None).mean # Retrieving the databases entries related to each ingredient data_sources = dict() for ingredient in flat_ingredients_list_BFS(self.product): if (ingredient['id'] not in self.ignored_unknown_ingredients) and (ingredient['id'] in ingredients_data): ingredient_data = ingredients_data[ingredient['id']] ingredient_data_sources = dict() if 'environmental_impact_data_sources' in ingredient_data: ingredient_data_sources['environmental_impact'] = ingredient_data[ 'environmental_impact_data_sources'] if 'nutritional_data_sources' in ingredient_data: ingredient_data_sources['nutrition'] = ingredient_data['nutritional_data_sources'] if len(ingredient_data_sources) > 0: data_sources[ingredient['id']] = ingredient_data_sources result = {'impacts_geom_means': impacts_geom_means, 'impacts_geom_stdevs': impacts_geom_stdevs, 'impacts_quantiles': impacts_quantiles, 'impacts_relative_interquartile': impacts_relative_interquartile, 'ingredients_impacts_share': ingredients_impacts_share, 'ingredients_mass_share': average_mass_shares, 'impacts_units': impacts_units, 'product_quantity': self.product_quantity, 'const_relax_coef': const_relax_coef, 'warnings': self.warnings, 'reliability': self.reliability_score( const_relax_coef=const_relax_coef, uncharacterized_ingredients_mass_proportion=uncharacterized_ingredients_mass_proportion), 'ignored_unknown_ingredients': self.ignored_unknown_ingredients, 'uncharacterized_ingredients': self.uncharacterized_ingredients_ids, 'uncharacterized_ingredients_ratio': self.uncharacterized_ingredients_ratio, 'uncharacterized_ingredients_mass_proportion': uncharacterized_ingredients_mass_proportion, 'number_of_runs': run, 'number_of_ingredients': len(self.leaf_ingredients), 
'average_total_used_mass': average_total_used_mass, 'calculation_time': time.time() - self.start_time, 'data_sources': data_sources } if distributions_as_result: result.update({'impact_distributions': impact_distributions, 'mean_confidence_interval_distribution': mean_confidence_interval_distribution, 'confidence_score_distribution': confidence_score_distribution, 'recipes': recipes, 'total_used_mass_distribution': total_used_mass_distribution}) return result
def estimate_impacts(product, impact_names, quantity=100, ignore_unknown_ingredients=True, min_run_nb=30,
                     max_run_nb=1000, forced_run_nb=None, confidence_interval_width=0.05, confidence_level=0.95,
                     use_nutritional_info=True, const_relax_coef=0, use_defined_prct=True, maximum_evaporation=0.4,
                     total_mass_used=None, min_prct_dist_size=30, dual_gap_type='absolute', dual_gap_limit=0.001,
                     solver_time_limit=60, time_limit_dual_gap_limit=0.01, confidence_weighting=True,
                     use_ingredients_impact_uncertainty=True,
                     quantiles_points=('0.05', '0.25', '0.5', '0.75', '0.95'), distributions_as_result=False,
                     confidence_score_weighting_factor=10, safe_mode=True):
    """
    Wrapper for impact estimation.

    A first estimation is attempted with the parameters provided. If it fails with a RecipeCreationError or a
    SolverTimeoutError and ``safe_mode`` is enabled, the estimation is retried with progressively relaxed
    constraints: ``const_relax_coef`` is raised step by step up to 1 while keeping the defined percentages, then
    the same steps are replayed with ``use_defined_prct`` disabled. A relaxation step is never more restrictive
    than the parameters provided, and a combination of parameters that has already been attempted is not run
    again.

    Args:
        product (dict): Dict containing an OpenFoodFacts product. It must contain the keys "ingredients" and
            "nutriments"
        impact_names (str or list): Iterable containing impacts names or single impact name.
        quantity (float): Quantity of product in grams for which the impact must be calculated. Default is 100g.
        ignore_unknown_ingredients (bool): Should ingredients absent from OFF taxonomy and without defined
            percentage be considered as parsing errors and ignored?
        min_run_nb (int): Minimum number of runs for the Monte-Carlo loop.
            A too small number may result in a falsely converging value.
        max_run_nb (int): Maximum number of runs for the Monte-Carlo loop.
        forced_run_nb (int): Used to bypass natural Monte-Carlo stopping criteria and force the number of runs.
        confidence_interval_width (float): Width of the confidence interval that will determine the convergence
            detection.
        confidence_level (float): Confidence level of the confidence interval.
        use_nutritional_info (bool): Should nutritional information be used to estimate the recipe?
        const_relax_coef (float): Constraints relaxation coefficient. Allows to relax constraints on nutriments,
            water and mass balance to increase chances to get a result.
        use_defined_prct (bool): Should ingredients percentages defined in the product be used?
        maximum_evaporation (float): Upper bound of the evaporation coefficient [0-1[. I.e. maximum proportion of
            ingredients water that can evaporate.
        total_mass_used (float): Total mass of ingredients used in grams, if known.
        min_prct_dist_size (int): Minimum size of the ingredients percentage distribution that will be used to
            pick a proportion for an ingredient. If the distribution (adjusted to the possible value interval)
            has less data, a uniform distribution will be used instead.
        dual_gap_type (str): 'absolute' or 'relative'. Determines the precision type of the variable optimization
            by the solver.
        dual_gap_limit (float): Determines the precision of the variable optimization by the solver. Relative or
            absolute according to dual_gap_type.
        solver_time_limit (float): Maximum time for the solver optimization (in seconds).
            Set to None or 0 to set no limit.
        time_limit_dual_gap_limit (float): Accepted precision of the solver in case of time limit hit.
            Relative or absolute according to dual_gap_type.
        confidence_weighting (bool): Should the recipes be weighted by their confidence score (deviation of the
            recipes nutritional composition to the reference product).
        use_ingredients_impact_uncertainty (bool): Should ingredients impacts uncertainty data be used?
        quantiles_points (iterable): List of impacts quantiles cutting points to return in the result.
        distributions_as_result (bool): Should the recipes, the distributions of the impact, the mean confidence
            interval and the confidence score be added to the result?
        confidence_score_weighting_factor (float): Weighting factor used for the confidence score calculation.
            It corresponds to the weight of the nutritional distance against the absolute difference between
            the total mass and 100g/100g.
        safe_mode (bool): If set to True, the constraints will be progressively relaxed in order to get a result.

    Returns:
        dict: The result of ``ImpactEstimator.estimate_impacts()``. When the result has been obtained with
        relaxed constraints, its ``'warnings'`` entry is extended with one message per modified parameter.

    Raises:
        RecipeCreationError, SolverTimeoutError: The error raised by the first attempt, if ``safe_mode`` is
            False or if none of the relaxed attempts succeeded. Any other exception is propagated unchanged.
    """
    impact_estimator_kwargs = dict(product=product,
                                   quantity=quantity,
                                   ignore_unknown_ingredients=ignore_unknown_ingredients,
                                   use_defined_prct=use_defined_prct)

    impact_estimation_method_kwargs = dict(impact_names=impact_names,
                                           min_run_nb=min_run_nb,
                                           max_run_nb=max_run_nb,
                                           forced_run_nb=forced_run_nb,
                                           confidence_interval_width=confidence_interval_width,
                                           confidence_level=confidence_level,
                                           use_nutritional_info=use_nutritional_info,
                                           const_relax_coef=const_relax_coef,
                                           maximum_evaporation=maximum_evaporation,
                                           total_mass_used=total_mass_used,
                                           min_prct_dist_size=min_prct_dist_size,
                                           dual_gap_type=dual_gap_type,
                                           dual_gap_limit=dual_gap_limit,
                                           solver_time_limit=solver_time_limit,
                                           time_limit_dual_gap_limit=time_limit_dual_gap_limit,
                                           confidence_weighting=confidence_weighting,
                                           use_ingredients_impact_uncertainty=use_ingredients_impact_uncertainty,
                                           quantiles_points=quantiles_points,
                                           distributions_as_result=distributions_as_result,
                                           confidence_score_weighting_factor=confidence_score_weighting_factor)

    # First attempt for getting a result with provided kwargs
    try:
        impact_estimator = ImpactEstimator(**impact_estimator_kwargs)
        return impact_estimator.estimate_impacts(**impact_estimation_method_kwargs)
    except (RecipeCreationError, SolverTimeoutError) as first_error:
        # If the safe mode is not enabled, raise the exception, else retry with relaxed constraints
        if not safe_mode:
            raise
        # The name bound by "except ... as" is deleted when leaving the handler, keep a lasting reference
        original_exception = first_error

    # Constraints levels to loop on, from the most to the least restrictive: the relaxation coefficient is
    # increased first, then the whole ladder is replayed without the defined percentages.
    relax_coefs = (0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1)
    constraints_levels = [(level_use_defined_prct, level_relax_coef)
                          for level_use_defined_prct in (True, False)
                          for level_relax_coef in relax_coefs]

    # Parameters combinations already tried, starting with the ones of the failed first attempt
    attempted_levels = {(use_defined_prct, const_relax_coef)}

    for level_use_defined_prct, level_relax_coef in constraints_levels:
        # Avoid to use a more restrictive parameter than the kwargs provided
        new_use_defined_prct = level_use_defined_prct and use_defined_prct
        new_const_relax_coef = max(level_relax_coef, const_relax_coef)

        # Once clamped to the provided kwargs, several levels may end up identical. Running the whole estimation
        # again with parameters that have already failed would only cost time.
        if (new_use_defined_prct, new_const_relax_coef) in attempted_levels:
            continue
        attempted_levels.add((new_use_defined_prct, new_const_relax_coef))

        # Deep copies so that each attempt gets its own copy of the product and of the other arguments
        new_impact_estimator_kwargs = copy.deepcopy(impact_estimator_kwargs)
        new_impact_estimator_kwargs['use_defined_prct'] = new_use_defined_prct
        new_impact_estimation_method_kwargs = copy.deepcopy(impact_estimation_method_kwargs)
        new_impact_estimation_method_kwargs['const_relax_coef'] = new_const_relax_coef

        # Add the change of parameter in the warnings
        added_warnings = []
        if new_use_defined_prct != use_defined_prct:
            added_warnings.append(
                f"Parameter use_defined_prct has been set to {new_use_defined_prct} in order to get a result.")
        if new_const_relax_coef != const_relax_coef:
            added_warnings.append(
                f"Parameter const_relax_coef has been set to {new_const_relax_coef} in order to get a result.")

        try:
            impact_estimator = ImpactEstimator(**new_impact_estimator_kwargs)
            result = impact_estimator.estimate_impacts(**new_impact_estimation_method_kwargs)
        except (RecipeCreationError, SolverTimeoutError):
            # Expected failure at this constraints level, move on to a more permissive one
            continue

        result['warnings'] += added_warnings
        return result

    # If no result has been returned with more permissive parameters, raise the original error
    raise original_exception
def estimate_impacts_safe(product, impact_names, **kwargs):
    """
    Deprecated alias of estimate_impacts().

    Emits a deprecation warning, then forwards all the arguments unchanged to estimate_impacts().

    Args:
        product (dict): Product forwarded to estimate_impacts().
        impact_names (str or list): Impacts names forwarded to estimate_impacts().
        **kwargs: Any other keyword argument accepted by estimate_impacts().

    Returns:
        The value returned by estimate_impacts().
    """
    # stacklevel=2 attributes the warning to the caller's line, which is the code that needs to be updated
    warnings.warn(message="This function is deprecated. Use estimate_impacts() with safe_mode=True instead.",
                  category=DeprecationWarning,
                  stacklevel=2)

    return estimate_impacts(product=product, impact_names=impact_names, **kwargs)