""" Functions used by the environmental impact estimation program """
import copy
import numpy as np
from math import sqrt
from impacts_estimation.vars import NUTRIMENTS_CATEGORIES, TOP_LEVEL_NUTRIMENTS_CATEGORIES, \
AGRIBALYSE_IMPACT_CATEGORIES_EN_TO_FR, AGRIBALYSE_IMPACT_CATEGORIES_FR
from data import ingredients_data, off_taxonomy
def nutriments_from_recipe(recipe):
    """
    Return the nutriment contents of a recipe as a mass-weighted sum of the reference nutriment contents of its
    ingredients.

    Args:
        recipe (dict): Dict containing ingredients as keys and masses in grams as values

    Warning:
        For a given nutriment, ingredients whose content is unknown are considered to have the average content of
        the ingredients for which it is known (the known part is scaled up to the total mass of the recipe).

    Returns:
        dict: Dictionary with nutriments as keys and nutriment contents as values. A nutriment that is unknown for
            every ingredient of the recipe is absent from the result.
    """
    # Converting the masses once instead of once per nutriment
    masses = {ingredient: float(mass) for ingredient, mass in recipe.items()}
    total_mass = sum(masses.values())

    result = dict()
    for nutriment in NUTRIMENTS_CATEGORIES:
        known_ingredients_mass = 0
        nutriment_mass = 0

        for ingredient, mass in masses.items():
            if (ingredient in ingredients_data) \
                    and (nutriment in ingredients_data[ingredient].get('nutriments', [])):
                known_ingredients_mass += mass
                # Ingredients nutriment contents are given per 100g
                nutriment_mass += mass * ingredients_data[ingredient]['nutriments'][nutriment]['value'] / 100

        # Nutriments for which no ingredient is characterized are left out of the result
        if known_ingredients_mass == 0:
            continue

        # Inflating the nutriment content of the known ingredients to the total mass of the recipe
        result[nutriment] = nutriment_mass * total_mass / known_ingredients_mass

    return result
def confidence_score(nutri, reference_nutri, total_mass, min_possible_mass, max_possible_mass, weighting_factor=10,
                     reference_mass=100):
    """
    Calculate the confidence score of a nutritional composition.

    The score combines the euclidean distance between the assessed and the reference nutritional compositions (in
    the space of the top level nutriments present in both) with the relative deviation of the total mass of
    ingredients from the reference mass. The closer the composition is to the reference and the closer the total
    mass is to the reference mass, the higher the score.

    The score is the inverse of the sum of the normalized nutritional distance multiplied by the weighting factor
    and the mass deviation.

    Args:
        nutri (dict): Nutritional composition to evaluate.
        reference_nutri (dict): Nutritional composition of the reference product. A "_100g" suffix in its keys is
            ignored.
        total_mass (float): Total mass of ingredients used in g.
        min_possible_mass (float): Minimum possible total ingredient mass for a product in g
        max_possible_mass (float): Maximum possible total ingredient mass for a product in g
        weighting_factor (float): Weight of the nutritional distance against the mass deviation.
        reference_mass (float): Mass for which the nutritional compositions are expressed (in g).

    Returns:
        float: Confidence score

    Raises:
        AssertionError: If the rounded total mass is not between the rounded minimum and maximum possible masses.
        ValueError: If the squared difference of a nutriment content is greater than 1.
        ZeroDivisionError: If no nutriment can be compared and the total mass equals the reference mass.
    """
    assert round(min_possible_mass) <= round(total_mass) <= round(max_possible_mass)

    # Expressing the masses relatively to the reference mass
    total_mass = total_mass / reference_mass
    min_possible_mass = min_possible_mass / reference_mass
    max_possible_mass = max_possible_mass / reference_mass

    # Removing "_100g" from reference_nutri keys
    reference_nutri = {k.replace('_100g', ''): v for k, v in reference_nutri.items()}

    # Calculating nutritional distance
    squared_differences = []
    for nutriment in nutri:
        # Only the top level nutriments known for both compositions are considered
        if nutriment not in TOP_LEVEL_NUTRIMENTS_CATEGORIES or nutriment not in reference_nutri:
            continue

        difference = (float(reference_nutri[nutriment]) / reference_mass) \
            - (float(nutri[nutriment]) / reference_mass)
        squared_difference = round(difference ** 2, 6)

        # Setting a minimal squared difference to avoid extremely high confidence score values in case of very
        # similar nutritional compositions
        squared_difference = max(squared_difference, 0.0000001)

        if squared_difference > 1:
            raise ValueError("The squared difference cannot be superior to 1.")

        squared_differences.append(squared_difference)

    # The distance in the n-dimensional space is the square root of the sum of the squared differences
    nutri_distance = sqrt(sum(squared_differences))

    # Normalizing by the maximum possible distance sqrt(2)
    normalized_nutri_distance = nutri_distance / sqrt(2)

    # Calculating total mass likelihood coefficient: deviation from the reference mass relative to the largest
    # deviation allowed on that side
    if total_mass < 1:
        mass_gap = 1 - total_mass
        mass_range = 1 - min_possible_mass
    else:
        mass_gap = total_mass - 1
        mass_range = max_possible_mass - 1

    if mass_range != 0:
        mass_diff = mass_gap / mass_range
    else:
        # The bound on this side is the reference mass itself, so the ratio is undefined. A total mass exactly at
        # the reference has no deviation; any other value accepted by the rounded check above sits at the limit
        # of the allowed range and is given the deviation of a bound (1).
        mass_diff = 0.0 if mass_gap == 0 else 1.0

    return 1 / ((normalized_nutri_distance * weighting_factor) + mass_diff)
def natural_bounds(rank, nb_ingredients):
    """
    Compute the lower and upper bounds of the proportion of an ingredient from its rank and the number of
    ingredients in the product, given that ingredients are listed in decreasing proportion order.

    Examples:
        >>> natural_bounds(2, 4)
        (0.0, 50.0)
        >>> natural_bounds(1, 5)
        (20.0, 100.0)

    Args:
        rank (int): Rank of the ingredient in the list (starting at 1)
        nb_ingredients (int): Number of ingredients in the product

    Returns:
        tuple: Lower and upper bounds of the proportion of the ingredient, in percent
    """
    if rank == 1:
        # The first ingredient is at least as present as each of the others
        return 100 / nb_ingredients, 100.0

    # An ingredient of rank r is preceded by r - 1 ingredients at least as present as itself
    return .0, 100 / rank
def nutritional_error_margin(nutriment, value):
    """
    Return the error margin of a product's nutriment according to EU directives.

    Args:
        nutriment (str): Nutriment considered (case insensitive)
        value (float): Given product content of the considered nutriment, as a fraction between 0 and 1

    Returns:
        dict: Dictionary containing absolute and relative margins (only one of which is different from 0)

    Raises:
        AssertionError: If the value is not between 0 and 1.
        ValueError: If the nutriment is not recognized.

    Examples:
        >>> nutritional_error_margin('proteins', 0.05)
        {'absolute': 0.02, 'relative': 0}
        >>> nutritional_error_margin('proteins', 0.3)
        {'absolute': 0, 'relative': 0.2}
    """
    value = float(value)
    assert 0 <= value <= 1

    nutriment = nutriment.lower()

    # Each band is (exclusive upper limit of the content, absolute margin, relative margin).
    # The last band of each nutriment has no upper limit.
    if nutriment in ('proteins', 'carbohydrates', 'sugars', 'fiber'):
        bands = ((0.1, 0.02, 0), (0.4, 0, 0.2), (None, 0.08, 0))
    elif nutriment == 'fat':
        bands = ((0.1, 0.015, 0), (0.4, 0, 0.2), (None, 0.08, 0))
    elif nutriment == 'saturated-fat':
        bands = ((0.04, 0.008, 0), (None, 0, 0.2))
    elif nutriment == 'salt':
        bands = ((0.0125, 0.00375, 0), (None, 0, 0.2))
    else:
        raise ValueError('The nutriment is not recognized.')

    for upper_limit, absolute, relative in bands:
        if upper_limit is None or value < upper_limit:
            return {'absolute': absolute, 'relative': relative}
def clear_ingredient_graph(product):
    """
    Recursive function to search the ingredients graph and remove the subingredients of an ingredient if all of
    them are uncharacterized.

    The graph is modified in place, from the deepest level up.

    Args:
        product (dict): Dict corresponding to a product or a compound ingredient. Must have an 'ingredients' key.
    """
    ingredients = product['ingredients']

    # Cleaning the deeper levels first, so that an ingredient whose own subingredients have all been removed is
    # evaluated without them below
    for ingredient in ingredients:
        if 'ingredients' in ingredient:
            clear_ingredient_graph(ingredient)

    # If none of the ingredients is known, has subingredients or has a defined percentage, delete them all
    if not any((x['id'] in ingredients_data) or ('ingredients' in x) or ('percent' in x)
               for x in ingredients):
        del product['ingredients']
def minimum_percentage_sum(ingredients):
    """
    Compute the minimum sum of ingredients percentages for ingredients given in decreasing percentage order, even
    if some ingredients do not have a percentage.

    An ingredient without percentage is counted as the last percentage found after it in the list, or 0 if there
    is none.

    Notes:
        This is useful to estimate if subingredients percentages are defined in percentage of their parent
        ingredient or in percentage of the total product.

    Args:
        ingredients (list): List of dicts corresponding to the ingredients. It is not modified.

    Returns:
        float: Minimum value of the sum of all ingredients percentages.
    """
    minimum_sum = 0
    minimum_percentage = 0

    # Looping from least present ingredient to most present
    for ingredient in reversed(ingredients):
        if 'percent' in ingredient:
            minimum_percentage = float(ingredient['percent'])
        minimum_sum += minimum_percentage

    return minimum_sum
def maximum_percentage_sum(ingredients):
    """
    Compute the maximum sum of ingredients percentages for ingredients given in decreasing percentage order, even
    if some ingredients do not have a percentage.

    An ingredient without percentage is counted as the last percentage found before it in the list, or 100 if
    there is none.

    Notes:
        This is useful to estimate if subingredients percentages are defined in percentage of their parent
        ingredient or in percentage of the total product.

    Args:
        ingredients (list): List of dicts corresponding to the ingredients

    Returns:
        float: Maximum value of the sum of all ingredients percentages.
    """
    maximum_sum = 0
    maximum_percentage = 100

    # Looping from most present ingredient to least present
    for ingredient in ingredients:
        if 'percent' in ingredient:
            maximum_percentage = float(ingredient['percent'])
        maximum_sum += maximum_percentage

    return maximum_sum
def define_subingredients_percentage_type(product):
    """
    Recursive function to search the ingredients graph and define if the subingredients percentages are given in
    percentage of their parent ingredient or of the whole product.

    Each compound ingredient with at least one subingredient percentage receives a 'percent-type' key whose value
    is 'parent', 'product' or 'undefined' (when both or neither interpretation is possible). The graph is
    modified in place.

    Args:
        product (dict): Dict corresponding to a product or a compound ingredient. Must have an 'ingredients' key.
    """
    for rank, ingredient in enumerate(product['ingredients'], 1):
        # Only compound ingredients are concerned
        if not ingredient.get('ingredients'):
            continue

        # Recursive call for each subingredients
        define_subingredients_percentage_type(ingredient)

        # Nothing to decide if no subingredient has a percentage
        if not any('percent' in x for x in ingredient['ingredients']):
            continue

        # If the maximum sum of the subingredients percentages is lower than 100, then the percentages cannot
        # be given in percentage of the parent
        parent_percentage = not maximum_percentage_sum(ingredient['ingredients']) < 100

        # If the minimum sum of the subingredients percentages is higher than the parent ingredient percentage
        # or its natural upper bound (if the parent has no percentage), then the subingredients percentages
        # cannot be given in percentage of the product
        parent_ingredient_percentage = min(float(ingredient.get('percent', 100)),
                                           natural_bounds(rank, len(product['ingredients']))[1])
        product_percentage = not minimum_percentage_sum(ingredient['ingredients']) > parent_ingredient_percentage

        if parent_percentage and not product_percentage:
            ingredient['percent-type'] = 'parent'
        elif product_percentage and not parent_percentage:
            ingredient['percent-type'] = 'product'
        else:
            ingredient['percent-type'] = 'undefined'
def flat_ingredients_list_BFS(product):
    """
    Recursive function to search the ingredients graph and return it as a flat list of all nodes, with the
    ingredients of a level placed before their sub ingredients.

    Notes:
        All the direct ingredients of the product come first, followed by the flattened content of each of them in
        turn. For graphs deeper than two levels this is not a strict level by level (breadth first) order.

    Args:
        product (dict): Dict corresponding to a product or a compound ingredient.

    Returns:
        list: List containing copies of all the ingredients graph nodes, without their 'ingredients' key. The
            graph itself is left untouched.
    """
    if 'ingredients' not in product:
        return []

    ingredients = product['ingredients']

    # Copying each node without its subingredients to avoid copying whole subtrees at every level
    nodes = [copy.deepcopy({key: value for key, value in ingredient.items() if key != 'ingredients'})
             for ingredient in ingredients]

    for ingredient in ingredients:
        nodes += flat_ingredients_list_BFS(ingredient)

    return nodes
def flat_ingredients_list_DFS(product):
    """
    Recursive function to search the ingredients graph by doing a Depth First Search and return it as a flat list
    of all nodes.

    Sub ingredients are placed right after their parents.

    Args:
        product (dict): Dict corresponding to a product or a compound ingredient. A dict with an '_id' key is
            considered to be the product itself and is not part of the result.

    Returns:
        list: List containing all the ingredients graph nodes. Compound ingredients are returned as copies
            without their 'ingredients' key, simple ingredients are returned as they are (not copied).
    """
    # A simple ingredient is its own flat list
    if 'ingredients' not in product:
        return [product]

    nodes = []

    if '_id' not in product:  # It is a compound ingredient and not a product
        # Copying the node without its subingredients to avoid copying the whole subtree
        nodes.append(copy.deepcopy({key: value for key, value in product.items() if key != 'ingredients'}))

    for ingredient in product['ingredients']:
        nodes += flat_ingredients_list_DFS(ingredient)

    return nodes
def find_ingredients_graph_leaves(product):
    """
    Recursive function to search the ingredients graph and find its leaves.

    Args:
        product (dict): Dict corresponding to a product or a compound ingredient.

    Returns:
        list: List containing the ingredients graph leaves. If the given dict has no 'ingredients' key, it is
            returned as it is (a dict, not a list).
    """
    # A node without subingredients is a leaf itself
    if 'ingredients' not in product:
        return product

    leaves = []
    for ingredient in product['ingredients']:
        subleaves = find_ingredients_graph_leaves(ingredient)
        if isinstance(subleaves, list):
            # Compound ingredient: gathering the leaves found below it
            leaves += subleaves
        else:
            # Simple ingredient: the recursive call returned the leaf itself
            leaves.append(subleaves)

    return leaves
def individualize_ingredients(product, previous_ingredients_ids=None):
    """
    Process an ingredient list in place to ensure that they all have a different id.

    Args:
        product (dict): Dict corresponding to a product, containing a list of ingredients, may contain compound
            ingredients
        previous_ingredients_ids (list): List containing the ingredients ids already used. The ids encountered are
            appended to it. Needed only for recursive call

    Examples:
        >>> product = {'ingredients': [{'id': 'A'}, {'id': 'B', 'ingredients': [{'id': 'A'}]}, {'id': 'B'}]}
        >>> individualize_ingredients(product)
        >>> print(product)
        {'ingredients': [{'id': 'A'}, {'id': 'B', 'ingredients': [{'id': 'A*'}]}, {'id': 'B*'}]}
    """
    # Testing against None so that a given list is always the one being filled, even if it is empty
    ingredients_ids = [] if previous_ingredients_ids is None else previous_ingredients_ids

    for ingredient in product['ingredients']:
        # Appending an asterisk to the id as long as the id already exists
        while ingredient['id'] in ingredients_ids:
            ingredient['id'] += '*'

        ingredients_ids.append(ingredient['id'])

        # The same list is shared with the subingredients so that ids are unique in the whole graph
        if 'ingredients' in ingredient:
            individualize_ingredients(ingredient, previous_ingredients_ids=ingredients_ids)
def original_id(individualized_id):
    """
    Get the original id of an ingredient that has been transformed by individualize_ingredients()

    Args:
        individualized_id (str): Ingredient id, possibly followed by asterisks

    Returns:
        str: The id without its trailing asterisks

    Examples:
        >>> original_id('en:water**')
        'en:water'
        >>> original_id('en:sugar')
        'en:sugar'
    """
    # Asterisks are only ever appended to an id, so only the trailing ones are removed
    return individualized_id.rstrip('*')
class UnknownIngredientsRemover:
    """
    Remove the unknown ingredients of a product and keep track of the ids of the removed ingredients.

    Attributes:
        removed_unknown_ingredients (list): Ids of the ingredients removed by remove_unknown_ingredients()
    """

    def __init__(self):
        self.removed_unknown_ingredients = []

    def remove_unknown_ingredients(self, product):
        """
        Recursive function to remove, in place, the ingredients that have no subingredients, no defined
        percentage and are not in the OFF taxonomy.

        The ids of the removed ingredients are appended to removed_unknown_ingredients. An 'ingredients' key left
        empty is deleted.

        Args:
            product (dict): Dict corresponding to a product or a compound ingredient.
        """
        if 'ingredients' not in product:
            return

        # Recursive call on each subingredients. Done first, so that a compound ingredient losing all its
        # subingredients is evaluated without them below
        for ingredient in product['ingredients']:
            self.remove_unknown_ingredients(ingredient)

        # Keeping the ingredients that have sub-ingredients, a defined percentage or are in the OFF taxonomy
        kept_ingredients = []
        for ingredient in product['ingredients']:
            if ('ingredients' not in ingredient) \
                    and ('percent' not in ingredient) \
                    and ingredient['id'] not in off_taxonomy:
                self.removed_unknown_ingredients.append(ingredient['id'])
            else:
                kept_ingredients.append(ingredient)

        # Updating the list in place, so that other references to it see the change
        product['ingredients'][:] = kept_ingredients

        # Removing the 'ingredients' key if empty
        if not product['ingredients']:
            del product['ingredients']
def remove_percentage_from_product(product):
    """
    Remove, in place, the defined percentage of all the ingredients and subingredients of a product.

    Args:
        product (dict): Dict corresponding to a product or a compound ingredient. Must have an 'ingredients' key.
    """
    for ingredient in product['ingredients']:
        # Nothing happens if the ingredient has no percentage
        ingredient.pop('percent', None)

        if 'ingredients' in ingredient:
            remove_percentage_from_product(ingredient)
def weighted_geometric_mean(values, weights):
    """
    Return the weighted geometric mean of values.

    Args:
        values (iterable): Values to average. Must support len().
        weights (iterable): Weight of each value, in the same order. Must support len().

    Returns:
        float: Exponential of the weighted arithmetic mean of the logarithms of the values

    Raises:
        AssertionError: If values and weights do not have the same length.
    """
    assert len(values) == len(weights)

    weighted_log_sum = sum(weight * np.log(value) for value, weight in zip(values, weights))

    return np.exp(weighted_log_sum / sum(weights))
def agribalyse_impact_name_i18n(impact_name):
    """
    Return the French version of an impact name.

    Args:
        impact_name (str): Impact name, in English or already in French

    Returns:
        str: French name of the impact

    Raises:
        ValueError: If the impact name is neither a known English name nor a known French name.

    Examples:
        >>> agribalyse_impact_name_i18n('Climate change')
        'Changement climatique'
        >>> agribalyse_impact_name_i18n("Appauvrissement de la couche d'ozone")
        "Appauvrissement de la couche d'ozone"
    """
    if impact_name in AGRIBALYSE_IMPACT_CATEGORIES_EN_TO_FR:
        return AGRIBALYSE_IMPACT_CATEGORIES_EN_TO_FR[impact_name]

    # French names are returned unchanged
    if impact_name in AGRIBALYSE_IMPACT_CATEGORIES_FR:
        return impact_name

    raise ValueError(f'Unrecognized impact: {impact_name}')