Coverage for opt/mealie/lib/python3.12/site-packages/mealie/services/parser_services/brute/process.py: 14%
136 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-11-25 15:32 +0000
« prev ^ index » next coverage.py v7.10.6, created at 2025-11-25 15:32 +0000
1import string 1a
2import unicodedata 1a
4from pydantic import BaseModel, ConfigDict 1a
6from ..parser_utils import check_char, move_parens_to_end 1a
class BruteParsedIngredient(BaseModel):
    # Result record returned by the brute-force ingredient parser.

    # Leading/trailing whitespace is stripped from every string field.
    model_config = ConfigDict(str_strip_whitespace=True)

    # Name of the ingredient itself (what `parse` extracts as the ingredient).
    food: str = ""
    # Free-text remainder: bracketed text, text after a comma, or an
    # unparseable amount token.
    note: str = ""
    # Numeric quantity; 1.0 unless the caller supplies a value.
    amount: float = 1.0
    # Unit token as it appeared in the input ("" when none was found).
    unit: str = ""
def parse_fraction(x):
    """Convert a textual fraction into a float.

    Two spellings are understood:

    * a single Unicode vulgar-fraction character such as "½" or "⅒";
    * an ASCII fraction of the form "<int>/<int>" such as "3/4".

    Args:
        x: The string to interpret.

    Returns:
        The value of the fraction as a float.

    Raises:
        ValueError: If ``x`` is not a fraction, if either side is not an
            integer (this includes "⅟", which has no denominator), or if the
            denominator is zero.
    """
    if len(x) == 1:
        decomposition = unicodedata.decomposition(x)
        if decomposition.startswith("<fraction>"):
            # The decomposition lists hex code points, e.g.
            # "<fraction> 0031 2044 0031 0030" for "⅒". Rebuild the real
            # characters so multi-digit numerators/denominators survive,
            # then swap FRACTION SLASH (U+2044) for a plain "/".
            expanded = "".join(chr(int(code, 16)) for code in decomposition.split()[1:])
            x = expanded.replace("\u2044", "/")

    frac_split = x.split("/")
    if len(frac_split) != 2:
        raise ValueError(f"not a fraction: {x!r}")

    try:
        # int() raises ValueError itself for empty or non-numeric parts.
        return int(frac_split[0]) / int(frac_split[1])
    except ZeroDivisionError as e:
        raise ValueError(f"fraction with zero denominator: {x!r}") from e
def parse_amount(ing_str) -> tuple[float, str, str]:
    """Split a leading quantity off a single token.

    The token may start with a plain number ("2", "1.5", "1,5"), an ASCII
    fraction ("1/2"), or a single fraction character; a number may also be
    directly followed by one fraction character. Whatever follows the
    quantity is reported as the unit.

    Args:
        ing_str: One whitespace-free token, e.g. "250g".

    Returns:
        A tuple ``(amount, unit, note)``. When the text after the quantity
        starts with "(" or "-", it is not treated as a unit: ``unit`` is ""
        and ``note`` holds the whole original token.

    Raises:
        ValueError: If no quantity can be read from the start of the token.
    """
    length = len(ing_str)

    def is_numeric_at(pos) -> bool:
        # True while `pos` is inside the string and sits on a digit, or on a
        # ".", "," or "/" that is immediately followed by a digit
        # (e.g. 1/2, 1.3, 1,500).
        if pos >= length:
            return False
        if ing_str[pos] in string.digits:
            return True
        return bool(
            check_char(ing_str[pos], ".", ",", "/")
            and pos + 1 < length
            and ing_str[pos + 1] in string.digits
        )

    cursor = 0
    while is_numeric_at(cursor):
        cursor += 1

    unit = ""
    note = ""

    if cursor == 0:
        # No leading digits: the first character must itself be a fraction.
        amount = parse_fraction(ing_str[0])
        unit = ing_str[1:]
    else:
        numeric_part = ing_str[:cursor]
        if "/" in numeric_part:
            amount = parse_fraction(numeric_part)
        else:
            # A comma is read as a decimal separator.
            amount = float(numeric_part.replace(",", "."))

        if cursor < length:
            # The number may be directly followed by a fraction character.
            try:
                extra = parse_fraction(ing_str[cursor])
            except ValueError:
                unit = ing_str[cursor:]
            else:
                amount += extra
                unit = ing_str[cursor + 1 :]

    # No known unit starts with "(" or "-", so this is most likely an
    # alternative such as "1L (500ml) Water" or a range such as "2-3".
    if unit.startswith(("(", "-")):
        unit = ""
        note = ing_str

    return amount, unit, note
def parse_ingredient_with_comma(tokens) -> tuple[str, str]:
    """Split tokens into ingredient and note at the first trailing comma.

    Args:
        tokens: Whitespace-separated words of the ingredient text.

    Returns:
        A tuple ``(ingredient, note)``. Everything up to and including the
        first token that ends with "," forms the ingredient (with that comma
        removed); the remaining tokens form the note. If no token ends with
        a comma, all tokens form the ingredient and the note is "".
    """
    comma_at = next(
        (position for position, word in enumerate(tokens) if word.endswith(",")),
        None,
    )
    if comma_at is None:
        # No token ends in a comma -> everything is the ingredient.
        return " ".join(tokens), ""

    cut = comma_at + 1
    head = " ".join(tokens[:cut])
    tail = " ".join(tokens[cut:])
    # Drop the trailing comma from the ingredient part.
    return head[:-1], tail
def parse_ingredient(tokens) -> tuple[str, str]:
    """Split tokens into an ingredient name and a note.

    A trailing bracketed group ("flour (sifted)") becomes the note with the
    brackets removed. Otherwise the split is delegated to
    ``parse_ingredient_with_comma``.

    Args:
        tokens: Whitespace-separated words of the ingredient text.

    Returns:
        A tuple ``(ingredient, note)``; ``("", "")`` for an empty token list.

    Raises:
        ValueError: If the bracketed group starts at the very first token,
            i.e. the whole input is wrapped in brackets. Callers treat this
            as a sign that an earlier token was wrongly consumed.
    """
    if not tokens:
        return "", ""

    last = tokens[-1]
    if not last.endswith(")"):
        return parse_ingredient_with_comma(tokens)

    # The matching opening bracket is inside the last token itself, so there
    # is no separate bracketed group to extract.
    if "(" in last and not last.startswith("("):
        return parse_ingredient_with_comma(tokens)

    # Walk backwards to the token that opens the bracket; -1 means that no
    # token starts with "(".
    start = len(tokens) - 1
    while start >= 0 and not tokens[start].startswith("("):
        start -= 1

    if start == 0:
        # The whole list is wrapped in brackets -> assume it is an error
        # (e.g. the first argument was wrongly taken as the unit).
        raise ValueError("ingredient is entirely wrapped in brackets")
    if start < 0:
        # No opening bracket anywhere -> just ignore the closing bracket.
        return parse_ingredient_with_comma(tokens)

    # Opening bracket found -> split, removing the brackets from the note.
    note = " ".join(tokens[start:])[1:-1]
    ingredient = " ".join(tokens[:start])
    return ingredient, note
def parse(ing_str, parser) -> BruteParsedIngredient:
    """Parse one ingredient line into amount, unit, food and note.

    The string is tokenised on whitespace and interpreted positionally: the
    first token is tried as an amount (optionally followed by a fraction
    token, as in "2 1/2"), the next token as a unit, and the rest as the
    ingredient plus an optional note. If the first token is not an amount,
    the first three tokens are checked against the parser's known units.

    Args:
        ing_str: Raw ingredient text, e.g. "2 1/2 cups flour (sifted)".
        parser: Object exposing ``data_matcher.find_unit_match(token)``; a
            truthy result marks ``token`` as a unit.

    Returns:
        A ``BruteParsedIngredient``. Empty or single-token input returns
        early with ``amount=0.0`` and no unit or note.

    Raises:
        ValueError: May propagate from ``parse_ingredient`` in the unit-lookup
            fallback, which runs inside the ``except`` handler and is not
            guarded itself.
    """
    amount = 0.0
    unit = ""
    ingredient = ""
    note = ""
    unit_note = ""

    # NOTE(review): presumably relocates parenthesised text to the end of the
    # string so it can be picked up as a note - confirm in parser_utils.
    ing_str = move_parens_to_end(ing_str)

    tokens = ing_str.split()

    # Early return if the ingredient is empty or a single token and therefore
    # has no other properties. Without the empty case, tokens[0] below would
    # raise IndexError.
    if len(tokens) <= 1:
        ingredient = tokens[0] if tokens else ""
        # TODO Refactor to expect BFP to be returned instead of Tuple
        return BruteParsedIngredient(food=ingredient, note=note, amount=amount, unit=unit)

    try:
        # try to parse first argument as amount
        amount, unit, unit_note = parse_amount(tokens[0])
        # only try to parse second argument as amount if there are at least
        # three arguments; if it already has a unit there can't be
        # a fraction for the amount
        if len(tokens) > 2:
            try:
                if unit != "":
                    # a unit is already found, no need to try the second argument for a fraction;
                    # raising here reuses the "second token is not a fraction" handler below
                    raise ValueError
                # try to parse second argument as amount and add that, in case of '2 1/2' or '2 ½'
                amount += parse_fraction(tokens[1])
                # assume that units can't end with a comma
                if len(tokens) > 3 and not tokens[2].endswith(","):
                    # try to use third argument as unit and everything else as ingredient,
                    # use everything as ingredient if it fails
                    try:
                        ingredient, note = parse_ingredient(tokens[3:])
                        unit = tokens[2]
                    except ValueError:
                        ingredient, note = parse_ingredient(tokens[2:])
                else:
                    ingredient, note = parse_ingredient(tokens[2:])
            except ValueError:
                # assume that units can't end with a comma
                if not tokens[1].endswith(","):
                    # try to use second argument as unit and everything else as ingredient,
                    # use everything as ingredient if it fails
                    try:
                        ingredient, note = parse_ingredient(tokens[2:])
                        if unit == "":
                            unit = tokens[1]
                        else:
                            note = tokens[1]
                    except ValueError:
                        ingredient, note = parse_ingredient(tokens[1:])
                else:
                    ingredient, note = parse_ingredient(tokens[1:])
        else:
            # only two arguments, first one is the amount
            # which means this is the ingredient
            ingredient = tokens[1]
    except ValueError:
        # can't parse first argument as amount
        # try to parse as unit and ingredient (e.g. "a tblsp salt"), with unit in first three tokens
        # won't work for units that have spaces
        for index, token in enumerate(tokens[:3]):
            remainder = tokens[index + 1 :]
            # A unit must be followed by something to measure; skipping a
            # match on the final token avoids calling parse_ingredient([]).
            if remainder and parser.data_matcher.find_unit_match(token):
                unit = token
                ingredient, note = parse_ingredient(remainder)
                break
        if not unit:
            try:
                # no unit -> parse everything as ingredient
                ingredient, note = parse_ingredient(tokens)
            except ValueError:
                ingredient = " ".join(tokens[1:])

    # Append the note produced by parse_amount unless it is already contained
    # in the note; an empty unit_note is always "contained", so nothing is added.
    if unit_note not in note:
        note += " " + unit_note

    return BruteParsedIngredient(food=ingredient, note=note, amount=amount, unit=unit)