Coverage for opt/mealie/lib/python3.12/site-packages/mealie/services/parser_services/brute/process.py: 43%

136 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-11-25 17:29 +0000

1import string 1c

2import unicodedata 1c

3 

4from pydantic import BaseModel, ConfigDict 1c

5 

6from ..parser_utils import check_char, move_parens_to_end 1c

7 

8 

class BruteParsedIngredient(BaseModel):
    """Structured result produced by the brute-force ingredient parser."""

    # Leading/trailing whitespace is stripped from every string field.
    model_config = ConfigDict(str_strip_whitespace=True)

    # Name of the ingredient itself.
    food: str = ""
    # Free-text remainder that could not be assigned to another field.
    note: str = ""
    # Quantity; 1.0 when the caller does not supply one.
    amount: float = 1.0
    # Unit string exactly as it appeared in the input.
    unit: str = ""

15 

16 

def parse_fraction(x):
    """Convert a textual fraction into a float.

    Two spellings are accepted:

    * a single Unicode vulgar-fraction character, e.g. ``"½"``
    * an ASCII ``"numerator/denominator"`` string, e.g. ``"3/4"``

    Args:
        x: the string to interpret.

    Returns:
        The value of the fraction as a float.

    Raises:
        ValueError: if ``x`` is not a fraction in one of the forms above, if
            either part is not an integer, or if the denominator is zero.
    """
    if len(x) == 1 and "fraction" in unicodedata.decomposition(x):
        # The decomposition looks like "<fraction> 0031 2044 0032": a tag followed
        # by hex code points for the numerator digits, U+2044 FRACTION SLASH and
        # the denominator digits. Decode every code point rather than picking
        # fixed positions so multi-digit parts (e.g. "⅒" = 1/10) are read in full
        # and a missing denominator (e.g. "⅟") is rejected with ValueError.
        code_points = unicodedata.decomposition(x).split()[1:]
        decoded = "".join(chr(int(code_point, 16)) for code_point in code_points)
        numerator, _, denominator = decoded.partition("\u2044")
    else:
        parts = x.split("/")
        if len(parts) != 2:
            raise ValueError(f"not a fraction: {x!r}")
        numerator, denominator = parts

    try:
        # int() raises ValueError on its own for empty or non-numeric parts.
        return int(numerator) / int(denominator)
    except ZeroDivisionError as e:
        raise ValueError(f"fraction with zero denominator: {x!r}") from e

29 

30 

def parse_amount(ing_str) -> tuple[float, str, str]:
    """Split a leading quantity off a single whitespace-free token.

    The token may be a plain number (``"2"``), a decimal written with ``.`` or
    ``,`` (``"1.5"``, ``"1,5"``), an ASCII fraction (``"1/2"``), a Unicode
    fraction character (``"½"``), a number followed by a fraction character
    (``"2½"``), and any of these followed directly by a unit (``"250ml"``).

    Args:
        ing_str: the token to analyse.

    Returns:
        ``(amount, unit, note)``. ``unit`` is whatever follows the number. When
        that remainder starts with ``(`` or ``-`` it is not treated as a unit:
        ``unit`` is ``""`` and ``note`` carries the whole original token.

    Raises:
        ValueError: if the token is empty or does not start with something
            that can be read as an amount.
    """
    if not ing_str:
        # Report an empty token like any other unparseable amount instead of
        # failing with IndexError on ing_str[0] further down.
        raise ValueError("empty amount token")

    def keep_looping(text, pos) -> bool:
        """Tell whether ``text[pos]`` still belongs to the leading number.

        True for a digit, or for one of ``. , /`` that is immediately followed
        by a digit; False otherwise, including when ``pos`` is past the end.
        """
        if pos >= len(text):
            return False
        if text[pos] in string.digits:
            return True
        return bool(
            check_char(text[pos], ".", ",", "/")
            and pos + 1 < len(text)
            and text[pos + 1] in string.digits
        )

    amount = 0.0
    unit = ""
    note = ""

    # Find the end of the numeric prefix.
    end = 0
    while keep_looping(ing_str, end):
        end += 1

    if end > 0:
        number = ing_str[:end]
        if "/" in number:
            amount = parse_fraction(number)
        else:
            # A comma is read as a decimal separator, so "1,500" becomes 1.5.
            amount = float(number.replace(",", "."))
        did_check_frac = False
    else:
        # No digits up front: the first character must itself be a fraction
        # (e.g. "½"), otherwise parse_fraction raises ValueError.
        amount = parse_fraction(ing_str[0])
        end = 1
        did_check_frac = True

    if end < len(ing_str):
        if did_check_frac:
            unit = ing_str[end:]
        else:
            try:
                # Mixed number such as "2½": add the fraction character.
                amount += parse_fraction(ing_str[end])
                unit = ing_str[end + 1 :]
            except ValueError:
                unit = ing_str[end:]

        # No known unit starts with "(" or "-", so this is likely an
        # alternative amount or a range such as "1L(500ml)" or "2-3".
        if unit.startswith(("(", "-")):
            unit = ""
            note = ing_str

    return amount, unit, note

87 

88 

def parse_ingredient_with_comma(tokens) -> tuple[str, str]:
    """Split tokens into ``(ingredient, note)`` at the first comma-terminated token.

    Everything up to and including the first token that ends with ``,`` forms
    the ingredient (with that trailing comma removed); the remaining tokens
    form the note. Without such a token, all tokens are the ingredient and the
    note is empty.
    """
    comma_index = next(
        (position for position, token in enumerate(tokens) if token.endswith(",")),
        None,
    )
    if comma_index is None:
        return " ".join(tokens), ""

    split_at = comma_index + 1
    head = " ".join(tokens[:split_at])
    tail = " ".join(tokens[split_at:])
    # The joined head ends with the comma of the matching token; drop it.
    return head[:-1], tail

105 

106 

def parse_ingredient(tokens) -> tuple[str, str]:
    """Split tokens into ``(ingredient, note)``.

    A trailing parenthesised group (``flour (sifted)``) becomes the note with
    its brackets removed. Otherwise the split is delegated to
    ``parse_ingredient_with_comma``.

    Args:
        tokens: whitespace-separated pieces of the ingredient text.

    Returns:
        ``(ingredient, note)``; ``("", "")`` for an empty token list.

    Raises:
        ValueError: if the last token ends with ``)`` and the backwards search
            for the opening bracket reaches the first token. Callers use this
            as a signal that they sliced the tokens wrongly (e.g. took the
            first word for a unit).
    """
    if not tokens:
        # A caller may pass an empty slice when nothing follows the unit;
        # return empty parts instead of failing on tokens[-1].
        return "", ""

    last = tokens[-1]
    if not last.endswith(")"):
        return parse_ingredient_with_comma(tokens)

    # The opening bracket sits inside the last token but not at its start
    # (e.g. "can(s)"), so the bracket belongs to the word, not to a note.
    if "(" in last and not last.startswith("("):
        return parse_ingredient_with_comma(tokens)

    # Walk backwards to the token that opens the bracket, stopping at index 0.
    start = len(tokens) - 1
    while start != 0 and not tokens[start].startswith("("):
        start -= 1

    if start == 0:
        # Either the whole list is wrapped in brackets or no token opens one;
        # the search cannot tell these apart, and both are reported as an error.
        raise ValueError("no ingredient found before the bracketed note")

    note = " ".join(tokens[start:])[1:-1]
    ingredient = " ".join(tokens[:start])
    return ingredient, note

131 

132 

def parse(ing_str, parser) -> BruteParsedIngredient:
    """Parse a free-text ingredient line into amount, unit, food and note.

    The line is first passed through ``move_parens_to_end`` (presumably moving
    parenthesised text to the end, per its name) and split on whitespace. The
    first token is tried as an amount; depending on what parses, the following
    tokens are tried as an additional fraction, a unit, and the ingredient. If
    the first token is not an amount, the first three tokens are checked against
    ``parser.data_matcher.find_unit_match`` to locate a unit.

    Args:
        ing_str: the raw ingredient text.
        parser: object exposing ``data_matcher.find_unit_match(token)``; a
            truthy result marks ``token`` as a unit.

    Returns:
        A ``BruteParsedIngredient``. Empty or whitespace-only input yields an
        ingredient with empty food, amount 0.0 and no unit.
    """
    amount = 0.0
    unit = ""
    ingredient = ""
    note = ""
    unit_note = ""

    ing_str = move_parens_to_end(ing_str)

    tokens = ing_str.split()

    # Early return if the ingredient is empty or a single token and therefore
    # has no other properties. The empty case would otherwise fail on tokens[0].
    if len(tokens) <= 1:
        ingredient = tokens[0] if tokens else ""
        # TODO Refactor to expect BFP to be returned instead of Tuple
        return BruteParsedIngredient(food=ingredient, note=note, amount=amount, unit=unit)

    try:
        # try to parse first argument as amount
        amount, unit, unit_note = parse_amount(tokens[0])
        # only try to parse second argument as amount if there are at least
        # three arguments; if it already has a unit there can't be
        # a fraction for the amount
        if len(tokens) > 2:
            try:
                if unit != "":
                    # a unit is already found, no need to try the second argument for a fraction.
                    # Raising here deliberately jumps to the handler below, which holds the
                    # "second token is not a fraction" logic.
                    raise ValueError
                # try to parse second argument as amount and add that, in case of '2 1/2' or '2 ½'
                amount += parse_fraction(tokens[1])
                # assume that units can't end with a comma
                if len(tokens) > 3 and not tokens[2].endswith(","):
                    # try to use third argument as unit and everything else as ingredient,
                    # use everything as ingredient if it fails
                    try:
                        ingredient, note = parse_ingredient(tokens[3:])
                        unit = tokens[2]
                    except ValueError:
                        ingredient, note = parse_ingredient(tokens[2:])
                else:
                    ingredient, note = parse_ingredient(tokens[2:])
            except ValueError:
                # assume that units can't end with a comma
                if not tokens[1].endswith(","):
                    # try to use second argument as unit and everything else as ingredient,
                    # use everything as ingredient if it fails
                    try:
                        ingredient, note = parse_ingredient(tokens[2:])
                        if unit == "":
                            unit = tokens[1]
                        else:
                            note = tokens[1]
                    except ValueError:
                        ingredient, note = parse_ingredient(tokens[1:])
                else:
                    ingredient, note = parse_ingredient(tokens[1:])
        else:
            # only two arguments, first one is the amount
            # which means this is the ingredient
            ingredient = tokens[1]
    except ValueError:
        # can't parse first argument as amount
        # try to parse as unit and ingredient (e.g. "a tblsp salt"), with unit in first three tokens
        # won't work for units that have spaces
        for index, token in enumerate(tokens[:3]):
            if parser.data_matcher.find_unit_match(token):
                unit = token
                remainder = tokens[index + 1 :]
                # Nothing may follow the unit (e.g. "a pinch"); only parse when
                # there is something left, so an empty slice cannot crash.
                if remainder:
                    try:
                        ingredient, note = parse_ingredient(remainder)
                    except ValueError:
                        # Same best-effort fallback as the no-unit branch below:
                        # keep the text rather than let the error escape.
                        ingredient = " ".join(remainder)
                break
        if not unit:
            try:
                # no unit -> parse everything as ingredient
                ingredient, note = parse_ingredient(tokens)
            except ValueError:
                ingredient = " ".join(tokens[1:])

    # Carry over the note produced by parse_amount unless it is already there
    # (an empty unit_note is a substring of any note, so nothing is added).
    if unit_note not in note:
        note += " " + unit_note

    return BruteParsedIngredient(food=ingredient, note=note, amount=amount, unit=unit)