Coverage for opt/mealie/lib/python3.12/site-packages/mealie/services/parser

1import asyncio 1 ctx1a

2import json 1 ctx1a

3from collections.abc import Awaitable 1 ctx1a

5from rapidfuzz import fuzz 1 ctx1a

7from mealie.schema.openai.recipe_ingredient import OpenAIIngredient, OpenAIIngredients 1 ctx1a

8from mealie.schema.recipe.recipe_ingredient import ( 1 ctx1a

9 CreateIngredientFood,

10 CreateIngredientUnit,

11 IngredientConfidence,

12 ParsedIngredient,

13 RecipeIngredient,

14)

15from mealie.services.openai import OpenAIDataInjection, OpenAIService 1 ctx1a

17from .._base import ABCIngredientParser 1 ctx1a

18from ..parser_utils import extract_quantity_from_string 1 ctx1a

class OpenAIParser(ABCIngredientParser):
    """Ingredient parser that sends raw ingredient strings to OpenAI and scores the results."""

    def _calculate_qty_conf(self, original_text: str, parsed_qty: float | None) -> float:
        """Compares the extracted quantity to a brute-force parsed quantity.

        Returns 1 when the quantity (a missing quantity counts as 0) equals the
        quantity extracted directly from ``original_text``, otherwise 0.
        """
        expected_qty, _ = extract_quantity_from_string(original_text)
        parsed_qty = parsed_qty or 0
        if parsed_qty == expected_qty:
            return 1
        else:
            return 0

    @staticmethod
    def _alnum_words(text: str) -> list[str]:
        """Split ``text`` on whitespace, lowercase it and keep only alphanumeric characters.

        Words that are empty after filtering (e.g. pure punctuation) are dropped.
        """
        stripped = ("".join(filter(str.isalnum, word)) for word in text.strip().lower().split())
        return [word for word in stripped if word]

    def _calculate_note_conf(self, original_text: str, note: str | None) -> float:
        """
        Calculate confidence based on how many words in the note are found in the original text.
        Uses alphanumeric filtering and lowercasing to improve matching.

        A missing note, or a note with no alphanumeric words, yields full confidence (1).
        """
        if not note:
            return 1

        note_words = self._alnum_words(note)
        if not note_words:
            return 1

        # A set gives constant-time membership checks; duplicates in the original
        # text never mattered because only presence is tested.
        original_words = set(self._alnum_words(original_text))

        note_conf_sum = sum(1 for word in note_words if word in original_words)
        return note_conf_sum / len(note_words)

    def _calculate_overall_confidence(self, original_text: str, ing_text: str) -> float:
        """
        Calculate overall confidence based on fuzzy matching between the original text and the ingredient text.
        Uses token sort ratio to account for word order variations.

        The ratio is divided by 100 before being returned.
        """
        ratio = fuzz.token_sort_ratio(original_text, ing_text)
        return ratio / 100.0

    def _calculate_confidence(self, original_text: str, ing: RecipeIngredient) -> IngredientConfidence:
        """Build the per-field and average confidence for a parsed ingredient.

        Quantity and note are scored by their dedicated helpers. Food and unit get
        full confidence when present; when absent they fall back to the fuzzy match
        between ``original_text`` and ``ing.display``.
        """
        qty_conf = self._calculate_qty_conf(original_text, ing.quantity)
        note_conf = self._calculate_note_conf(original_text, ing.note)

        # Not all ingredients will have a food and/or unit,
        # so if either is missing we fall back to overall confidence.
        overall_confidence = self._calculate_overall_confidence(original_text, ing.display)
        food_conf = 1.0 if ing.food else overall_confidence
        unit_conf = 1.0 if ing.unit else overall_confidence

        return IngredientConfidence(
            average=(qty_conf + unit_conf + food_conf + note_conf) / 4,
            quantity=qty_conf,
            unit=unit_conf,
            food=food_conf,
            comment=note_conf,
        )

    def _convert_ingredient(self, original_text: str, openai_ing: OpenAIIngredient) -> ParsedIngredient:
        """Convert one OpenAI ingredient into a ``ParsedIngredient`` with confidence scores.

        The result is passed through ``self.find_ingredient_match`` before being returned.
        """
        ingredient = RecipeIngredient(
            original_text=original_text,
            quantity=openai_ing.quantity,
            unit=CreateIngredientUnit(name=openai_ing.unit) if openai_ing.unit else None,
            food=CreateIngredientFood(name=openai_ing.food) if openai_ing.food else None,
            note=openai_ing.note,
        )

        parsed_ingredient = ParsedIngredient(
            input=original_text,
            confidence=self._calculate_confidence(original_text, ingredient),
            ingredient=ingredient,
        )

        return self.find_ingredient_match(parsed_ingredient)

    def _get_prompt(self, service: OpenAIService) -> str:
        """Build the ingredient-parsing prompt, injecting the response schema and, optionally, known units.

        The unit list is only injected when ``service.send_db_data`` is truthy and
        ``self.data_matcher.units_by_alias`` is non-empty.
        """
        data_injections = [
            OpenAIDataInjection(
                description=(
                    "This is the JSON response schema. You must respond in valid JSON that follows this schema. "
                    "Your payload should be as compact as possible, eliminating unncessesary whitespace. Any fields "
                    "with default values which you do not populate should not be in the payload."
                ),
                value=OpenAIIngredients,
            ),
        ]

        if service.send_db_data and self.data_matcher.units_by_alias:
            data_injections.extend(
                [
                    OpenAIDataInjection(
                        description=(
                            "Below is a list of units found in the units database. While parsing, you should "
                            "reference this list when determining which part of the input is the unit. You may "
                            "find a unit in the input that does not exist in this list. This should not prevent "
                            "you from parsing that text as a unit."
                        ),
                        value=list(set(self.data_matcher.units_by_alias)),
                    ),
                ]
            )

        return service.get_prompt("recipes.parse-recipe-ingredients", data_injections=data_injections)

    @staticmethod
    def _chunk_messages(messages: list[str], n: int = 1) -> list[list[str]]:
        """Split ``messages`` into consecutive chunks of at most ``n`` items.

        Values of ``n`` below 1 are treated as 1. An empty input yields an empty list.
        """
        if n < 1:
            n = 1
        return [messages[i : i + n] for i in range(0, len(messages), n)]

    async def _parse(self, ingredients: list[str]) -> OpenAIIngredients:
        """Send the ingredients to OpenAI in chunks and merge the parsed responses.

        Raises:
            Exception: if any request fails, if a response cannot be parsed, or if
                no non-empty response was received.
        """
        service = OpenAIService()
        prompt = self._get_prompt(service)

        # chunk ingredients (chunk size is service.workers) and send each chunk as its own request
        ingredient_chunks = self._chunk_messages(ingredients, n=service.workers)
        tasks: list[Awaitable[str | None]] = []
        for ingredient_chunk in ingredient_chunks:
            message = json.dumps(ingredient_chunk, separators=(",", ":"))
            tasks.append(service.get_response(prompt, message, force_json_response=True))

        # re-combine chunks into one response
        try:
            responses_json = await asyncio.gather(*tasks)
        except Exception as e:
            raise Exception("Failed to call OpenAI services") from e

        try:
            # Skip empty/None responses per item. The previous filter tested the whole
            # list (`responses_json`), so it never excluded anything.
            responses = [
                OpenAIIngredients.parse_openai_response(response_json)
                for response_json in responses_json
                if response_json
            ]
        except Exception as e:
            raise Exception("Failed to parse OpenAI response") from e

        if not responses:
            raise Exception("No response from OpenAI")

        return OpenAIIngredients(
            ingredients=[ingredient for response in responses for ingredient in response.ingredients]
        )

    async def parse_one(self, ingredient_string: str) -> ParsedIngredient:
        """Parse a single ingredient string by delegating to ``parse``."""
        items = await self.parse([ingredient_string])
        return items[0]

    async def parse(self, ingredients: list[str]) -> list[ParsedIngredient]:
        """Parse ingredient strings and return one ``ParsedIngredient`` per input, in order.

        Raises:
            ValueError: if OpenAI returns a different number of ingredients than were sent.
        """
        response = await self._parse(ingredients)
        if len(response.ingredients) != len(ingredients):
            raise ValueError(
                "OpenAI returned an unexpected number of ingredients. "
                f"Expected {len(ingredients)}, got {len(response.ingredients)}"
            )

        return [
            self._convert_ingredient(original_text, ing)
            for original_text, ing in zip(ingredients, response.ingredients, strict=True)
        ]

Coverage for opt/mealie/lib/python3.12/site-packages/mealie/services/parser_services/openai/parser.py: 20%

88 statements