Coverage for opt/mealie/lib/python3.12/site-packages/mealie/services/parser_services/openai/parser.py: 20%
88 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-11-25 17:29 +0000
« prev ^ index » next coverage.py v7.10.6, created at 2025-11-25 17:29 +0000
1import asyncio 1a
2import json 1a
3from collections.abc import Awaitable 1a
5from rapidfuzz import fuzz 1a
7from mealie.schema.openai.recipe_ingredient import OpenAIIngredient, OpenAIIngredients 1a
8from mealie.schema.recipe.recipe_ingredient import ( 1a
9 CreateIngredientFood,
10 CreateIngredientUnit,
11 IngredientConfidence,
12 ParsedIngredient,
13 RecipeIngredient,
14)
15from mealie.services.openai import OpenAIDataInjection, OpenAIService 1a
17from .._base import ABCIngredientParser 1a
18from ..parser_utils import extract_quantity_from_string 1a
21class OpenAIParser(ABCIngredientParser): 1a
def _calculate_qty_conf(self, original_text: str, parsed_qty: float | None) -> float:
    """
    Score the parsed quantity against a brute-force extraction from the raw text.

    The first element returned by `extract_quantity_from_string(original_text)` is
    treated as the reference quantity. A missing (or otherwise falsy) `parsed_qty`
    is compared as 0.

    Returns 1 when the two quantities are exactly equal, otherwise 0.
    """

    reference_qty, _unused = extract_quantity_from_string(original_text)
    candidate_qty = parsed_qty if parsed_qty else 0
    return 1 if candidate_qty == reference_qty else 0
32 def _calculate_note_conf(self, original_text: str, note: str | None) -> float: 1a
33 """
34 Calculate confidence based on how many words in the note are found in the original text.
35 Uses alphanumeric filtering and lowercasing to improve matching.
36 """
38 if not note:
39 return 1
41 note_words: list[str] = []
42 for word in note.strip().lower().split():
43 clean_word = "".join(filter(str.isalnum, word))
44 if clean_word:
45 note_words.append(clean_word)
47 if not note_words:
48 return 1
50 original_words: list[str] = []
51 for word in original_text.strip().lower().split():
52 clean_word = "".join(filter(str.isalnum, word))
53 if clean_word:
54 original_words.append(clean_word)
56 note_conf_sum = sum(1 for word in note_words if word in original_words)
57 return note_conf_sum / len(note_words)
def _calculate_overall_confidence(self, original_text: str, ing_text: str) -> float:
    """
    Score how closely the rendered ingredient text matches the original text.

    The score is rapidfuzz's token sort ratio, which compares the strings after
    sorting their tokens so that word order does not affect the result. The ratio
    is divided by 100 before being returned (rapidfuzz reports it on a 0-100 scale).
    """

    return fuzz.token_sort_ratio(original_text, ing_text) / 100.0
def _calculate_confidence(self, original_text: str, ing: RecipeIngredient) -> IngredientConfidence:
    """
    Build the per-field confidence scores for a parsed ingredient.

    Quantity and note are scored by their dedicated helpers. Food and unit score 1.0
    when present on the ingredient; when absent they take the fuzzy-match score
    between the original text and `ing.display`. `average` is the mean of the four.
    """

    quantity_score = self._calculate_qty_conf(original_text, ing.quantity)
    note_score = self._calculate_note_conf(original_text, ing.note)

    # Not every ingredient has a food and/or a unit; a missing one falls back to the
    # overall match score. The score is computed up front, whether or not it is needed.
    fallback_score = self._calculate_overall_confidence(original_text, ing.display)
    food_score = 1.0 if ing.food else fallback_score
    unit_score = 1.0 if ing.unit else fallback_score

    mean_score = (quantity_score + unit_score + food_score + note_score) / 4
    return IngredientConfidence(
        average=mean_score,
        quantity=quantity_score,
        unit=unit_score,
        food=food_score,
        comment=note_score,
    )
def _convert_ingredient(self, original_text: str, openai_ing: OpenAIIngredient) -> ParsedIngredient:
    """
    Turn one OpenAI ingredient into a `ParsedIngredient` with confidence scores.

    A unit or food is only created when OpenAI supplied a non-empty name for it.
    The result is passed through `self.find_ingredient_match` before being returned
    (presumably inherited from `ABCIngredientParser`; it is not defined in this class).
    """

    unit = None
    if openai_ing.unit:
        unit = CreateIngredientUnit(name=openai_ing.unit)

    food = None
    if openai_ing.food:
        food = CreateIngredientFood(name=openai_ing.food)

    recipe_ingredient = RecipeIngredient(
        original_text=original_text,
        quantity=openai_ing.quantity,
        unit=unit,
        food=food,
        note=openai_ing.note,
    )
    confidence = self._calculate_confidence(original_text, recipe_ingredient)

    parsed = ParsedIngredient(
        input=original_text,
        confidence=confidence,
        ingredient=recipe_ingredient,
    )
    return self.find_ingredient_match(parsed)
def _get_prompt(self, service: OpenAIService) -> str:
    """
    Assemble the ingredient-parsing prompt, including its data injections.

    The response schema (`OpenAIIngredients`) is always injected. The de-duplicated
    contents of `self.data_matcher.units_by_alias` are injected as well, but only when
    the service is configured to send database data and that collection is non-empty.
    """

    schema_description = (
        "This is the JSON response schema. You must respond in valid JSON that follows this schema. "
        "Your payload should be as compact as possible, eliminating unncessesary whitespace. Any fields "
        "with default values which you do not populate should not be in the payload."
    )
    data_injections = [OpenAIDataInjection(description=schema_description, value=OpenAIIngredients)]

    if service.send_db_data and self.data_matcher.units_by_alias:
        units_description = (
            "Below is a list of units found in the units database. While parsing, you should "
            "reference this list when determining which part of the input is the unit. You may "
            "find a unit in the input that does not exist in this list. This should not prevent "
            "you from parsing that text as a unit."
        )
        # Round-trip through a set to drop duplicate entries before sending them.
        known_units = list(set(self.data_matcher.units_by_alias))
        data_injections.append(OpenAIDataInjection(description=units_description, value=known_units))

    return service.get_prompt("recipes.parse-recipe-ingredients", data_injections=data_injections)
139 @staticmethod 1a
140 def _chunk_messages(messages: list[str], n=1) -> list[list[str]]: 1a
141 if n < 1:
142 n = 1
143 return [messages[i : i + n] for i in range(0, len(messages), n)]
async def _parse(self, ingredients: list[str]) -> OpenAIIngredients:
    """
    Send the ingredient strings to OpenAI and merge the parsed results.

    The ingredients are split into chunks, each chunk is serialised as compact JSON
    and sent as its own request, and all requests are awaited concurrently. The
    ingredients of every non-empty response are concatenated in request order.

    Raises:
        Exception: if any request fails, if a response cannot be parsed, or if no
            request produced a usable response.
    """
    service = OpenAIService()
    prompt = self._get_prompt(service)

    # chunk ingredients and send each chunk as its own request
    # NOTE(review): `_chunk_messages` treats `n` as the chunk *size*, so
    # `service.workers` is the number of ingredients per request here, not the
    # number of concurrent requests - confirm this is the intended meaning.
    ingredient_chunks = self._chunk_messages(ingredients, n=service.workers)
    tasks: list[Awaitable[str | None]] = []
    for ingredient_chunk in ingredient_chunks:
        message = json.dumps(ingredient_chunk, separators=(",", ":"))
        tasks.append(service.get_response(prompt, message, force_json_response=True))

    # re-combine chunks into one response
    try:
        responses_json = await asyncio.gather(*tasks)
    except Exception as e:
        raise Exception("Failed to call OpenAI services") from e

    try:
        # Skip empty responses individually. The filter must test each element:
        # testing the whole list would let `None` entries through to the parser.
        # A skipped chunk leaves fewer ingredients than were sent, which `parse`
        # detects through its length check.
        responses = [
            OpenAIIngredients.parse_openai_response(response_json)
            for response_json in responses_json
            if response_json
        ]
    except Exception as e:
        raise Exception("Failed to parse OpenAI response") from e

    if not responses:
        raise Exception("No response from OpenAI")

    return OpenAIIngredients(
        ingredients=[ingredient for response in responses for ingredient in response.ingredients]
    )
async def parse_one(self, ingredient_string: str) -> ParsedIngredient:
    """Parse a single ingredient string by delegating to `parse` with a one-item list."""
    parsed = await self.parse([ingredient_string])
    first_result = parsed[0]
    return first_result
async def parse(self, ingredients: list[str]) -> list[ParsedIngredient]:
    """
    Parse a list of ingredient strings into `ParsedIngredient` objects.

    Results are paired with the inputs by position, so OpenAI must return exactly
    one ingredient per input string.

    Raises:
        ValueError: if the number of returned ingredients differs from the number
            of input strings.
    """
    response = await self._parse(ingredients)

    counts_match = len(response.ingredients) == len(ingredients)
    if not counts_match:
        raise ValueError(
            "OpenAI returned an unexpected number of ingredients. "
            f"Expected {len(ingredients)}, got {len(response.ingredients)}"
        )

    parsed_ingredients: list[ParsedIngredient] = []
    for original_text, ing in zip(ingredients, response.ingredients, strict=True):
        parsed_ingredients.append(self._convert_ingredient(original_text, ing))
    return parsed_ingredients