Coverage for opt/mealie/lib/python3.12/site-packages/mealie/services/parser_services/openai/parser.py: 20%

88 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-11-25 15:48 +0000

1import asyncio 1a

2import json 1a

3from collections.abc import Awaitable 1a

4 

5from rapidfuzz import fuzz 1a

6 

7from mealie.schema.openai.recipe_ingredient import OpenAIIngredient, OpenAIIngredients 1a

8from mealie.schema.recipe.recipe_ingredient import ( 1a

9 CreateIngredientFood, 

10 CreateIngredientUnit, 

11 IngredientConfidence, 

12 ParsedIngredient, 

13 RecipeIngredient, 

14) 

15from mealie.services.openai import OpenAIDataInjection, OpenAIService 1a

16 

17from .._base import ABCIngredientParser 1a

18from ..parser_utils import extract_quantity_from_string 1a

19 

20 

class OpenAIParser(ABCIngredientParser):
    """Ingredient parser that delegates parsing to OpenAI and scores the results locally.

    Raw ingredient strings are sent to ``OpenAIService`` in chunks, the responses are turned into
    ``OpenAIIngredients``, and every returned ingredient is converted into a ``ParsedIngredient``
    whose confidence values are computed by comparing it against the original text.
    """

    @staticmethod
    def _normalized_words(text: str) -> list[str]:
        """Split ``text`` into lowercased words with every non-alphanumeric character removed.

        Words that contain no alphanumeric characters at all are dropped.
        """
        cleaned = ("".join(filter(str.isalnum, word)) for word in text.strip().lower().split())
        return [word for word in cleaned if word]

    def _calculate_qty_conf(self, original_text: str, parsed_qty: float | None) -> float:
        """Compares the extracted quantity to a brute-force parsed quantity.

        Returns 1.0 when ``parsed_qty`` (a missing quantity is treated as 0) equals the quantity
        that ``extract_quantity_from_string`` reports for ``original_text``, otherwise 0.0.
        """
        expected_qty, _ = extract_quantity_from_string(original_text)
        return 1.0 if (parsed_qty or 0) == expected_qty else 0.0

    def _calculate_note_conf(self, original_text: str, note: str | None) -> float:
        """
        Calculate confidence based on how many words in the note are found in the original text.
        Uses alphanumeric filtering and lowercasing to improve matching.

        Returns the fraction of note words present in the original text. A missing note, or a
        note without any alphanumeric words, yields 1.0 because there is nothing to contradict.
        """
        if not note:
            return 1.0

        note_words = self._normalized_words(note)
        if not note_words:
            return 1.0

        # A set gives constant-time membership checks; duplicates in the note still count
        # individually because we iterate over the note's word list.
        original_words = set(self._normalized_words(original_text))
        matched = sum(1 for word in note_words if word in original_words)
        return matched / len(note_words)

    def _calculate_overall_confidence(self, original_text: str, ing_text: str) -> float:
        """
        Calculate overall confidence based on fuzzy matching between the original text and the ingredient text.
        Uses token sort ratio to account for word order variations.

        The ratio returned by ``fuzz.token_sort_ratio`` is divided by 100 before being returned.
        """
        return fuzz.token_sort_ratio(original_text, ing_text) / 100.0

    def _calculate_confidence(self, original_text: str, ing: RecipeIngredient) -> IngredientConfidence:
        """Build the per-field and average confidence for ``ing`` relative to ``original_text``."""
        qty_conf = self._calculate_qty_conf(original_text, ing.quantity)
        note_conf = self._calculate_note_conf(original_text, ing.note)

        # Not all ingredients will have a food and/or unit,
        # so if either is missing we fall back to overall confidence.
        overall_confidence = self._calculate_overall_confidence(original_text, ing.display)
        food_conf = 1.0 if ing.food else overall_confidence
        unit_conf = 1.0 if ing.unit else overall_confidence

        return IngredientConfidence(
            average=(qty_conf + unit_conf + food_conf + note_conf) / 4,
            quantity=qty_conf,
            unit=unit_conf,
            food=food_conf,
            comment=note_conf,
        )

    def _convert_ingredient(self, original_text: str, openai_ing: OpenAIIngredient) -> ParsedIngredient:
        """Convert one OpenAI result into a ``ParsedIngredient``.

        A ``RecipeIngredient`` is built from the OpenAI fields (unit and food are only created
        when OpenAI supplied a value), scored against ``original_text``, and the resulting
        ``ParsedIngredient`` is passed through ``self.find_ingredient_match`` before returning.
        """
        ingredient = RecipeIngredient(
            original_text=original_text,
            quantity=openai_ing.quantity,
            unit=CreateIngredientUnit(name=openai_ing.unit) if openai_ing.unit else None,
            food=CreateIngredientFood(name=openai_ing.food) if openai_ing.food else None,
            note=openai_ing.note,
        )

        parsed_ingredient = ParsedIngredient(
            input=original_text,
            confidence=self._calculate_confidence(original_text, ingredient),
            ingredient=ingredient,
        )

        return self.find_ingredient_match(parsed_ingredient)

    def _get_prompt(self, service: OpenAIService) -> str:
        """Assemble the ingredient-parsing prompt, including the response schema.

        When ``service.send_db_data`` is set and ``self.data_matcher.units_by_alias`` is not
        empty, the de-duplicated units are injected into the prompt as well.
        """
        data_injections = [
            OpenAIDataInjection(
                description=(
                    "This is the JSON response schema. You must respond in valid JSON that follows this schema. "
                    "Your payload should be as compact as possible, eliminating unncessesary whitespace. Any fields "
                    "with default values which you do not populate should not be in the payload."
                ),
                value=OpenAIIngredients,
            ),
        ]

        if service.send_db_data and self.data_matcher.units_by_alias:
            data_injections.append(
                OpenAIDataInjection(
                    description=(
                        "Below is a list of units found in the units database. While parsing, you should "
                        "reference this list when determining which part of the input is the unit. You may "
                        "find a unit in the input that does not exist in this list. This should not prevent "
                        "you from parsing that text as a unit."
                    ),
                    value=list(set(self.data_matcher.units_by_alias)),
                )
            )

        return service.get_prompt("recipes.parse-recipe-ingredients", data_injections=data_injections)

    @staticmethod
    def _chunk_messages(messages: list[str], n: int = 1) -> list[list[str]]:
        """Split ``messages`` into consecutive chunks of at most ``n`` items.

        Values of ``n`` below 1 are treated as 1. An empty input yields an empty list.
        """
        size = n if n >= 1 else 1
        return [messages[start : start + size] for start in range(0, len(messages), size)]

    async def _parse(self, ingredients: list[str]) -> OpenAIIngredients:
        """Send ``ingredients`` to OpenAI and merge all chunk responses into one result.

        Raises:
            Exception: if any OpenAI call fails, if a response cannot be parsed, or if no
                usable response was returned at all.
        """
        service = OpenAIService()
        prompt = self._get_prompt(service)

        # One request per chunk; each chunk holds up to `service.workers` ingredients,
        # serialized as compact JSON.
        tasks: list[Awaitable[str | None]] = [
            service.get_response(
                prompt,
                json.dumps(ingredient_chunk, separators=(",", ":")),
                force_json_response=True,
            )
            for ingredient_chunk in self._chunk_messages(ingredients, n=service.workers)
        ]

        # gather() keeps results in task order, so the merged ingredients stay aligned with the input
        try:
            responses_json = await asyncio.gather(*tasks)
        except Exception as e:
            raise Exception("Failed to call OpenAI services") from e

        # Skip empty responses. The filter must test the individual response: testing the whole
        # list is always true inside the loop and would let empty responses reach the parser.
        try:
            responses = [
                OpenAIIngredients.parse_openai_response(response_json)
                for response_json in responses_json
                if response_json
            ]
        except Exception as e:
            raise Exception("Failed to parse OpenAI response") from e

        if not responses:
            raise Exception("No response from OpenAI")

        return OpenAIIngredients(
            ingredients=[ingredient for response in responses for ingredient in response.ingredients]
        )

    async def parse_one(self, ingredient_string: str) -> ParsedIngredient:
        """Parse a single ingredient string by delegating to ``parse``."""
        items = await self.parse([ingredient_string])
        return items[0]

    async def parse(self, ingredients: list[str]) -> list[ParsedIngredient]:
        """Parse ``ingredients`` and return one ``ParsedIngredient`` per input, in input order.

        Raises:
            ValueError: if OpenAI returned a different number of ingredients than were sent.
        """
        response = await self._parse(ingredients)
        if len(response.ingredients) != len(ingredients):
            raise ValueError(
                "OpenAI returned an unexpected number of ingredients. "
                f"Expected {len(ingredients)}, got {len(response.ingredients)}"
            )

        return [
            self._convert_ingredient(original_text, ing)
            for original_text, ing in zip(ingredients, response.ingredients, strict=True)
        ]