Coverage for opt/mealie/lib/python3.12/site-packages/mealie/services/migrations/copymethat.py: 14%

71 statements  

coverage.py v7.10.6, created at 2025-11-25 15:48 +0000

import tempfile
import zipfile
from datetime import UTC, datetime
from pathlib import Path

from bs4 import BeautifulSoup

from mealie.schema.reports.reports import ReportEntryCreate

from ._migration_base import BaseMigrator
from .utils.migration_alias import MigrationAlias
from .utils.migration_helpers import import_image
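
# Migrator for "Copy Me That" exports: the uploaded archive contains one or
# more HTML files in which each recipe is rendered as a <div class="recipe">.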


def parse_recipe_tags(tags: list) -> list[str]:
    """Parses the list of recipe tags and removes invalid ones"""

    updated_tags: list[str] = []
    for tag in tags:
        if not tag or not isinstance(tag, str):
            continue

        if "Tags:" in tag:
            continue

        updated_tags.append(tag)

    return updated_tags
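
# Illustrative example: parse_recipe_tags(["Dinner", None, 42, "Tags:", "Vegan"])
# returns ["Dinner", "Vegan"]; falsy values, non-strings, and the "Tags:"
# header entry are all dropped.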


class CopyMeThatMigrator(BaseMigrator):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        self.name = "copymethat"

        self.key_aliases = [
            MigrationAlias(key="last_made", alias="made_this", func=lambda x: datetime.now(UTC)),
            MigrationAlias(key="notes", alias="recipeNotes"),
            MigrationAlias(key="orgURL", alias="original_link"),
            MigrationAlias(key="rating", alias="ratingValue"),
            MigrationAlias(key="recipeIngredient", alias="recipeIngredients"),
            MigrationAlias(key="recipeYield", alias="servings", func=lambda x: x.replace(":", ": ")),
        ]
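
        # Each MigrationAlias maps a key from the export (alias) to the
        # corresponding Mealie recipe field (key), optionally transforming the
        # value with func. "made_this" carries no date in the export, so
        # last_made is stamped with the import time instead.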

    def _process_recipe_document(self, source_dir: Path, soup: BeautifulSoup) -> dict:
        """Reads a single recipe's HTML and converts it to a dictionary"""

        recipe_dict: dict = {}
        recipe_tags: list[str] = []
        for tag in soup.find_all():
            # the recipe image tag has no id, so we parse it directly
            if tag.name == "img" and "recipeImage" in tag.get("class", []):
                if image_path := tag.get("src"):
                    recipe_dict["image"] = str(source_dir.joinpath(image_path))

                continue

            # tags (internally named categories) are not in a list, and don't have ids
            if tag.name == "span" and "recipeCategory" in tag.get("class", []):
                recipe_tag = tag.get_text(strip=True)
                if "Tags:" not in recipe_tag:
                    recipe_tags.append(recipe_tag)

                continue

            # add only elements with an id to the recipe dictionary
            if not (tag_id := tag.get("id")):
                continue

            # for lists, store the list items as an array (e.g. for recipe instructions)
            if tag.name in ["ul", "ol"]:
                recipe_dict[tag_id] = [item.get_text(strip=True) for item in tag.find_all("li", recursive=False)]
                continue

            # for all other tags, write the text directly to the recipe data
            recipe_dict[tag_id] = tag.get_text(strip=True)

        if recipe_tags:
            recipe_dict["tags"] = recipe_tags

        return recipe_dict
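
    # Illustrative example (hypothetical markup, shaped like what the parser
    # expects): a block such as
    #   <div class="recipe">
    #     <h3 id="name">Pancakes</h3>
    #     <ol id="instructions"><li>Mix</li><li>Cook</li></ol>
    #     <span class="recipeCategory">Breakfast</span>
    #   </div>
    # would produce {"name": "Pancakes", "instructions": ["Mix", "Cook"],
    # "tags": ["Breakfast"]}.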

    def _migrate(self) -> None:
        with tempfile.TemporaryDirectory() as tmpdir:
            with zipfile.ZipFile(self.archive) as zip_file:
                zip_file.extractall(tmpdir)

            source_dir = self.get_zip_base_path(Path(tmpdir))

            recipes_as_dicts: list[dict] = []
            for recipes_data_file in source_dir.glob("*.html"):
                with open(recipes_data_file, encoding="utf-8") as f:
                    soup = BeautifulSoup(f, "lxml")
                    for recipe_data in soup.find_all("div", class_="recipe"):
                        try:
                            recipes_as_dicts.append(self._process_recipe_document(source_dir, recipe_data))

                        # since recipes are stored in one large file, we keep going on error
                        except Exception as e:
                            self.report_entries.append(
                                ReportEntryCreate(
                                    report_id=self.report_id,
                                    success=False,
                                    message="Failed to parse recipe",
                                    exception=f"{type(e).__name__}: {e}",
                                )
                            )

            recipes = [self.clean_recipe_dictionary(x) for x in recipes_as_dicts]
            results = self.import_recipes_to_database(recipes)
            recipe_lookup = {r.slug: r for r in recipes}
            for slug, recipe_id, status in results:
                if status:
                    r = recipe_lookup.get(slug)
                    if not r or not r.image:
                        continue

                    import_image(r.image, recipe_id)
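
# A minimal usage sketch (hypothetical wiring; the real call site supplies
# BaseMigrator's constructor arguments, such as the database session and
# user/group context, which are elided here):
#
#     migrator = CopyMeThatMigrator(archive=Path("copymethat_export.zip"), ...)
#     migrator.migrate("Copy Me That migration")
#
# where migrate() is assumed to wrap _migrate() with the report bookkeeping
# that consumes the ReportEntryCreate entries appended above.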