Coverage for opt/mealie/lib/python3.12/site-packages/mealie/services/scraper/recipe_bulk_scraper.py: 74%

75 statements  

coverage.py v7.10.6, created at 2025-11-25 15:48 +0000

  1  import asyncio  1b
  2
  3  from pydantic import UUID4  1b
  4
  5  from mealie.lang.providers import Translator  1b
  6  from mealie.repos.repository_factory import AllRepositories  1b
  7  from mealie.schema.recipe.recipe import CreateRecipeByUrlBulk, Recipe  1b
  8  from mealie.schema.reports.reports import (  1b
  9      ReportCategory,
 10      ReportCreate,
 11      ReportEntryCreate,
 12      ReportEntryOut,
 13      ReportSummaryStatus,
 14  )
 15  from mealie.schema.user.user import GroupInDB  1b
 16  from mealie.services._base_service import BaseService  1b
 17  from mealie.services.recipe.recipe_service import RecipeService  1b
 18  from mealie.services.scraper.scraper import create_from_html  1b
 19
 20
 21  class RecipeBulkScraperService(BaseService):  1b
 22      report_entries: list[ReportEntryCreate]  1b
 23
 24      def __init__(  1b
 25          self, service: RecipeService, repos: AllRepositories, group: GroupInDB, translator: Translator
 26      ) -> None:
 27          self.service = service  1a
 28          self.repos = repos  1a
 29          self.group = group  1a
 30          self.report_entries = []  1a
 31          self.translator = translator  1a
 32
 33          super().__init__()  1a
 34
 35      def get_report_id(self) -> UUID4:  1b
 36          import_report = ReportCreate(  1a
 37              name="Bulk Import",
 38              category=ReportCategory.bulk_import,
 39              status=ReportSummaryStatus.in_progress,
 40              group_id=self.group.id,
 41          )
 42
 43          self.report = self.repos.group_reports.create(import_report)  1a
 44          return self.report.id  1a
 45
 46      def _add_error_entry(self, message: str, exception: str = "") -> None:  1b
 47          self.report_entries.append(  1a
 48              ReportEntryCreate(
 49                  report_id=self.report.id,
 50                  success=False,
 51                  message=message,
 52                  exception=exception,
 53              )
 54          )
 55
 56      def _save_all_entries(self) -> None:  1b
 57          is_success = True  1a
 58          is_failure = True  1a
 59
 60          new_entries: list[ReportEntryOut] = []  1a
 61          for entry in self.report_entries:  1a
 62              if is_failure and entry.success:  [62 ↛ 63: line 62 didn't jump to line 63 because the condition on line 62 was never true]  1a
 63                  is_failure = False
 64
 65              if is_success and not entry.success:  1a
 66                  is_success = False  1a
 67
 68              new_entries.append(self.repos.group_report_entries.create(entry))  1a
 69
 70          if is_success:  1a
 71              self.report.status = ReportSummaryStatus.success  1a
 72
 73          if is_failure:  [73 ↛ 76: line 73 didn't jump to line 76 because the condition on line 73 was always true]  1a
 74              self.report.status = ReportSummaryStatus.failure  1a
 75
 76          if not is_success and not is_failure:  [76 ↛ 77: line 76 didn't jump to line 77 because the condition on line 76 was never true]  1a
 77              self.report.status = ReportSummaryStatus.partial
 78
 79          self.report.entries = new_entries  1a
 80          self.repos.group_reports.update(self.report.id, self.report)  1a
 81
 82      async def scrape(self, urls: CreateRecipeByUrlBulk) -> None:  1b
 83          sem = asyncio.Semaphore(3)  1a
 84
 85          async def _do(url: str) -> Recipe | None:  1a
 86              async with sem:  1a
 87                  try:  1a
 88                      recipe, _ = await create_from_html(url, self.translator)  1a
 89                      return recipe
 90                  except Exception as e:  1a
 91                      self.service.logger.error(f"failed to scrape url during bulk url import {url}")  1a
 92                      self.service.logger.exception(e)  1a
 93                      self._add_error_entry(f"failed to scrape url {url}", str(e))  1a
 94                      return None  1a
 95
 96          if self.report is None:  [96 ↛ 97: line 96 didn't jump to line 97 because the condition on line 96 was never true]  1a
 97              self.get_report_id()
 98          tasks = [_do(b.url) for b in urls.imports]  1a
 99          results = await asyncio.gather(*tasks, return_exceptions=True)  1a
100          for b, recipe in zip(urls.imports, results, strict=True):  1a
101              if not recipe or isinstance(recipe, BaseException):  [101 ↛ 104: line 101 didn't jump to line 104 because the condition on line 101 was always true]  1a
102                  continue  1a
103
104              if b.tags:
105                  recipe.tags = b.tags
106
107              if b.categories:
108                  recipe.recipe_category = b.categories
109
110              try:
111                  self.service.create_one(recipe)
112              except Exception as e:
113                  self.service.logger.error(f"Failed to save recipe to database during bulk url import {b.url}")
114                  self.service.logger.exception(e)
115                  self._add_error_entry(f"Failed to save recipe to database during bulk url import {b.url}", str(e))
116                  continue
117
118              self.report_entries.append(
119                  ReportEntryCreate(
120                      report_id=self.report.id,
121                      success=True,
122                      message=f"Successfully imported recipe {recipe.name}",
123                      exception="",
124                  )
125              )
126
127          self._save_all_entries()  1a
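Note on the annotations: the trailing 1a / 1b labels are run-context markers carried over from the HTML report; lines without a marker (63, 77, 89, 97, and 104-125) appear not to have been executed in the recorded runs, which matches the 74% figure and the branch notes. In other words, every scraped URL failed in these runs, so the per-recipe success path in scrape() and the success/partial statuses in _save_all_entries() were never reached.

The sketch below is one hedged way those uncovered lines might be exercised. It is not taken from the Mealie test suite: the patch target follows the import on line 18, SimpleNamespace stands in for CreateRecipeByUrlBulk and its import entries, the RecipeService / AllRepositories / GroupInDB / Translator collaborators are all mocks, and it assumes BaseService and the report schemas can be instantiated in a plain test environment.

# Hypothetical test sketch (not part of Mealie) driving the currently
# uncovered success path: create_from_html is replaced with an AsyncMock
# so only RecipeBulkScraperService's own logic runs.
import asyncio
from types import SimpleNamespace
from unittest.mock import AsyncMock, MagicMock, patch
from uuid import uuid4

from mealie.services.scraper.recipe_bulk_scraper import RecipeBulkScraperService


def test_bulk_scrape_success_path():
    service = MagicMock()              # stands in for RecipeService
    repos = MagicMock()                # stands in for AllRepositories
    group = MagicMock(id=uuid4())      # GroupInDB stub; real UUID for ReportCreate.group_id
    translator = MagicMock()           # stands in for Translator

    # group_reports.create() must return something with a UUID id, because the
    # success entry built on lines 118-125 feeds it back into ReportEntryCreate.
    repos.group_reports.create.return_value = SimpleNamespace(id=uuid4())

    bulk = RecipeBulkScraperService(service, repos, group, translator)
    bulk.get_report_id()               # sets self.report so line 97 stays skipped

    fake_recipe = MagicMock()          # truthy and not a BaseException, so line 101 falls through
    one_import = SimpleNamespace(url="https://example.com/r", tags=None, categories=None)
    urls = SimpleNamespace(imports=[one_import])   # shaped like CreateRecipeByUrlBulk

    with patch(
        "mealie.services.scraper.recipe_bulk_scraper.create_from_html",
        new=AsyncMock(return_value=(fake_recipe, None)),
    ):
        asyncio.run(bulk.scrape(urls))

    # Lines 110-111 and 118-125 should now have executed.
    service.create_one.assert_called_once_with(fake_recipe)
    repos.group_report_entries.create.assert_called_once()

With one import succeeding and none failing, _save_all_entries() would also take the success branch on lines 70-71 instead of the failure branch on lines 73-74; a second test with a mix of good and bad URLs would be needed to reach the partial status on line 77.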