Coverage for opt/mealie/lib/python3.12/site-packages/mealie/services/scraper/recipe_scraper.py: 42%

34 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-11-25 15:32 +0000

1from mealie.core.root_logger import get_logger 1a

2from mealie.lang.providers import Translator 1a

3from mealie.schema.recipe.recipe import Recipe 1a

4from mealie.services.scraper import cleaner 1a

5from mealie.services.scraper.scraped_extras import ScrapedExtras 1a

6 

7from .scraper_strategies import ( 1a

8 ABCScraperStrategy, 

9 RecipeScraperOpenAI, 

10 RecipeScraperOpenGraph, 

11 RecipeScraperPackage, 

12 safe_scrape_html, 

13) 

14 

# Default strategy order — tried in sequence; the first strategy that
# successfully parses a recipe wins.
DEFAULT_SCRAPER_STRATEGIES: list[type[ABCScraperStrategy]] = [
    RecipeScraperPackage,
    RecipeScraperOpenAI,
    RecipeScraperOpenGraph,
]

20 

21 

class RecipeScraper:
    """
    Scrapes recipes from the web by trying a sequence of scraper strategies
    in order until one succeeds.
    """

    # List of recipe scraper strategy classes. Note that order matters:
    # strategies are attempted in sequence and the first successful one wins.
    scrapers: list[type[ABCScraperStrategy]]

    def __init__(self, translator: Translator, scrapers: list[type[ABCScraperStrategy]] | None = None) -> None:
        """
        Args:
            translator: passed to each strategy and to the cleaner.
            scrapers: optional override of the strategy list; defaults to
                `DEFAULT_SCRAPER_STRATEGIES`.
        """
        if scrapers is None:
            # Copy the default list so a caller mutating `self.scrapers`
            # cannot corrupt the shared module-level default.
            scrapers = list(DEFAULT_SCRAPER_STRATEGIES)

        self.scrapers = scrapers
        self.translator = translator
        self.logger = get_logger()

    async def scrape(self, url: str, html: str | None = None) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]:
        """
        Scrapes a recipe from the web.
        Skips the network request if `html` is provided.

        Returns:
            (recipe, extras) from the first strategy whose output both parses
            and cleans successfully, or (None, None) if every strategy fails.
        """

        raw_html = html or await safe_scrape_html(url)
        for scraper_type in self.scrapers:
            scraper = scraper_type(url, self.translator, raw_html=raw_html)

            try:
                result = await scraper.parse()
            except Exception:
                # Lazy %-style args so the message is only formatted when the
                # record is actually emitted.
                self.logger.exception("Failed to scrape HTML with %s", type(scraper).__name__)
                continue

            # A strategy may return None or a (None, extras) pair on failure.
            if result is None or result[0] is None:
                continue

            recipe_result, extras = result
            try:
                recipe = cleaner.clean(recipe_result, self.translator)
            except Exception:
                self.logger.exception("Failed to clean recipe data from %s", type(scraper).__name__)
                continue

            return recipe, extras

        return None, None