Coverage for opt/mealie/lib/python3.12/site-packages/mealie/services/scraper/recipe_scraper.py: 42%

34 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-11-25 15:32 +0000

1from mealie.core.root_logger import get_logger 1a

2from mealie.lang.providers import Translator 1a

3from mealie.schema.recipe.recipe import Recipe 1a

4from mealie.services.scraper import cleaner 1a

5from mealie.services.scraper.scraped_extras import ScrapedExtras 1a

6 

7from .scraper_strategies import ( 1a

8 ABCScraperStrategy, 

9 RecipeScraperOpenAI, 

10 RecipeScraperOpenGraph, 

11 RecipeScraperPackage, 

12 safe_scrape_html, 

13) 

14 

# Default strategy order — tried in sequence; the first strategy that
# successfully parses a recipe wins.
DEFAULT_SCRAPER_STRATEGIES: list[type[ABCScraperStrategy]] = [
    RecipeScraperPackage,
    RecipeScraperOpenAI,
    RecipeScraperOpenGraph,
]

20 

21 

class RecipeScraper:
    """
    Scrapes recipes from the web by trying a sequence of scraper strategies
    in order until one succeeds.
    """

    # List of recipe scraper strategy classes. Note that order matters:
    # strategies are attempted in sequence and the first successful one wins.
    scrapers: list[type[ABCScraperStrategy]]

    def __init__(self, translator: Translator, scrapers: list[type[ABCScraperStrategy]] | None = None) -> None:
        """
        Args:
            translator: passed to each strategy and to the cleaner.
            scrapers: optional override of the strategy list; defaults to
                `DEFAULT_SCRAPER_STRATEGIES`.
        """
        if scrapers is None:
            # Copy the default list so a caller mutating `self.scrapers`
            # cannot corrupt the shared module-level default.
            scrapers = list(DEFAULT_SCRAPER_STRATEGIES)

        self.scrapers = scrapers
        self.translator = translator
        self.logger = get_logger()

    async def scrape(self, url: str, html: str | None = None) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]:
        """
        Scrapes a recipe from the web.
        Skips the network request if `html` is provided.

        Returns:
            (recipe, extras) from the first strategy whose output both parses
            and cleans successfully, or (None, None) if every strategy fails.
        """

        raw_html = html or await safe_scrape_html(url)
        for scraper_type in self.scrapers:
            scraper = scraper_type(url, self.translator, raw_html=raw_html)

            try:
                result = await scraper.parse()
            except Exception:
                # Lazy %-style args so the message is only formatted when the
                # record is actually emitted.
                self.logger.exception("Failed to scrape HTML with %s", type(scraper).__name__)
                continue

            # A strategy may return None or a (None, extras) pair on failure.
            if result is None or result[0] is None:
                continue

            recipe_result, extras = result
            try:
                recipe = cleaner.clean(recipe_result, self.translator)
            except Exception:
                self.logger.exception("Failed to clean recipe data from %s", type(scraper).__name__)
                continue

            return recipe, extras

        return None, None