Coverage for opt/mealie/lib/python3.12/site-packages/mealie/services/scraper/recipe_scraper.py: 70%
34 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-12-05 15:32 +0000
1from mealie.core.root_logger import get_logger 1d
2from mealie.lang.providers import Translator 1d
3from mealie.schema.recipe.recipe import Recipe 1d
4from mealie.services.scraper import cleaner 1d
5from mealie.services.scraper.scraped_extras import ScrapedExtras 1d
7from .scraper_strategies import ( 1d
8 ABCScraperStrategy,
9 RecipeScraperOpenAI,
10 RecipeScraperOpenGraph,
11 RecipeScraperPackage,
12 safe_scrape_html,
13)
# Ordered fallback chain used when the caller supplies no strategies;
# RecipeScraper.scrape tries each in turn and returns the first success.
DEFAULT_SCRAPER_STRATEGIES: list[type[ABCScraperStrategy]] = [
    RecipeScraperPackage,
    RecipeScraperOpenAI,
    RecipeScraperOpenGraph,
]
class RecipeScraper:
    """
    Scrapes recipes from the web.
    """

    # List of recipe scrapers. Note that order matters
    scrapers: list[type[ABCScraperStrategy]]

    def __init__(self, translator: Translator, scrapers: list[type[ABCScraperStrategy]] | None = None) -> None:
        # Fall back to the module-level default chain when none is given.
        self.scrapers = DEFAULT_SCRAPER_STRATEGIES if scrapers is None else scrapers
        self.translator = translator
        self.logger = get_logger()

    async def scrape(self, url: str, html: str | None = None) -> tuple[Recipe, ScrapedExtras] | tuple[None, None]:
        """
        Scrapes a recipe from the web.
        Skips the network request if `html` is provided.
        """

        raw_html = html or await safe_scrape_html(url)

        for strategy in self.scrapers:
            parser = strategy(url, self.translator, raw_html=raw_html)

            # A failing strategy is logged and treated as "no result" so the
            # next strategy in the chain still gets a chance.
            try:
                parsed = await parser.parse()
            except Exception:
                self.logger.exception(f"Failed to scrape HTML with {parser.__class__.__name__}")
                parsed = None

            if parsed is None or parsed[0] is None:
                continue

            raw_recipe, extras = parsed

            # Cleaning failures also fall through to the next strategy.
            try:
                cleaned = cleaner.clean(raw_recipe, self.translator)
            except Exception:
                self.logger.exception(f"Failed to clean recipe data from {parser.__class__.__name__}")
                continue

            return cleaned, extras

        # Every strategy either raised, returned nothing, or failed cleaning.
        return None, None