Coverage for opt/mealie/lib/python3.12/site-packages/mealie/services/scraper/scraper.py: 51%

45 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-11-25 17:29 +0000

from enum import Enum
from re import search as regex_search
from uuid import uuid4

from fastapi import HTTPException, status
from slugify import slugify

from mealie.core.root_logger import get_logger
from mealie.lang.providers import Translator
from mealie.pkgs import cache
from mealie.schema.recipe import Recipe
from mealie.services.recipe.recipe_data_service import RecipeDataService
from mealie.services.scraper.scraped_extras import ScrapedExtras

from .recipe_scraper import RecipeScraper

16 

17 

class ParserErrors(str, Enum):
    """Machine-readable error codes returned to clients when scraping fails.

    Each member's value is sent verbatim in the ``details`` field of an
    HTTP 400 response body.
    """

    BAD_RECIPE_DATA = "BAD_RECIPE_DATA"
    NO_RECIPE_DATA = "NO_RECIPE_DATA"
    CONNECTION_ERROR = "CONNECTION_ERROR"

22 

23 

async def create_from_html(
    url: str, translator: Translator, html: str | None = None
) -> tuple[Recipe, ScrapedExtras | None]:
    """Generate a :class:`Recipe` from a URL.

    When ``html`` is supplied the network fetch is skipped and the given
    markup is scraped directly; otherwise a URL is extracted from ``url``
    and fetched by the scraper.

    Args:
        url (str): a valid string representing a URL
        translator (Translator): locale provider passed through to the scraper
        html (str | None): optional HTML string to skip network request. Defaults to None.

    Returns:
        tuple[Recipe, ScrapedExtras | None]: the scraped recipe and any extras

    Raises:
        HTTPException: 400 with ``ParserErrors.BAD_RECIPE_DATA`` when no URL
            can be extracted or the scraper yields no recipe.
    """
    recipe_scraper = RecipeScraper(translator)

    if not html:
        # Pull the first URL-looking token out of the raw input string.
        match = regex_search(r"(https?://|www\.)[^\s]+", url)
        if not match:
            raise HTTPException(status.HTTP_400_BAD_REQUEST, {"details": ParserErrors.BAD_RECIPE_DATA.value})
        url = match.group(0)

    new_recipe, extras = await recipe_scraper.scrape(url, html)

    if not new_recipe:
        raise HTTPException(status.HTTP_400_BAD_REQUEST, {"details": ParserErrors.BAD_RECipe_DATA.value})

    new_recipe.id = uuid4()
    get_logger().debug(f"Image {new_recipe.image}")

    recipe_data_service = RecipeDataService(new_recipe.id)

    # Best effort: fetch the recipe image and derive the slug; any failure
    # falls back to a placeholder image rather than aborting the import.
    try:
        if new_recipe.image and isinstance(new_recipe.image, list):
            # Some scrapers return a list of image URLs; keep the first.
            new_recipe.image = new_recipe.image[0]
        await recipe_data_service.scrape_image(new_recipe.image)  # type: ignore

        if new_recipe.name is None:
            new_recipe.name = "Untitled"

        new_recipe.slug = slugify(new_recipe.name)
        new_recipe.image = cache.new_key(4)
    except Exception as e:
        recipe_data_service.logger.exception(f"Error Scraping Image: {e}")
        new_recipe.image = "no image"

    # Guarantee a non-empty, unique name (and matching slug) even when the
    # scraper produced none.
    if new_recipe.name in (None, ""):
        new_recipe.name = f"No Recipe Name Found - {uuid4()!s}"
        new_recipe.slug = slugify(new_recipe.name)

    return new_recipe, extras