Coverage for opt/mealie/lib/python3.12/site-packages/mealie/services/scraper/user_agents_manager.py: 86%
34 statements
coverage.py v7.10.6, created at 2025-11-25 17:29 +0000
from __future__ import annotations

import os
import random

# Module-level singleton, created lazily by get_user_agents_manager().
_USER_AGENTS_MANAGER: UserAgentsManager | None = None


def get_user_agents_manager() -> UserAgentsManager:
    global _USER_AGENTS_MANAGER

    if not _USER_AGENTS_MANAGER:
        _USER_AGENTS_MANAGER = UserAgentsManager()

    return _USER_AGENTS_MANAGER
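
For context, a minimal usage sketch of the module-level accessor (the calling code below is an assumption for illustration, not part of this file):

    from mealie.services.scraper.user_agents_manager import get_user_agents_manager

    manager = get_user_agents_manager()
    headers = manager.get_scrape_headers()       # headers with a pooled User-Agent
    assert get_user_agents_manager() is manager  # repeat calls reuse the cached instance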
class UserAgentsManager:
    def __init__(self) -> None:
        self._user_agents: list[str] | None = None
        self._user_agents_text_path = os.path.join(os.path.dirname(__file__), "user-agents.txt")

    def get_scrape_headers(self, user_agent: str | None = None) -> dict[str, str]:
        # From: https://scrapeops.io/web-scraping-playbook/403-forbidden-error-web-scraping/#optimize-request-headers
        if user_agent is None:  # coverage: partial branch; this condition was never true in the test run
            user_agent = random.choice(self.user_agents)

        return {
            "User-Agent": user_agent,
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
            "Accept-Language": "en-US,en;q=0.5",
            "Accept-Encoding": "gzip, deflate",
            "Connection": "keep-alive",
            "Upgrade-Insecure-Requests": "1",
            "Sec-Fetch-Dest": "document",
            "Sec-Fetch-Mode": "navigate",
            "Sec-Fetch-Site": "none",
            "Sec-Fetch-User": "?1",
            "Cache-Control": "max-age=0",
        }
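
A sketch of the two call paths through get_scrape_headers (hypothetical caller; the partial-branch comment above shows only the explicit-agent path ran under test):

    manager = get_user_agents_manager()

    # Exercised path: the caller supplies an agent, so random.choice is skipped.
    headers = manager.get_scrape_headers("my-test-agent/1.0")
    assert headers["User-Agent"] == "my-test-agent/1.0"

    # Unexercised path: no agent given, one is drawn from the pool.
    headers = manager.get_scrape_headers()
    assert headers["User-Agent"] in manager.user_agents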
    @property
    def user_agents(self) -> list[str]:
        # Lazily load and cache the user agent pool on first access.
        if not self._user_agents:
            self._user_agents = self._fetch_user_agents()

        return self._user_agents
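
The property caches the parsed pool, so the file is read at most once per instance; a small illustrative sketch of that behaviour:

    manager = UserAgentsManager()
    first = manager.user_agents   # first access calls _fetch_user_agents() and caches
    second = manager.user_agents  # later accesses return the cached list
    assert first is second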
    def _fetch_user_agents(self) -> list[str]:
        user_agents: list[str] = []

        # Prefer the User-Agent shipped with recipe_scrapers, falling back to a
        # pinned Firefox string if the import or key lookup fails.
        try:
            from recipe_scrapers._abstract import HEADERS

            user_agents.append(HEADERS["User-Agent"])
        except (ImportError, KeyError):
            user_agents.append("Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:123.0) Gecko/20100101 Firefox/128.0")

        with open(self._user_agents_text_path) as f:
            for line in f:
                # coverage: partial branch; never true in the test run (iterating a
                # text file yields each line with its trailing newline, so "" never occurs)
                if not line:
                    continue
                user_agents.append(line.strip())

        return user_agents
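
The two partial branches flagged above are what hold this file at 86%. A hedged pytest sketch that would cover the user_agent-is-None branch (the test module and name are assumptions, not part of this report); note the `if not line:` branch cannot fire as written, since the loop never sees an empty string:

    from mealie.services.scraper.user_agents_manager import UserAgentsManager

    def test_get_scrape_headers_draws_agent_from_pool_when_none_given():
        # Hypothetical test: calling with no agent should pick one from the pool,
        # exercising the random.choice branch the report marks as never taken.
        manager = UserAgentsManager()
        headers = manager.get_scrape_headers(user_agent=None)
        assert headers["User-Agent"] in manager.user_agents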