Coverage for opt/mealie/lib/python3.12/site-packages/mealie/services/scraper/user_agents_manager.py: 82%

34 statements  

coverage.py v7.10.6, created at 2025-11-25 15:32 +0000

from __future__ import annotations

import os
import random

_USER_AGENTS_MANAGER: UserAgentsManager | None = None


def get_user_agents_manager() -> UserAgentsManager:
    global _USER_AGENTS_MANAGER

    if not _USER_AGENTS_MANAGER:  # partial branch in report: always true; the cached-instance path was never taken
        _USER_AGENTS_MANAGER = UserAgentsManager()

    return _USER_AGENTS_MANAGER


class UserAgentsManager:
    def __init__(self) -> None:
        self._user_agents: list[str] | None = None
        self._user_agents_text_path = os.path.join(os.path.dirname(__file__), "user-agents.txt")

    def get_scrape_headers(self, user_agent: str | None = None) -> dict[str, str]:
        # From: https://scrapeops.io/web-scraping-playbook/403-forbidden-error-web-scraping/#optimize-request-headers
        if user_agent is None:  # partial branch in report: never true; a user agent was always supplied
            user_agent = random.choice(self.user_agents)

        return {
            "User-Agent": user_agent,
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
            "Accept-Language": "en-US,en;q=0.5",
            "Accept-Encoding": "gzip, deflate",
            "Connection": "keep-alive",
            "Upgrade-Insecure-Requests": "1",
            "Sec-Fetch-Dest": "document",
            "Sec-Fetch-Mode": "navigate",
            "Sec-Fetch-Site": "none",
            "Sec-Fetch-User": "?1",
            "Cache-Control": "max-age=0",
        }

    @property
    def user_agents(self) -> list[str]:
        if not self._user_agents:  # partial branch in report: always true; the cached list was never reused
            self._user_agents = self._fetch_user_agents()

        return self._user_agents

    def _fetch_user_agents(self) -> list[str]:
        user_agents: list[str] = []

        try:
            from recipe_scrapers._abstract import HEADERS

            user_agents.append(HEADERS["User-Agent"])
        except (ImportError, KeyError):
            user_agents.append("Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:123.0) Gecko/20100101 Firefox/128.0")

        with open(self._user_agents_text_path) as f:
            for line in f:
                if not line:  # partial branch in report: never true
                    continue
                user_agents.append(line.strip())

        return user_agents
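
The module's public surface is small: get_user_agents_manager() returns a lazily created, process-wide singleton, and get_scrape_headers() builds browser-like headers around either a caller-supplied or a randomly chosen User-Agent. A minimal usage sketch follows; the requests call and URL are illustrative assumptions, not part of this module:

    import requests  # illustrative HTTP client; this module does not itself perform requests

    from mealie.services.scraper.user_agents_manager import get_user_agents_manager

    manager = get_user_agents_manager()

    # Let the manager pick a random User-Agent from its pool of known strings...
    headers = manager.get_scrape_headers()

    # ...or pin one explicitly for reproducible requests.
    pinned_headers = manager.get_scrape_headers(user_agent=manager.user_agents[0])

    response = requests.get("https://example.com/recipe", headers=headers, timeout=10)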
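
The first three partial branches flagged above suggest straightforward coverage tests: call the singleton getter twice, call get_scrape_headers() with no argument, and read the user_agents property again after it has been populated. The fourth branch (if not line:) is unreachable as written, because iterating a file yields lines that still carry their trailing newline; it would only become coverable if the check were changed to if not line.strip():. A hedged pytest sketch (test names and the reset of the module global are assumptions, not existing Mealie tests):

    from mealie.services.scraper import user_agents_manager as uam


    def test_singleton_is_reused(monkeypatch):
        # Reset the module-level cache so this test does not depend on import order.
        monkeypatch.setattr(uam, "_USER_AGENTS_MANAGER", None)

        first = uam.get_user_agents_manager()
        second = uam.get_user_agents_manager()  # exercises the cached-instance branch

        assert first is second


    def test_random_user_agent_and_cached_list():
        manager = uam.UserAgentsManager()

        headers = manager.get_scrape_headers()  # no argument -> exercises the random.choice branch

        assert headers["User-Agent"] in manager.user_agents
        # A second property access returns the cached list instead of re-reading the file.
        assert manager.user_agents is manager.user_agents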