Coverage for /usr/local/lib/python3.12/site-packages/prefect/server/utilities/text_search_parser.py: 7%
74 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-12-05 11:21 +0000
« prev ^ index » next coverage.py v7.10.6, created at 2025-12-05 11:21 +0000
"""Text search query parser

Parses text search queries according to the following syntax:

- Space-separated terms → OR logic (include)
- Prefix with `-` or `!` → Exclude term
- Prefix with `+` → Required term (AND logic, future)
- Quote phrases → Match exact phrase
- Backslash escapes → Allow quotes within phrases (\")
- Case-insensitive, substring matching
- 200 character limit
"""
14from dataclasses import dataclass, field 1a
@dataclass
class TextSearchQuery:
    """Container for the terms extracted from a text search query.

    Attributes:
        include: Terms written without a prefix (OR terms).
        exclude: Terms written with a `-` or `!` prefix (NOT terms).
        required: Terms written with a `+` prefix (AND terms).

    Every attribute defaults to its own fresh empty list.
    """

    include: list[str] = field(default_factory=list)
    exclude: list[str] = field(default_factory=list)
    required: list[str] = field(default_factory=list)
def parse_text_search_query(query: str) -> TextSearchQuery:
    """Split a text search query into include / exclude / required terms.

    The string is scanned left to right, one token at a time:

    - Whitespace separates tokens.
    - A `-` or `!` directly in front of a token marks it as excluded and a
      `+` marks it as required. A prefix followed by whitespace, or by the
      end of the string, is discarded.
    - A double quote opens a phrase that runs to the next unescaped double
      quote, or to the end of the string when the quote is never closed.
      Escapes inside the phrase are resolved by `_unescape_phrase`.
    - Any other token runs until the next whitespace or double quote.

    Args:
        query: The query string to parse

    Returns:
        TextSearchQuery with parsed include/exclude/required terms
    """
    parsed = TextSearchQuery()

    # Nothing to scan in an empty or whitespace-only query
    if not query.strip():
        return parsed

    # Destination list for each possible prefix (None means "no prefix")
    targets = {
        None: parsed.include,
        "-": parsed.exclude,
        "!": parsed.exclude,
        "+": parsed.required,
    }

    end = len(query)
    pos = 0
    while pos < end:
        current = query[pos]

        if current.isspace():
            pos += 1
            continue

        prefix = None
        if current in "-!+":
            pos += 1
            if pos >= end or query[pos].isspace():
                # Prefix with nothing attached to it: drop it and move on
                continue
            prefix = current

        if query[pos] == '"':
            # Quoted phrase: step over the opening quote, then scan to the
            # closing one
            pos += 1
            start = pos
            while pos < end and query[pos] != '"':
                # A backslash hides the character after it from the quote
                # check; a backslash at the very end is an ordinary character
                if query[pos] == "\\" and pos + 1 < end:
                    pos += 2
                else:
                    pos += 1
            token = _unescape_phrase(query[start:pos])
            if pos < end:
                pos += 1  # Step over the closing quote
        else:
            # Bare term: everything up to the next whitespace or quote
            start = pos
            while pos < end and query[pos] != '"' and not query[pos].isspace():
                pos += 1
            token = query[start:pos]

        # Blank phrases are dropped. Bare terms are never blank, because they
        # hold at least one character and no whitespace.
        if token.strip():
            targets[prefix].append(token)

    return parsed
123def _unescape_phrase(phrase: str) -> str: 1a
124 """Unescape quotes and backslashes in a quoted phrase"""
125 # Process escapes in order: first backslashes, then quotes
126 result = []
127 i = 0
128 while i < len(phrase):
129 if phrase[i] == "\\" and i + 1 < len(phrase):
130 next_char = phrase[i + 1]
131 if next_char == '"':
132 result.append('"')
133 i += 2
134 elif next_char == "\\":
135 result.append("\\")
136 i += 2
137 else:
138 # Not an escape sequence, keep the backslash
139 result.append("\\")
140 i += 1
141 else:
142 result.append(phrase[i])
143 i += 1
144 return "".join(result)