Coverage for /usr/local/lib/python3.12/site-packages/prefect/server/utilities/text_search_parser.py: 7%

74 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-12-05 13:38 +0000

"""Text search query parser

Parses text search queries according to the following syntax:

- Space-separated terms → OR logic (include)
- Prefix with `-` or `!` → Exclude term
- Prefix with `+` → Required term (AND logic, future)
- Quote phrases → Match exact phrase
- Backslash escapes → Allow quotes within phrases (\")
- Case-insensitive, substring matching
- 200 character limit
"""

13 

14from dataclasses import dataclass, field 1a

15 

16 

@dataclass
class TextSearchQuery:
    """Structured result of parsing a text search query.

    Every field defaults to its own fresh empty list, so instances never
    share list objects with one another.
    """

    # Terms without a prefix; combined with OR logic.
    include: list[str] = field(default_factory=list)
    # Terms prefixed with `-` or `!`; matches must NOT contain them.
    exclude: list[str] = field(default_factory=list)
    # Terms prefixed with `+`; combined with AND logic.
    required: list[str] = field(default_factory=list)

24 

25 

def _store_term(result: TextSearchQuery, prefix: str | None, term: str) -> None:
    """Append `term` to the list of `result` selected by `prefix`."""
    if prefix in ("-", "!"):
        result.exclude.append(term)
    elif prefix == "+":
        result.required.append(term)
    else:
        result.include.append(term)


def parse_text_search_query(query: str) -> TextSearchQuery:
    """Parse a text search query string into structured components.

    Parsing rules:

    - Whitespace separates terms.
    - A leading `-` or `!` sends the term to `exclude`; a leading `+`
      sends it to `required`; anything else goes to `include`.
    - A prefix character followed by whitespace or the end of the input
      is discarded.
    - A double quote opens a phrase that runs to the next unescaped
      double quote, or to the end of the input when it is never closed.
      Inside a phrase a backslash escapes the following character.
      Phrases consisting only of whitespace are dropped.
    - An unquoted term ends at whitespace or at a double quote.

    Args:
        query: The query string to parse

    Returns:
        TextSearchQuery with parsed include/exclude/required terms
    """
    # Handle empty/whitespace-only queries
    if not query.strip():
        return TextSearchQuery()

    result = TextSearchQuery()
    length = len(query)
    i = 0

    while i < length:
        char = query[i]

        if char.isspace():
            i += 1
            continue

        prefix = None
        if char in "-!+":
            i += 1
            if i >= length or query[i].isspace():
                # Dangling prefix (followed by whitespace or end of input):
                # drop it and resume scanning after it.
                continue
            prefix = char

        # From here on `i < length` and `query[i]` is not whitespace.
        if query[i] == '"':
            i += 1  # Skip opening quote
            start = i

            # Advance to the closing quote; a backslash consumes the
            # character after it so an escaped quote does not end the phrase.
            while i < length and query[i] != '"':
                if query[i] == "\\" and i + 1 < length:
                    i += 2
                else:
                    i += 1

            # An unclosed quote simply yields the rest of the input.
            phrase = _unescape_phrase(query[start:i])

            if i < length:
                i += 1  # Skip closing quote

            if phrase.strip():
                _store_term(result, prefix, phrase)
            continue

        # Regular term: runs up to the next whitespace or double quote.
        # It is never empty because `query[i]` is neither of those.
        start = i
        while i < length and not query[i].isspace() and query[i] != '"':
            i += 1
        _store_term(result, prefix, query[start:i])

    return result

121 

122 

def _unescape_phrase(phrase: str) -> str:
    """Unescape quotes and backslashes in a quoted phrase.

    The phrase is scanned once from left to right. A backslash followed
    by a double quote or by another backslash is replaced with that
    following character. Any other backslash, including one at the very
    end of the phrase, is kept literally.

    Args:
        phrase: Raw text taken from between the quotes of a phrase

    Returns:
        The phrase with `\\"` and `\\\\` escape sequences resolved
    """
    pieces: list[str] = []
    length = len(phrase)
    i = 0
    while i < length:
        char = phrase[i]
        if char == "\\" and i + 1 < length and phrase[i + 1] in '"\\':
            # Recognised escape: emit the escaped character, drop the backslash.
            pieces.append(phrase[i + 1])
            i += 2
        else:
            # Ordinary character, or a backslash that escapes nothing.
            pieces.append(char)
            i += 1
    return "".join(pieces)

144 return "".join(result)