Coverage for /usr/local/lib/python3.12/site-packages/prefect/server/utilities/text_search_parser.py: 74%

74 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-12-05 10:48 +0000

"""Text search query parser

Parses text search queries according to the following syntax:

- Space-separated terms → OR logic (include)
- Prefix with `-` or `!` → Exclude term
- Prefix with `+` → Required term (AND logic, future)
- Quote phrases → Match exact phrase
- Backslash escapes → Allow quotes within phrases (\")
- Case-insensitive, substring matching
- 200 character limit
"""

13 

14from dataclasses import dataclass, field 1b

15 

16 

@dataclass
class TextSearchQuery:
    """Parsed text search query structure.

    Holds the terms extracted from a query string, grouped by the prefix
    they carried. Every list defaults to a fresh empty list, so instances
    never share state.
    """

    # Unprefixed terms and phrases (OR logic)
    include: list[str] = field(default_factory=list)
    # Terms and phrases prefixed with `-` or `!` (NOT logic)
    exclude: list[str] = field(default_factory=list)
    # Terms and phrases prefixed with `+` (AND logic)
    required: list[str] = field(default_factory=list)

24 

25 

def parse_text_search_query(query: str) -> TextSearchQuery:
    """Parse a text search query string into structured components.

    The query is scanned left to right and split into whitespace-separated
    terms and double-quoted phrases:

    - A `-`, `!` or `+` counts as a prefix only when a non-whitespace
      character follows it immediately; a prefix followed by whitespace or
      by the end of the query is dropped.
    - A quoted phrase runs to the next unescaped `"`. If the quote is never
      closed, the phrase runs to the end of the query. Phrases that are
      empty or whitespace-only are discarded.
    - An unquoted term ends at the next whitespace character or `"`.

    Terms are stored exactly as written; no case normalisation happens here.

    Example: `error -debug +"flow run"` yields include `["error"]`,
    exclude `["debug"]` and required `["flow run"]`.

    Args:
        query: The query string to parse

    Returns:
        TextSearchQuery with parsed include/exclude/required terms
    """
    result = TextSearchQuery()

    # Handle empty/whitespace-only queries
    if not query.strip():
        return result

    length = len(query)
    i = 0

    while i < length:
        # Skip whitespace between terms
        if query[i].isspace():
            i += 1
            continue

        # Check for prefix ("" means the term carries no prefix)
        prefix = ""
        if query[i] in "-!+":
            if i + 1 >= length or query[i + 1].isspace():
                # Prefix followed by whitespace or end of input:
                # ignore the prefix completely
                i += 1
                continue
            prefix = query[i]
            i += 1

        # From here on query[i] exists and is not whitespace: either no
        # prefix was consumed, or the prefix check above verified it.

        # Handle quoted phrases
        if query[i] == '"':
            i += 1  # Skip opening quote
            phrase_start = i

            # Find closing quote, stepping over backslash-escaped characters
            while i < length:
                if query[i] == "\\" and i + 1 < length:
                    i += 2
                elif query[i] == '"':
                    break
                else:
                    i += 1

            # Extract phrase (even if the quote is unclosed)
            phrase = query[phrase_start:i]
            if i < length:
                i += 1  # Skip closing quote

            # Unescape quotes and backslashes in the phrase
            phrase = _unescape_phrase(phrase)
            if phrase.strip():
                _add_term(result, prefix, phrase)
            continue

        # Handle regular terms: run to the next whitespace or quote
        term_start = i
        while i < length and not query[i].isspace() and query[i] != '"':
            i += 1

        term = query[term_start:i]
        if term:
            _add_term(result, prefix, term)

    return result


def _add_term(result: TextSearchQuery, prefix: str, term: str) -> None:
    """Append `term` to the list of `result` selected by `prefix`."""
    if prefix in ("-", "!"):
        result.exclude.append(term)
    elif prefix == "+":
        result.required.append(term)
    else:
        result.include.append(term)

121 

122 

123def _unescape_phrase(phrase: str) -> str: 1b

124 """Unescape quotes and backslashes in a quoted phrase""" 

125 # Process escapes in order: first backslashes, then quotes 

126 result = [] 

127 i = 0 

128 while i < len(phrase): 

129 if phrase[i] == "\\" and i + 1 < len(phrase): 129 ↛ 130line 129 didn't jump to line 130 because the condition on line 129 was never true

130 next_char = phrase[i + 1] 

131 if next_char == '"': 

132 result.append('"') 

133 i += 2 

134 elif next_char == "\\": 

135 result.append("\\") 

136 i += 2 

137 else: 

138 # Not an escape sequence, keep the backslash 

139 result.append("\\") 

140 i += 1 

141 else: 

142 result.append(phrase[i]) 

143 i += 1 

144 return "".join(result)