1 | use std::{collections::HashMap, format, string::String, vec::Vec}; |
2 | |
3 | use crate::{ |
4 | AhoCorasick, AhoCorasickBuilder, AhoCorasickKind, Anchored, Input, Match, |
5 | MatchKind, StartKind, |
6 | }; |
7 | |
8 | /// A description of a single test against an Aho-Corasick automaton. |
9 | /// |
10 | /// A single test may not necessarily pass on every configuration of an |
11 | /// Aho-Corasick automaton. The tests are categorized and grouped appropriately |
12 | /// below. |
13 | #[derive(Clone, Debug, Eq, PartialEq)] |
14 | struct SearchTest { |
15 | /// The name of this test, for debugging. |
16 | name: &'static str, |
17 | /// The patterns to search for. |
18 | patterns: &'static [&'static str], |
19 | /// The text to search. |
20 | haystack: &'static str, |
21 | /// Each match is a triple of (pattern_index, start, end), where |
22 | /// pattern_index is an index into `patterns` and `start`/`end` are indices |
23 | /// into `haystack`. |
24 | matches: &'static [(usize, usize, usize)], |
25 | } |
26 | |
27 | /// Short-hand constructor for SearchTest. We use it a lot below. |
28 | macro_rules! t { |
29 | ($name:ident, $patterns:expr, $haystack:expr, $matches:expr) => { |
30 | SearchTest { |
31 | name: stringify!($name), |
32 | patterns: $patterns, |
33 | haystack: $haystack, |
34 | matches: $matches, |
35 | } |
36 | }; |
37 | } |
38 | |
39 | /// A collection of test groups. |
40 | type TestCollection = &'static [&'static [SearchTest]]; |
41 | |
42 | // Define several collections corresponding to the different type of match |
43 | // semantics supported by Aho-Corasick. These collections have some overlap, |
44 | // but each collection should have some tests that no other collection has. |
45 | |
46 | /// Tests for Aho-Corasick's standard non-overlapping match semantics. |
47 | const AC_STANDARD_NON_OVERLAPPING: TestCollection = |
48 | &[BASICS, NON_OVERLAPPING, STANDARD, REGRESSION]; |
49 | |
50 | /// Tests for Aho-Corasick's anchored standard non-overlapping match semantics. |
51 | const AC_STANDARD_ANCHORED_NON_OVERLAPPING: TestCollection = |
52 | &[ANCHORED_BASICS, ANCHORED_NON_OVERLAPPING, STANDARD_ANCHORED]; |
53 | |
54 | /// Tests for Aho-Corasick's standard overlapping match semantics. |
55 | const AC_STANDARD_OVERLAPPING: TestCollection = |
56 | &[BASICS, OVERLAPPING, REGRESSION]; |
57 | |
58 | /* |
59 | Iterators of anchored overlapping searches were removed from the API in |
60 | after 0.7, but we leave the tests commented out for posterity. |
61 | /// Tests for Aho-Corasick's anchored standard overlapping match semantics. |
62 | const AC_STANDARD_ANCHORED_OVERLAPPING: TestCollection = |
63 | &[ANCHORED_BASICS, ANCHORED_OVERLAPPING]; |
64 | */ |
65 | |
66 | /// Tests for Aho-Corasick's leftmost-first match semantics. |
67 | const AC_LEFTMOST_FIRST: TestCollection = |
68 | &[BASICS, NON_OVERLAPPING, LEFTMOST, LEFTMOST_FIRST, REGRESSION]; |
69 | |
70 | /// Tests for Aho-Corasick's anchored leftmost-first match semantics. |
71 | const AC_LEFTMOST_FIRST_ANCHORED: TestCollection = &[ |
72 | ANCHORED_BASICS, |
73 | ANCHORED_NON_OVERLAPPING, |
74 | ANCHORED_LEFTMOST, |
75 | ANCHORED_LEFTMOST_FIRST, |
76 | ]; |
77 | |
78 | /// Tests for Aho-Corasick's leftmost-longest match semantics. |
79 | const AC_LEFTMOST_LONGEST: TestCollection = |
80 | &[BASICS, NON_OVERLAPPING, LEFTMOST, LEFTMOST_LONGEST, REGRESSION]; |
81 | |
82 | /// Tests for Aho-Corasick's anchored leftmost-longest match semantics. |
83 | const AC_LEFTMOST_LONGEST_ANCHORED: TestCollection = &[ |
84 | ANCHORED_BASICS, |
85 | ANCHORED_NON_OVERLAPPING, |
86 | ANCHORED_LEFTMOST, |
87 | ANCHORED_LEFTMOST_LONGEST, |
88 | ]; |
89 | |
90 | // Now define the individual tests that make up the collections above. |
91 | |
92 | /// A collection of tests for the Aho-Corasick algorithm that should always be |
93 | /// true regardless of match semantics. That is, all combinations of |
94 | /// leftmost-{shortest, first, longest} x {overlapping, non-overlapping} |
95 | /// should produce the same answer. |
96 | const BASICS: &'static [SearchTest] = &[ |
97 | t!(basic000, &[], "" , &[]), |
98 | t!(basic001, &["" ], "a" , &[(0, 0, 0), (0, 1, 1)]), |
99 | t!(basic002, &["a" ], "" , &[]), |
100 | t!(basic010, &["a" ], "a" , &[(0, 0, 1)]), |
101 | t!(basic020, &["a" ], "aa" , &[(0, 0, 1), (0, 1, 2)]), |
102 | t!(basic030, &["a" ], "aaa" , &[(0, 0, 1), (0, 1, 2), (0, 2, 3)]), |
103 | t!(basic040, &["a" ], "aba" , &[(0, 0, 1), (0, 2, 3)]), |
104 | t!(basic050, &["a" ], "bba" , &[(0, 2, 3)]), |
105 | t!(basic060, &["a" ], "bbb" , &[]), |
106 | t!(basic070, &["a" ], "bababbbba" , &[(0, 1, 2), (0, 3, 4), (0, 8, 9)]), |
107 | t!(basic100, &["aa" ], "" , &[]), |
108 | t!(basic110, &["aa" ], "aa" , &[(0, 0, 2)]), |
109 | t!(basic120, &["aa" ], "aabbaa" , &[(0, 0, 2), (0, 4, 6)]), |
110 | t!(basic130, &["aa" ], "abbab" , &[]), |
111 | t!(basic140, &["aa" ], "abbabaa" , &[(0, 5, 7)]), |
112 | t!(basic200, &["abc" ], "abc" , &[(0, 0, 3)]), |
113 | t!(basic210, &["abc" ], "zazabzabcz" , &[(0, 6, 9)]), |
114 | t!(basic220, &["abc" ], "zazabczabcz" , &[(0, 3, 6), (0, 7, 10)]), |
115 | t!(basic300, &["a" , "b" ], "" , &[]), |
116 | t!(basic310, &["a" , "b" ], "z" , &[]), |
117 | t!(basic320, &["a" , "b" ], "b" , &[(1, 0, 1)]), |
118 | t!(basic330, &["a" , "b" ], "a" , &[(0, 0, 1)]), |
119 | t!( |
120 | basic340, |
121 | &["a" , "b" ], |
122 | "abba" , |
123 | &[(0, 0, 1), (1, 1, 2), (1, 2, 3), (0, 3, 4),] |
124 | ), |
125 | t!( |
126 | basic350, |
127 | &["b" , "a" ], |
128 | "abba" , |
129 | &[(1, 0, 1), (0, 1, 2), (0, 2, 3), (1, 3, 4),] |
130 | ), |
131 | t!(basic360, &["abc" , "bc" ], "xbc" , &[(1, 1, 3),]), |
132 | t!(basic400, &["foo" , "bar" ], "" , &[]), |
133 | t!(basic410, &["foo" , "bar" ], "foobar" , &[(0, 0, 3), (1, 3, 6),]), |
134 | t!(basic420, &["foo" , "bar" ], "barfoo" , &[(1, 0, 3), (0, 3, 6),]), |
135 | t!(basic430, &["foo" , "bar" ], "foofoo" , &[(0, 0, 3), (0, 3, 6),]), |
136 | t!(basic440, &["foo" , "bar" ], "barbar" , &[(1, 0, 3), (1, 3, 6),]), |
137 | t!(basic450, &["foo" , "bar" ], "bafofoo" , &[(0, 4, 7),]), |
138 | t!(basic460, &["bar" , "foo" ], "bafofoo" , &[(1, 4, 7),]), |
139 | t!(basic470, &["foo" , "bar" ], "fobabar" , &[(1, 4, 7),]), |
140 | t!(basic480, &["bar" , "foo" ], "fobabar" , &[(0, 4, 7),]), |
141 | t!(basic600, &["" ], "" , &[(0, 0, 0)]), |
142 | t!(basic610, &["" ], "a" , &[(0, 0, 0), (0, 1, 1)]), |
143 | t!(basic620, &["" ], "abc" , &[(0, 0, 0), (0, 1, 1), (0, 2, 2), (0, 3, 3)]), |
144 | t!(basic700, &["yabcdef" , "abcdezghi" ], "yabcdefghi" , &[(0, 0, 7),]), |
145 | t!(basic710, &["yabcdef" , "abcdezghi" ], "yabcdezghi" , &[(1, 1, 10),]), |
146 | t!( |
147 | basic720, |
148 | &["yabcdef" , "bcdeyabc" , "abcdezghi" ], |
149 | "yabcdezghi" , |
150 | &[(2, 1, 10),] |
151 | ), |
152 | ]; |
153 | |
154 | /// A collection of *anchored* tests for the Aho-Corasick algorithm that should |
155 | /// always be true regardless of match semantics. That is, all combinations of |
156 | /// leftmost-{shortest, first, longest} x {overlapping, non-overlapping} should |
157 | /// produce the same answer. |
158 | const ANCHORED_BASICS: &'static [SearchTest] = &[ |
159 | t!(abasic000, &[], "" , &[]), |
160 | t!(abasic001, &[], "a" , &[]), |
161 | t!(abasic002, &[], "abc" , &[]), |
162 | t!(abasic010, &["" ], "" , &[(0, 0, 0)]), |
163 | t!(abasic020, &["" ], "a" , &[(0, 0, 0), (0, 1, 1)]), |
164 | t!(abasic030, &["" ], "abc" , &[(0, 0, 0), (0, 1, 1), (0, 2, 2), (0, 3, 3)]), |
165 | t!(abasic100, &["a" ], "a" , &[(0, 0, 1)]), |
166 | t!(abasic110, &["a" ], "aa" , &[(0, 0, 1), (0, 1, 2)]), |
167 | t!(abasic120, &["a" , "b" ], "ab" , &[(0, 0, 1), (1, 1, 2)]), |
168 | t!(abasic130, &["a" , "b" ], "ba" , &[(1, 0, 1), (0, 1, 2)]), |
169 | t!(abasic140, &["foo" , "foofoo" ], "foo" , &[(0, 0, 3)]), |
170 | t!(abasic150, &["foofoo" , "foo" ], "foo" , &[(1, 0, 3)]), |
171 | t!(abasic200, &["foo" ], "foofoo foo" , &[(0, 0, 3), (0, 3, 6)]), |
172 | ]; |
173 | |
174 | /// Tests for non-overlapping standard match semantics. |
175 | /// |
176 | /// These tests generally shouldn't pass for leftmost-{first,longest}, although |
177 | /// some do in order to write clearer tests. For example, standard000 will |
178 | /// pass with leftmost-first semantics, but standard010 will not. We write |
179 | /// both to emphasize how the match semantics work. |
180 | const STANDARD: &'static [SearchTest] = &[ |
181 | t!(standard000, &["ab" , "abcd" ], "abcd" , &[(0, 0, 2)]), |
182 | t!(standard010, &["abcd" , "ab" ], "abcd" , &[(1, 0, 2)]), |
183 | t!(standard020, &["abcd" , "ab" , "abc" ], "abcd" , &[(1, 0, 2)]), |
184 | t!(standard030, &["abcd" , "abc" , "ab" ], "abcd" , &[(2, 0, 2)]), |
185 | t!(standard040, &["a" , "" ], "a" , &[(1, 0, 0), (1, 1, 1)]), |
186 | t!( |
187 | standard400, |
188 | &["abcd" , "bcd" , "cd" , "b" ], |
189 | "abcd" , |
190 | &[(3, 1, 2), (2, 2, 4),] |
191 | ), |
192 | t!(standard410, &["" , "a" ], "a" , &[(0, 0, 0), (0, 1, 1),]), |
193 | t!(standard420, &["" , "a" ], "aa" , &[(0, 0, 0), (0, 1, 1), (0, 2, 2),]), |
194 | t!(standard430, &["" , "a" , "" ], "a" , &[(0, 0, 0), (0, 1, 1),]), |
195 | t!(standard440, &["a" , "" , "" ], "a" , &[(1, 0, 0), (1, 1, 1),]), |
196 | t!(standard450, &["" , "" , "a" ], "a" , &[(0, 0, 0), (0, 1, 1),]), |
197 | ]; |
198 | |
199 | /// Like STANDARD, but for anchored searches. |
200 | const STANDARD_ANCHORED: &'static [SearchTest] = &[ |
201 | t!(astandard000, &["ab" , "abcd" ], "abcd" , &[(0, 0, 2)]), |
202 | t!(astandard010, &["abcd" , "ab" ], "abcd" , &[(1, 0, 2)]), |
203 | t!(astandard020, &["abcd" , "ab" , "abc" ], "abcd" , &[(1, 0, 2)]), |
204 | t!(astandard030, &["abcd" , "abc" , "ab" ], "abcd" , &[(2, 0, 2)]), |
205 | t!(astandard040, &["a" , "" ], "a" , &[(1, 0, 0), (1, 1, 1)]), |
206 | t!(astandard050, &["abcd" , "bcd" , "cd" , "b" ], "abcd" , &[(0, 0, 4)]), |
207 | t!(astandard410, &["" , "a" ], "a" , &[(0, 0, 0), (0, 1, 1)]), |
208 | t!(astandard420, &["" , "a" ], "aa" , &[(0, 0, 0), (0, 1, 1), (0, 2, 2)]), |
209 | t!(astandard430, &["" , "a" , "" ], "a" , &[(0, 0, 0), (0, 1, 1)]), |
210 | t!(astandard440, &["a" , "" , "" ], "a" , &[(1, 0, 0), (1, 1, 1)]), |
211 | t!(astandard450, &["" , "" , "a" ], "a" , &[(0, 0, 0), (0, 1, 1)]), |
212 | ]; |
213 | |
214 | /// Tests for non-overlapping leftmost match semantics. These should pass for |
215 | /// both leftmost-first and leftmost-longest match kinds. Stated differently, |
216 | /// among ambiguous matches, the longest match and the match that appeared |
217 | /// first when constructing the automaton should always be the same. |
218 | const LEFTMOST: &'static [SearchTest] = &[ |
219 | t!(leftmost000, &["ab" , "ab" ], "abcd" , &[(0, 0, 2)]), |
220 | t!(leftmost010, &["a" , "" ], "a" , &[(0, 0, 1)]), |
221 | t!(leftmost011, &["a" , "" ], "ab" , &[(0, 0, 1), (1, 2, 2)]), |
222 | t!(leftmost020, &["" , "" ], "a" , &[(0, 0, 0), (0, 1, 1)]), |
223 | t!(leftmost030, &["a" , "ab" ], "aa" , &[(0, 0, 1), (0, 1, 2)]), |
224 | t!(leftmost031, &["ab" , "a" ], "aa" , &[(1, 0, 1), (1, 1, 2)]), |
225 | t!(leftmost032, &["ab" , "a" ], "xayabbbz" , &[(1, 1, 2), (0, 3, 5)]), |
226 | t!(leftmost300, &["abcd" , "bce" , "b" ], "abce" , &[(1, 1, 4)]), |
227 | t!(leftmost310, &["abcd" , "ce" , "bc" ], "abce" , &[(2, 1, 3)]), |
228 | t!(leftmost320, &["abcd" , "bce" , "ce" , "b" ], "abce" , &[(1, 1, 4)]), |
229 | t!(leftmost330, &["abcd" , "bce" , "cz" , "bc" ], "abcz" , &[(3, 1, 3)]), |
230 | t!(leftmost340, &["bce" , "cz" , "bc" ], "bcz" , &[(2, 0, 2)]), |
231 | t!(leftmost350, &["abc" , "bd" , "ab" ], "abd" , &[(2, 0, 2)]), |
232 | t!( |
233 | leftmost360, |
234 | &["abcdefghi" , "hz" , "abcdefgh" ], |
235 | "abcdefghz" , |
236 | &[(2, 0, 8),] |
237 | ), |
238 | t!( |
239 | leftmost370, |
240 | &["abcdefghi" , "cde" , "hz" , "abcdefgh" ], |
241 | "abcdefghz" , |
242 | &[(3, 0, 8),] |
243 | ), |
244 | t!( |
245 | leftmost380, |
246 | &["abcdefghi" , "hz" , "abcdefgh" , "a" ], |
247 | "abcdefghz" , |
248 | &[(2, 0, 8),] |
249 | ), |
250 | t!( |
251 | leftmost390, |
252 | &["b" , "abcdefghi" , "hz" , "abcdefgh" ], |
253 | "abcdefghz" , |
254 | &[(3, 0, 8),] |
255 | ), |
256 | t!( |
257 | leftmost400, |
258 | &["h" , "abcdefghi" , "hz" , "abcdefgh" ], |
259 | "abcdefghz" , |
260 | &[(3, 0, 8),] |
261 | ), |
262 | t!( |
263 | leftmost410, |
264 | &["z" , "abcdefghi" , "hz" , "abcdefgh" ], |
265 | "abcdefghz" , |
266 | &[(3, 0, 8), (0, 8, 9),] |
267 | ), |
268 | ]; |
269 | |
270 | /// Like LEFTMOST, but for anchored searches. |
271 | const ANCHORED_LEFTMOST: &'static [SearchTest] = &[ |
272 | t!(aleftmost000, &["ab" , "ab" ], "abcd" , &[(0, 0, 2)]), |
273 | // We shouldn't allow an empty match immediately following a match, right? |
274 | t!(aleftmost010, &["a" , "" ], "a" , &[(0, 0, 1)]), |
275 | t!(aleftmost020, &["" , "" ], "a" , &[(0, 0, 0), (0, 1, 1)]), |
276 | t!(aleftmost030, &["a" , "ab" ], "aa" , &[(0, 0, 1), (0, 1, 2)]), |
277 | t!(aleftmost031, &["ab" , "a" ], "aa" , &[(1, 0, 1), (1, 1, 2)]), |
278 | t!(aleftmost032, &["ab" , "a" ], "xayabbbz" , &[]), |
279 | t!(aleftmost300, &["abcd" , "bce" , "b" ], "abce" , &[]), |
280 | t!(aleftmost301, &["abcd" , "bcd" , "cd" , "b" ], "abcd" , &[(0, 0, 4)]), |
281 | t!(aleftmost310, &["abcd" , "ce" , "bc" ], "abce" , &[]), |
282 | t!(aleftmost320, &["abcd" , "bce" , "ce" , "b" ], "abce" , &[]), |
283 | t!(aleftmost330, &["abcd" , "bce" , "cz" , "bc" ], "abcz" , &[]), |
284 | t!(aleftmost340, &["bce" , "cz" , "bc" ], "bcz" , &[(2, 0, 2)]), |
285 | t!(aleftmost350, &["abc" , "bd" , "ab" ], "abd" , &[(2, 0, 2)]), |
286 | t!( |
287 | aleftmost360, |
288 | &["abcdefghi" , "hz" , "abcdefgh" ], |
289 | "abcdefghz" , |
290 | &[(2, 0, 8),] |
291 | ), |
292 | t!( |
293 | aleftmost370, |
294 | &["abcdefghi" , "cde" , "hz" , "abcdefgh" ], |
295 | "abcdefghz" , |
296 | &[(3, 0, 8),] |
297 | ), |
298 | t!( |
299 | aleftmost380, |
300 | &["abcdefghi" , "hz" , "abcdefgh" , "a" ], |
301 | "abcdefghz" , |
302 | &[(2, 0, 8),] |
303 | ), |
304 | t!( |
305 | aleftmost390, |
306 | &["b" , "abcdefghi" , "hz" , "abcdefgh" ], |
307 | "abcdefghz" , |
308 | &[(3, 0, 8),] |
309 | ), |
310 | t!( |
311 | aleftmost400, |
312 | &["h" , "abcdefghi" , "hz" , "abcdefgh" ], |
313 | "abcdefghz" , |
314 | &[(3, 0, 8),] |
315 | ), |
316 | t!( |
317 | aleftmost410, |
318 | &["z" , "abcdefghi" , "hz" , "abcdefgh" ], |
319 | "abcdefghzyz" , |
320 | &[(3, 0, 8), (0, 8, 9)] |
321 | ), |
322 | ]; |
323 | |
324 | /// Tests for non-overlapping leftmost-first match semantics. These tests |
325 | /// should generally be specific to leftmost-first, which means they should |
326 | /// generally fail under leftmost-longest semantics. |
327 | const LEFTMOST_FIRST: &'static [SearchTest] = &[ |
328 | t!(leftfirst000, &["ab" , "abcd" ], "abcd" , &[(0, 0, 2)]), |
329 | t!(leftfirst010, &["" , "a" ], "a" , &[(0, 0, 0), (0, 1, 1)]), |
330 | t!(leftfirst011, &["" , "a" , "" ], "a" , &[(0, 0, 0), (0, 1, 1),]), |
331 | t!(leftfirst012, &["a" , "" , "" ], "a" , &[(0, 0, 1)]), |
332 | t!(leftfirst013, &["" , "" , "a" ], "a" , &[(0, 0, 0), (0, 1, 1)]), |
333 | t!(leftfirst014, &["a" , "" ], "a" , &[(0, 0, 1)]), |
334 | t!(leftfirst015, &["a" , "" ], "ab" , &[(0, 0, 1), (1, 2, 2)]), |
335 | t!(leftfirst020, &["abcd" , "ab" ], "abcd" , &[(0, 0, 4)]), |
336 | t!(leftfirst030, &["ab" , "ab" ], "abcd" , &[(0, 0, 2)]), |
337 | t!(leftfirst040, &["a" , "ab" ], "xayabbbz" , &[(0, 1, 2), (0, 3, 4)]), |
338 | t!(leftfirst100, &["abcdefg" , "bcde" , "bcdef" ], "abcdef" , &[(1, 1, 5)]), |
339 | t!(leftfirst110, &["abcdefg" , "bcdef" , "bcde" ], "abcdef" , &[(1, 1, 6)]), |
340 | t!(leftfirst300, &["abcd" , "b" , "bce" ], "abce" , &[(1, 1, 2)]), |
341 | t!( |
342 | leftfirst310, |
343 | &["abcd" , "b" , "bce" , "ce" ], |
344 | "abce" , |
345 | &[(1, 1, 2), (3, 2, 4),] |
346 | ), |
347 | t!( |
348 | leftfirst320, |
349 | &["a" , "abcdefghi" , "hz" , "abcdefgh" ], |
350 | "abcdefghz" , |
351 | &[(0, 0, 1), (2, 7, 9),] |
352 | ), |
353 | t!(leftfirst330, &["a" , "abab" ], "abab" , &[(0, 0, 1), (0, 2, 3)]), |
354 | t!(leftfirst400, &["amwix" , "samwise" , "sam" ], "Zsamwix" , &[(2, 1, 4)]), |
355 | ]; |
356 | |
357 | /// Like LEFTMOST_FIRST, but for anchored searches. |
358 | const ANCHORED_LEFTMOST_FIRST: &'static [SearchTest] = &[ |
359 | t!(aleftfirst000, &["ab" , "abcd" ], "abcd" , &[(0, 0, 2)]), |
360 | t!(aleftfirst010, &["" , "a" ], "a" , &[(0, 0, 0), (0, 1, 1)]), |
361 | t!(aleftfirst011, &["" , "a" , "" ], "a" , &[(0, 0, 0), (0, 1, 1)]), |
362 | t!(aleftfirst012, &["a" , "" , "" ], "a" , &[(0, 0, 1)]), |
363 | t!(aleftfirst013, &["" , "" , "a" ], "a" , &[(0, 0, 0), (0, 1, 1)]), |
364 | t!(aleftfirst020, &["abcd" , "ab" ], "abcd" , &[(0, 0, 4)]), |
365 | t!(aleftfirst030, &["ab" , "ab" ], "abcd" , &[(0, 0, 2)]), |
366 | t!(aleftfirst040, &["a" , "ab" ], "xayabbbz" , &[]), |
367 | t!(aleftfirst100, &["abcdefg" , "bcde" , "bcdef" ], "abcdef" , &[]), |
368 | t!(aleftfirst110, &["abcdefg" , "bcdef" , "bcde" ], "abcdef" , &[]), |
369 | t!(aleftfirst300, &["abcd" , "b" , "bce" ], "abce" , &[]), |
370 | t!(aleftfirst310, &["abcd" , "b" , "bce" , "ce" ], "abce" , &[]), |
371 | t!( |
372 | aleftfirst320, |
373 | &["a" , "abcdefghi" , "hz" , "abcdefgh" ], |
374 | "abcdefghz" , |
375 | &[(0, 0, 1)] |
376 | ), |
377 | t!(aleftfirst330, &["a" , "abab" ], "abab" , &[(0, 0, 1)]), |
378 | t!(aleftfirst400, &["wise" , "samwise" , "sam" ], "samwix" , &[(2, 0, 3)]), |
379 | ]; |
380 | |
381 | /// Tests for non-overlapping leftmost-longest match semantics. These tests |
382 | /// should generally be specific to leftmost-longest, which means they should |
383 | /// generally fail under leftmost-first semantics. |
384 | const LEFTMOST_LONGEST: &'static [SearchTest] = &[ |
385 | t!(leftlong000, &["ab" , "abcd" ], "abcd" , &[(1, 0, 4)]), |
386 | t!(leftlong010, &["abcd" , "bcd" , "cd" , "b" ], "abcd" , &[(0, 0, 4),]), |
387 | t!(leftlong020, &["" , "a" ], "a" , &[(1, 0, 1)]), |
388 | t!(leftlong021, &["" , "a" , "" ], "a" , &[(1, 0, 1)]), |
389 | t!(leftlong022, &["a" , "" , "" ], "a" , &[(0, 0, 1)]), |
390 | t!(leftlong023, &["" , "" , "a" ], "a" , &[(2, 0, 1)]), |
391 | t!(leftlong024, &["" , "a" ], "ab" , &[(1, 0, 1), (0, 2, 2)]), |
392 | t!(leftlong030, &["" , "a" ], "aa" , &[(1, 0, 1), (1, 1, 2)]), |
393 | t!(leftlong040, &["a" , "ab" ], "a" , &[(0, 0, 1)]), |
394 | t!(leftlong050, &["a" , "ab" ], "ab" , &[(1, 0, 2)]), |
395 | t!(leftlong060, &["ab" , "a" ], "a" , &[(1, 0, 1)]), |
396 | t!(leftlong070, &["ab" , "a" ], "ab" , &[(0, 0, 2)]), |
397 | t!(leftlong100, &["abcdefg" , "bcde" , "bcdef" ], "abcdef" , &[(2, 1, 6)]), |
398 | t!(leftlong110, &["abcdefg" , "bcdef" , "bcde" ], "abcdef" , &[(1, 1, 6)]), |
399 | t!(leftlong300, &["abcd" , "b" , "bce" ], "abce" , &[(2, 1, 4)]), |
400 | t!( |
401 | leftlong310, |
402 | &["a" , "abcdefghi" , "hz" , "abcdefgh" ], |
403 | "abcdefghz" , |
404 | &[(3, 0, 8),] |
405 | ), |
406 | t!(leftlong320, &["a" , "abab" ], "abab" , &[(1, 0, 4)]), |
407 | t!(leftlong330, &["abcd" , "b" , "ce" ], "abce" , &[(1, 1, 2), (2, 2, 4),]), |
408 | t!(leftlong340, &["a" , "ab" ], "xayabbbz" , &[(0, 1, 2), (1, 3, 5)]), |
409 | ]; |
410 | |
411 | /// Like LEFTMOST_LONGEST, but for anchored searches. |
412 | const ANCHORED_LEFTMOST_LONGEST: &'static [SearchTest] = &[ |
413 | t!(aleftlong000, &["ab" , "abcd" ], "abcd" , &[(1, 0, 4)]), |
414 | t!(aleftlong010, &["abcd" , "bcd" , "cd" , "b" ], "abcd" , &[(0, 0, 4),]), |
415 | t!(aleftlong020, &["" , "a" ], "a" , &[(1, 0, 1)]), |
416 | t!(aleftlong021, &["" , "a" , "" ], "a" , &[(1, 0, 1)]), |
417 | t!(aleftlong022, &["a" , "" , "" ], "a" , &[(0, 0, 1)]), |
418 | t!(aleftlong023, &["" , "" , "a" ], "a" , &[(2, 0, 1)]), |
419 | t!(aleftlong030, &["" , "a" ], "aa" , &[(1, 0, 1), (1, 1, 2)]), |
420 | t!(aleftlong040, &["a" , "ab" ], "a" , &[(0, 0, 1)]), |
421 | t!(aleftlong050, &["a" , "ab" ], "ab" , &[(1, 0, 2)]), |
422 | t!(aleftlong060, &["ab" , "a" ], "a" , &[(1, 0, 1)]), |
423 | t!(aleftlong070, &["ab" , "a" ], "ab" , &[(0, 0, 2)]), |
424 | t!(aleftlong100, &["abcdefg" , "bcde" , "bcdef" ], "abcdef" , &[]), |
425 | t!(aleftlong110, &["abcdefg" , "bcdef" , "bcde" ], "abcdef" , &[]), |
426 | t!(aleftlong300, &["abcd" , "b" , "bce" ], "abce" , &[]), |
427 | t!( |
428 | aleftlong310, |
429 | &["a" , "abcdefghi" , "hz" , "abcdefgh" ], |
430 | "abcdefghz" , |
431 | &[(3, 0, 8),] |
432 | ), |
433 | t!(aleftlong320, &["a" , "abab" ], "abab" , &[(1, 0, 4)]), |
434 | t!(aleftlong330, &["abcd" , "b" , "ce" ], "abce" , &[]), |
435 | t!(aleftlong340, &["a" , "ab" ], "xayabbbz" , &[]), |
436 | ]; |
437 | |
438 | /// Tests for non-overlapping match semantics. |
439 | /// |
440 | /// Generally these tests shouldn't pass when using overlapping semantics. |
441 | /// These should pass for both standard and leftmost match semantics. |
442 | const NON_OVERLAPPING: &'static [SearchTest] = &[ |
443 | t!(nover010, &["abcd" , "bcd" , "cd" ], "abcd" , &[(0, 0, 4),]), |
444 | t!(nover020, &["bcd" , "cd" , "abcd" ], "abcd" , &[(2, 0, 4),]), |
445 | t!(nover030, &["abc" , "bc" ], "zazabcz" , &[(0, 3, 6),]), |
446 | t!( |
447 | nover100, |
448 | &["ab" , "ba" ], |
449 | "abababa" , |
450 | &[(0, 0, 2), (0, 2, 4), (0, 4, 6),] |
451 | ), |
452 | t!(nover200, &["foo" , "foo" ], "foobarfoo" , &[(0, 0, 3), (0, 6, 9),]), |
453 | t!(nover300, &["" , "" ], "" , &[(0, 0, 0),]), |
454 | t!(nover310, &["" , "" ], "a" , &[(0, 0, 0), (0, 1, 1),]), |
455 | ]; |
456 | |
457 | /// Like NON_OVERLAPPING, but for anchored searches. |
458 | const ANCHORED_NON_OVERLAPPING: &'static [SearchTest] = &[ |
459 | t!(anover010, &["abcd" , "bcd" , "cd" ], "abcd" , &[(0, 0, 4),]), |
460 | t!(anover020, &["bcd" , "cd" , "abcd" ], "abcd" , &[(2, 0, 4),]), |
461 | t!(anover030, &["abc" , "bc" ], "zazabcz" , &[]), |
462 | t!( |
463 | anover100, |
464 | &["ab" , "ba" ], |
465 | "abababa" , |
466 | &[(0, 0, 2), (0, 2, 4), (0, 4, 6)] |
467 | ), |
468 | t!(anover200, &["foo" , "foo" ], "foobarfoo" , &[(0, 0, 3)]), |
469 | t!(anover300, &["" , "" ], "" , &[(0, 0, 0)]), |
470 | t!(anover310, &["" , "" ], "a" , &[(0, 0, 0), (0, 1, 1)]), |
471 | ]; |
472 | |
473 | /// Tests for overlapping match semantics. |
474 | /// |
475 | /// This only supports standard match semantics, since leftmost-{first,longest} |
476 | /// do not support overlapping matches. |
477 | const OVERLAPPING: &'static [SearchTest] = &[ |
478 | t!( |
479 | over000, |
480 | &["abcd" , "bcd" , "cd" , "b" ], |
481 | "abcd" , |
482 | &[(3, 1, 2), (0, 0, 4), (1, 1, 4), (2, 2, 4),] |
483 | ), |
484 | t!( |
485 | over010, |
486 | &["bcd" , "cd" , "b" , "abcd" ], |
487 | "abcd" , |
488 | &[(2, 1, 2), (3, 0, 4), (0, 1, 4), (1, 2, 4),] |
489 | ), |
490 | t!( |
491 | over020, |
492 | &["abcd" , "bcd" , "cd" ], |
493 | "abcd" , |
494 | &[(0, 0, 4), (1, 1, 4), (2, 2, 4),] |
495 | ), |
496 | t!( |
497 | over030, |
498 | &["bcd" , "abcd" , "cd" ], |
499 | "abcd" , |
500 | &[(1, 0, 4), (0, 1, 4), (2, 2, 4),] |
501 | ), |
502 | t!( |
503 | over040, |
504 | &["bcd" , "cd" , "abcd" ], |
505 | "abcd" , |
506 | &[(2, 0, 4), (0, 1, 4), (1, 2, 4),] |
507 | ), |
508 | t!(over050, &["abc" , "bc" ], "zazabcz" , &[(0, 3, 6), (1, 4, 6),]), |
509 | t!( |
510 | over100, |
511 | &["ab" , "ba" ], |
512 | "abababa" , |
513 | &[(0, 0, 2), (1, 1, 3), (0, 2, 4), (1, 3, 5), (0, 4, 6), (1, 5, 7),] |
514 | ), |
515 | t!( |
516 | over200, |
517 | &["foo" , "foo" ], |
518 | "foobarfoo" , |
519 | &[(0, 0, 3), (1, 0, 3), (0, 6, 9), (1, 6, 9),] |
520 | ), |
521 | t!(over300, &["" , "" ], "" , &[(0, 0, 0), (1, 0, 0),]), |
522 | t!( |
523 | over310, |
524 | &["" , "" ], |
525 | "a" , |
526 | &[(0, 0, 0), (1, 0, 0), (0, 1, 1), (1, 1, 1),] |
527 | ), |
528 | t!(over320, &["" , "a" ], "a" , &[(0, 0, 0), (1, 0, 1), (0, 1, 1),]), |
529 | t!( |
530 | over330, |
531 | &["" , "a" , "" ], |
532 | "a" , |
533 | &[(0, 0, 0), (2, 0, 0), (1, 0, 1), (0, 1, 1), (2, 1, 1),] |
534 | ), |
535 | t!( |
536 | over340, |
537 | &["a" , "" , "" ], |
538 | "a" , |
539 | &[(1, 0, 0), (2, 0, 0), (0, 0, 1), (1, 1, 1), (2, 1, 1),] |
540 | ), |
541 | t!( |
542 | over350, |
543 | &["" , "" , "a" ], |
544 | "a" , |
545 | &[(0, 0, 0), (1, 0, 0), (2, 0, 1), (0, 1, 1), (1, 1, 1),] |
546 | ), |
547 | t!( |
548 | over360, |
549 | &["foo" , "foofoo" ], |
550 | "foofoo" , |
551 | &[(0, 0, 3), (1, 0, 6), (0, 3, 6)] |
552 | ), |
553 | ]; |
554 | |
555 | /* |
556 | Iterators of anchored overlapping searches were removed from the API in |
557 | after 0.7, but we leave the tests commented out for posterity. |
558 | /// Like OVERLAPPING, but for anchored searches. |
559 | const ANCHORED_OVERLAPPING: &'static [SearchTest] = &[ |
560 | t!(aover000, &["abcd", "bcd", "cd", "b"], "abcd", &[(0, 0, 4)]), |
561 | t!(aover010, &["bcd", "cd", "b", "abcd"], "abcd", &[(3, 0, 4)]), |
562 | t!(aover020, &["abcd", "bcd", "cd"], "abcd", &[(0, 0, 4)]), |
563 | t!(aover030, &["bcd", "abcd", "cd"], "abcd", &[(1, 0, 4)]), |
564 | t!(aover040, &["bcd", "cd", "abcd"], "abcd", &[(2, 0, 4)]), |
565 | t!(aover050, &["abc", "bc"], "zazabcz", &[]), |
566 | t!(aover100, &["ab", "ba"], "abababa", &[(0, 0, 2)]), |
567 | t!(aover200, &["foo", "foo"], "foobarfoo", &[(0, 0, 3), (1, 0, 3)]), |
568 | t!(aover300, &["", ""], "", &[(0, 0, 0), (1, 0, 0),]), |
569 | t!(aover310, &["", ""], "a", &[(0, 0, 0), (1, 0, 0)]), |
570 | t!(aover320, &["", "a"], "a", &[(0, 0, 0), (1, 0, 1)]), |
571 | t!(aover330, &["", "a", ""], "a", &[(0, 0, 0), (2, 0, 0), (1, 0, 1)]), |
572 | t!(aover340, &["a", "", ""], "a", &[(1, 0, 0), (2, 0, 0), (0, 0, 1)]), |
573 | t!(aover350, &["", "", "a"], "a", &[(0, 0, 0), (1, 0, 0), (2, 0, 1)]), |
574 | t!(aover360, &["foo", "foofoo"], "foofoo", &[(0, 0, 3), (1, 0, 6)]), |
575 | ]; |
576 | */ |
577 | |
578 | /// Tests for ASCII case insensitivity. |
579 | /// |
580 | /// These tests should all have the same behavior regardless of match semantics |
581 | /// or whether the search is overlapping. |
582 | const ASCII_CASE_INSENSITIVE: &'static [SearchTest] = &[ |
583 | t!(acasei000, &["a" ], "A" , &[(0, 0, 1)]), |
584 | t!(acasei010, &["Samwise" ], "SAMWISE" , &[(0, 0, 7)]), |
585 | t!(acasei011, &["Samwise" ], "SAMWISE.abcd" , &[(0, 0, 7)]), |
586 | t!(acasei020, &["fOoBaR" ], "quux foobar baz" , &[(0, 5, 11)]), |
587 | ]; |
588 | |
589 | /// Like ASCII_CASE_INSENSITIVE, but specifically for non-overlapping tests. |
590 | const ASCII_CASE_INSENSITIVE_NON_OVERLAPPING: &'static [SearchTest] = &[ |
591 | t!(acasei000, &["foo" , "FOO" ], "fOo" , &[(0, 0, 3)]), |
592 | t!(acasei000, &["FOO" , "foo" ], "fOo" , &[(0, 0, 3)]), |
593 | t!(acasei010, &["abc" , "def" ], "abcdef" , &[(0, 0, 3), (1, 3, 6)]), |
594 | ]; |
595 | |
596 | /// Like ASCII_CASE_INSENSITIVE, but specifically for overlapping tests. |
597 | const ASCII_CASE_INSENSITIVE_OVERLAPPING: &'static [SearchTest] = &[ |
598 | t!(acasei000, &["foo" , "FOO" ], "fOo" , &[(0, 0, 3), (1, 0, 3)]), |
599 | t!(acasei001, &["FOO" , "foo" ], "fOo" , &[(0, 0, 3), (1, 0, 3)]), |
600 | // This is a regression test from: |
601 | // https://github.com/BurntSushi/aho-corasick/issues/68 |
602 | // Previously, it was reporting a duplicate (1, 3, 6) match. |
603 | t!( |
604 | acasei010, |
605 | &["abc" , "def" , "abcdef" ], |
606 | "abcdef" , |
607 | &[(0, 0, 3), (2, 0, 6), (1, 3, 6)] |
608 | ), |
609 | ]; |
610 | |
611 | /// Regression tests that are applied to all Aho-Corasick combinations. |
612 | /// |
613 | /// If regression tests are needed for specific match semantics, then add them |
614 | /// to the appropriate group above. |
615 | const REGRESSION: &'static [SearchTest] = &[ |
616 | t!(regression010, &["inf" , "ind" ], "infind" , &[(0, 0, 3), (1, 3, 6),]), |
617 | t!(regression020, &["ind" , "inf" ], "infind" , &[(1, 0, 3), (0, 3, 6),]), |
618 | t!( |
619 | regression030, |
620 | &["libcore/" , "libstd/" ], |
621 | "libcore/char/methods.rs" , |
622 | &[(0, 0, 8),] |
623 | ), |
624 | t!( |
625 | regression040, |
626 | &["libstd/" , "libcore/" ], |
627 | "libcore/char/methods.rs" , |
628 | &[(1, 0, 8),] |
629 | ), |
630 | t!( |
631 | regression050, |
632 | &[" \x00\x00\x01" , " \x00\x00\x00" ], |
633 | " \x00\x00\x00" , |
634 | &[(1, 0, 3),] |
635 | ), |
636 | t!( |
637 | regression060, |
638 | &[" \x00\x00\x00" , " \x00\x00\x01" ], |
639 | " \x00\x00\x00" , |
640 | &[(0, 0, 3),] |
641 | ), |
642 | ]; |
643 | |
644 | // Now define a test for each combination of things above that we want to run. |
645 | // Since there are a few different combinations for each collection of tests, |
646 | // we define a couple of macros to avoid repetition drudgery. The testconfig |
647 | // macro constructs the automaton from a given match kind, and runs the search |
648 | // tests one-by-one over the given collection. The `with` parameter allows one |
649 | // to configure the builder with additional parameters. The testcombo macro |
650 | // invokes testconfig in precisely this way: it sets up several tests where |
651 | // each one turns a different knob on AhoCorasickBuilder. |
652 | |
653 | macro_rules! testconfig { |
654 | (anchored, $name:ident, $collection:expr, $kind:ident, $with:expr) => { |
655 | #[test] |
656 | fn $name() { |
657 | run_search_tests($collection, |test| { |
658 | let mut builder = AhoCorasick::builder(); |
659 | $with(&mut builder); |
660 | let input = Input::new(test.haystack).anchored(Anchored::Yes); |
661 | builder |
662 | .match_kind(MatchKind::$kind) |
663 | .build(test.patterns) |
664 | .unwrap() |
665 | .try_find_iter(input) |
666 | .unwrap() |
667 | .collect() |
668 | }); |
669 | } |
670 | }; |
671 | (overlapping, $name:ident, $collection:expr, $kind:ident, $with:expr) => { |
672 | #[test] |
673 | fn $name() { |
674 | run_search_tests($collection, |test| { |
675 | let mut builder = AhoCorasick::builder(); |
676 | $with(&mut builder); |
677 | builder |
678 | .match_kind(MatchKind::$kind) |
679 | .build(test.patterns) |
680 | .unwrap() |
681 | .find_overlapping_iter(test.haystack) |
682 | .collect() |
683 | }); |
684 | } |
685 | }; |
686 | (stream, $name:ident, $collection:expr, $kind:ident, $with:expr) => { |
687 | #[test] |
688 | fn $name() { |
689 | run_stream_search_tests($collection, |test| { |
690 | let buf = std::io::BufReader::with_capacity( |
691 | 1, |
692 | test.haystack.as_bytes(), |
693 | ); |
694 | let mut builder = AhoCorasick::builder(); |
695 | $with(&mut builder); |
696 | builder |
697 | .match_kind(MatchKind::$kind) |
698 | .build(test.patterns) |
699 | .unwrap() |
700 | .stream_find_iter(buf) |
701 | .map(|result| result.unwrap()) |
702 | .collect() |
703 | }); |
704 | } |
705 | }; |
706 | ($name:ident, $collection:expr, $kind:ident, $with:expr) => { |
707 | #[test] |
708 | fn $name() { |
709 | run_search_tests($collection, |test| { |
710 | let mut builder = AhoCorasick::builder(); |
711 | $with(&mut builder); |
712 | builder |
713 | .match_kind(MatchKind::$kind) |
714 | .build(test.patterns) |
715 | .unwrap() |
716 | .find_iter(test.haystack) |
717 | .collect() |
718 | }); |
719 | } |
720 | }; |
721 | } |
722 | |
723 | macro_rules! testcombo { |
724 | ($name:ident, $collection:expr, $kind:ident) => { |
725 | mod $name { |
726 | use super::*; |
727 | |
728 | testconfig!(default, $collection, $kind, |_| ()); |
729 | testconfig!( |
730 | nfa_default, |
731 | $collection, |
732 | $kind, |
733 | |b: &mut AhoCorasickBuilder| { |
734 | b.kind(Some(AhoCorasickKind::NoncontiguousNFA)); |
735 | } |
736 | ); |
737 | testconfig!( |
738 | nfa_noncontig_no_prefilter, |
739 | $collection, |
740 | $kind, |
741 | |b: &mut AhoCorasickBuilder| { |
742 | b.kind(Some(AhoCorasickKind::NoncontiguousNFA)) |
743 | .prefilter(false); |
744 | } |
745 | ); |
746 | testconfig!( |
747 | nfa_noncontig_all_sparse, |
748 | $collection, |
749 | $kind, |
750 | |b: &mut AhoCorasickBuilder| { |
751 | b.kind(Some(AhoCorasickKind::NoncontiguousNFA)) |
752 | .dense_depth(0); |
753 | } |
754 | ); |
755 | testconfig!( |
756 | nfa_noncontig_all_dense, |
757 | $collection, |
758 | $kind, |
759 | |b: &mut AhoCorasickBuilder| { |
760 | b.kind(Some(AhoCorasickKind::NoncontiguousNFA)) |
761 | .dense_depth(usize::MAX); |
762 | } |
763 | ); |
764 | testconfig!( |
765 | nfa_contig_default, |
766 | $collection, |
767 | $kind, |
768 | |b: &mut AhoCorasickBuilder| { |
769 | b.kind(Some(AhoCorasickKind::ContiguousNFA)); |
770 | } |
771 | ); |
772 | testconfig!( |
773 | nfa_contig_no_prefilter, |
774 | $collection, |
775 | $kind, |
776 | |b: &mut AhoCorasickBuilder| { |
777 | b.kind(Some(AhoCorasickKind::ContiguousNFA)) |
778 | .prefilter(false); |
779 | } |
780 | ); |
781 | testconfig!( |
782 | nfa_contig_all_sparse, |
783 | $collection, |
784 | $kind, |
785 | |b: &mut AhoCorasickBuilder| { |
786 | b.kind(Some(AhoCorasickKind::ContiguousNFA)) |
787 | .dense_depth(0); |
788 | } |
789 | ); |
790 | testconfig!( |
791 | nfa_contig_all_dense, |
792 | $collection, |
793 | $kind, |
794 | |b: &mut AhoCorasickBuilder| { |
795 | b.kind(Some(AhoCorasickKind::ContiguousNFA)) |
796 | .dense_depth(usize::MAX); |
797 | } |
798 | ); |
799 | testconfig!( |
800 | nfa_contig_no_byte_class, |
801 | $collection, |
802 | $kind, |
803 | |b: &mut AhoCorasickBuilder| { |
804 | b.kind(Some(AhoCorasickKind::ContiguousNFA)) |
805 | .byte_classes(false); |
806 | } |
807 | ); |
808 | testconfig!( |
809 | dfa_default, |
810 | $collection, |
811 | $kind, |
812 | |b: &mut AhoCorasickBuilder| { |
813 | b.kind(Some(AhoCorasickKind::DFA)); |
814 | } |
815 | ); |
816 | testconfig!( |
817 | dfa_start_both, |
818 | $collection, |
819 | $kind, |
820 | |b: &mut AhoCorasickBuilder| { |
821 | b.kind(Some(AhoCorasickKind::DFA)) |
822 | .start_kind(StartKind::Both); |
823 | } |
824 | ); |
825 | testconfig!( |
826 | dfa_no_prefilter, |
827 | $collection, |
828 | $kind, |
829 | |b: &mut AhoCorasickBuilder| { |
830 | b.kind(Some(AhoCorasickKind::DFA)).prefilter(false); |
831 | } |
832 | ); |
833 | testconfig!( |
834 | dfa_start_both_no_prefilter, |
835 | $collection, |
836 | $kind, |
837 | |b: &mut AhoCorasickBuilder| { |
838 | b.kind(Some(AhoCorasickKind::DFA)) |
839 | .start_kind(StartKind::Both) |
840 | .prefilter(false); |
841 | } |
842 | ); |
843 | testconfig!( |
844 | dfa_no_byte_class, |
845 | $collection, |
846 | $kind, |
847 | |b: &mut AhoCorasickBuilder| { |
848 | b.kind(Some(AhoCorasickKind::DFA)).byte_classes(false); |
849 | } |
850 | ); |
851 | testconfig!( |
852 | dfa_start_both_no_byte_class, |
853 | $collection, |
854 | $kind, |
855 | |b: &mut AhoCorasickBuilder| { |
856 | b.kind(Some(AhoCorasickKind::DFA)) |
857 | .start_kind(StartKind::Both) |
858 | .byte_classes(false); |
859 | } |
860 | ); |
861 | } |
862 | }; |
863 | } |
864 | |
865 | // Write out the various combinations of match semantics given the variety of |
866 | // configurations tested by 'testcombo!'. |
867 | testcombo!(search_leftmost_longest, AC_LEFTMOST_LONGEST, LeftmostLongest); |
868 | testcombo!(search_leftmost_first, AC_LEFTMOST_FIRST, LeftmostFirst); |
869 | testcombo!( |
870 | search_standard_nonoverlapping, |
871 | AC_STANDARD_NON_OVERLAPPING, |
872 | Standard |
873 | ); |
874 | |
875 | // Write out the overlapping combo by hand since there is only one of them. |
876 | testconfig!( |
877 | overlapping, |
878 | search_standard_overlapping_default, |
879 | AC_STANDARD_OVERLAPPING, |
880 | Standard, |
881 | |_| () |
882 | ); |
883 | testconfig!( |
884 | overlapping, |
885 | search_standard_overlapping_nfa_noncontig_default, |
886 | AC_STANDARD_OVERLAPPING, |
887 | Standard, |
888 | |b: &mut AhoCorasickBuilder| { |
889 | b.kind(Some(AhoCorasickKind::NoncontiguousNFA)); |
890 | } |
891 | ); |
892 | testconfig!( |
893 | overlapping, |
894 | search_standard_overlapping_nfa_noncontig_no_prefilter, |
895 | AC_STANDARD_OVERLAPPING, |
896 | Standard, |
897 | |b: &mut AhoCorasickBuilder| { |
898 | b.kind(Some(AhoCorasickKind::NoncontiguousNFA)).prefilter(false); |
899 | } |
900 | ); |
901 | testconfig!( |
902 | overlapping, |
903 | search_standard_overlapping_nfa_contig_default, |
904 | AC_STANDARD_OVERLAPPING, |
905 | Standard, |
906 | |b: &mut AhoCorasickBuilder| { |
907 | b.kind(Some(AhoCorasickKind::ContiguousNFA)); |
908 | } |
909 | ); |
910 | testconfig!( |
911 | overlapping, |
912 | search_standard_overlapping_nfa_contig_no_prefilter, |
913 | AC_STANDARD_OVERLAPPING, |
914 | Standard, |
915 | |b: &mut AhoCorasickBuilder| { |
916 | b.kind(Some(AhoCorasickKind::ContiguousNFA)).prefilter(false); |
917 | } |
918 | ); |
919 | testconfig!( |
920 | overlapping, |
921 | search_standard_overlapping_nfa_contig_all_sparse, |
922 | AC_STANDARD_OVERLAPPING, |
923 | Standard, |
924 | |b: &mut AhoCorasickBuilder| { |
925 | b.kind(Some(AhoCorasickKind::ContiguousNFA)).dense_depth(0); |
926 | } |
927 | ); |
928 | testconfig!( |
929 | overlapping, |
930 | search_standard_overlapping_nfa_contig_all_dense, |
931 | AC_STANDARD_OVERLAPPING, |
932 | Standard, |
933 | |b: &mut AhoCorasickBuilder| { |
934 | b.kind(Some(AhoCorasickKind::ContiguousNFA)).dense_depth(usize::MAX); |
935 | } |
936 | ); |
937 | testconfig!( |
938 | overlapping, |
939 | search_standard_overlapping_dfa_default, |
940 | AC_STANDARD_OVERLAPPING, |
941 | Standard, |
942 | |b: &mut AhoCorasickBuilder| { |
943 | b.kind(Some(AhoCorasickKind::DFA)); |
944 | } |
945 | ); |
946 | testconfig!( |
947 | overlapping, |
948 | search_standard_overlapping_dfa_start_both, |
949 | AC_STANDARD_OVERLAPPING, |
950 | Standard, |
951 | |b: &mut AhoCorasickBuilder| { |
952 | b.kind(Some(AhoCorasickKind::DFA)).start_kind(StartKind::Both); |
953 | } |
954 | ); |
955 | testconfig!( |
956 | overlapping, |
957 | search_standard_overlapping_dfa_no_prefilter, |
958 | AC_STANDARD_OVERLAPPING, |
959 | Standard, |
960 | |b: &mut AhoCorasickBuilder| { |
961 | b.kind(Some(AhoCorasickKind::DFA)).prefilter(false); |
962 | } |
963 | ); |
964 | testconfig!( |
965 | overlapping, |
966 | search_standard_overlapping_dfa_start_both_no_prefilter, |
967 | AC_STANDARD_OVERLAPPING, |
968 | Standard, |
969 | |b: &mut AhoCorasickBuilder| { |
970 | b.kind(Some(AhoCorasickKind::DFA)) |
971 | .start_kind(StartKind::Both) |
972 | .prefilter(false); |
973 | } |
974 | ); |
975 | testconfig!( |
976 | overlapping, |
977 | search_standard_overlapping_dfa_no_byte_class, |
978 | AC_STANDARD_OVERLAPPING, |
979 | Standard, |
980 | |b: &mut AhoCorasickBuilder| { |
981 | b.kind(Some(AhoCorasickKind::DFA)).byte_classes(false); |
982 | } |
983 | ); |
984 | testconfig!( |
985 | overlapping, |
986 | search_standard_overlapping_dfa_start_both_no_byte_class, |
987 | AC_STANDARD_OVERLAPPING, |
988 | Standard, |
989 | |b: &mut AhoCorasickBuilder| { |
990 | b.kind(Some(AhoCorasickKind::DFA)) |
991 | .start_kind(StartKind::Both) |
992 | .byte_classes(false); |
993 | } |
994 | ); |
995 | |
996 | // Also write out tests manually for streams, since we only test the standard |
997 | // match semantics. We also don't bother testing different automaton |
998 | // configurations, since those are well covered by tests above. |
999 | #[cfg (feature = "std" )] |
1000 | testconfig!( |
1001 | stream, |
1002 | search_standard_stream_default, |
1003 | AC_STANDARD_NON_OVERLAPPING, |
1004 | Standard, |
1005 | |_| () |
1006 | ); |
1007 | #[cfg (feature = "std" )] |
1008 | testconfig!( |
1009 | stream, |
1010 | search_standard_stream_nfa_noncontig_default, |
1011 | AC_STANDARD_NON_OVERLAPPING, |
1012 | Standard, |
1013 | |b: &mut AhoCorasickBuilder| { |
1014 | b.kind(Some(AhoCorasickKind::NoncontiguousNFA)); |
1015 | } |
1016 | ); |
1017 | #[cfg (feature = "std" )] |
1018 | testconfig!( |
1019 | stream, |
1020 | search_standard_stream_nfa_contig_default, |
1021 | AC_STANDARD_NON_OVERLAPPING, |
1022 | Standard, |
1023 | |b: &mut AhoCorasickBuilder| { |
1024 | b.kind(Some(AhoCorasickKind::ContiguousNFA)); |
1025 | } |
1026 | ); |
1027 | #[cfg (feature = "std" )] |
1028 | testconfig!( |
1029 | stream, |
1030 | search_standard_stream_dfa_default, |
1031 | AC_STANDARD_NON_OVERLAPPING, |
1032 | Standard, |
1033 | |b: &mut AhoCorasickBuilder| { |
1034 | b.kind(Some(AhoCorasickKind::DFA)); |
1035 | } |
1036 | ); |
1037 | |
1038 | // Same thing for anchored searches. Write them out manually. |
1039 | testconfig!( |
1040 | anchored, |
1041 | search_standard_anchored_default, |
1042 | AC_STANDARD_ANCHORED_NON_OVERLAPPING, |
1043 | Standard, |
1044 | |b: &mut AhoCorasickBuilder| { |
1045 | b.start_kind(StartKind::Anchored); |
1046 | } |
1047 | ); |
1048 | testconfig!( |
1049 | anchored, |
1050 | search_standard_anchored_nfa_noncontig_default, |
1051 | AC_STANDARD_ANCHORED_NON_OVERLAPPING, |
1052 | Standard, |
1053 | |b: &mut AhoCorasickBuilder| { |
1054 | b.start_kind(StartKind::Anchored) |
1055 | .kind(Some(AhoCorasickKind::NoncontiguousNFA)); |
1056 | } |
1057 | ); |
1058 | testconfig!( |
1059 | anchored, |
1060 | search_standard_anchored_nfa_contig_default, |
1061 | AC_STANDARD_ANCHORED_NON_OVERLAPPING, |
1062 | Standard, |
1063 | |b: &mut AhoCorasickBuilder| { |
1064 | b.start_kind(StartKind::Anchored) |
1065 | .kind(Some(AhoCorasickKind::ContiguousNFA)); |
1066 | } |
1067 | ); |
1068 | testconfig!( |
1069 | anchored, |
1070 | search_standard_anchored_dfa_default, |
1071 | AC_STANDARD_ANCHORED_NON_OVERLAPPING, |
1072 | Standard, |
1073 | |b: &mut AhoCorasickBuilder| { |
1074 | b.start_kind(StartKind::Anchored).kind(Some(AhoCorasickKind::DFA)); |
1075 | } |
1076 | ); |
1077 | testconfig!( |
1078 | anchored, |
1079 | search_standard_anchored_dfa_start_both, |
1080 | AC_STANDARD_ANCHORED_NON_OVERLAPPING, |
1081 | Standard, |
1082 | |b: &mut AhoCorasickBuilder| { |
1083 | b.start_kind(StartKind::Both).kind(Some(AhoCorasickKind::DFA)); |
1084 | } |
1085 | ); |
1086 | testconfig!( |
1087 | anchored, |
1088 | search_leftmost_first_anchored_default, |
1089 | AC_LEFTMOST_FIRST_ANCHORED, |
1090 | LeftmostFirst, |
1091 | |b: &mut AhoCorasickBuilder| { |
1092 | b.start_kind(StartKind::Anchored); |
1093 | } |
1094 | ); |
1095 | testconfig!( |
1096 | anchored, |
1097 | search_leftmost_first_anchored_nfa_noncontig_default, |
1098 | AC_LEFTMOST_FIRST_ANCHORED, |
1099 | LeftmostFirst, |
1100 | |b: &mut AhoCorasickBuilder| { |
1101 | b.start_kind(StartKind::Anchored) |
1102 | .kind(Some(AhoCorasickKind::NoncontiguousNFA)); |
1103 | } |
1104 | ); |
1105 | testconfig!( |
1106 | anchored, |
1107 | search_leftmost_first_anchored_nfa_contig_default, |
1108 | AC_LEFTMOST_FIRST_ANCHORED, |
1109 | LeftmostFirst, |
1110 | |b: &mut AhoCorasickBuilder| { |
1111 | b.start_kind(StartKind::Anchored) |
1112 | .kind(Some(AhoCorasickKind::ContiguousNFA)); |
1113 | } |
1114 | ); |
1115 | testconfig!( |
1116 | anchored, |
1117 | search_leftmost_first_anchored_dfa_default, |
1118 | AC_LEFTMOST_FIRST_ANCHORED, |
1119 | LeftmostFirst, |
1120 | |b: &mut AhoCorasickBuilder| { |
1121 | b.start_kind(StartKind::Anchored).kind(Some(AhoCorasickKind::DFA)); |
1122 | } |
1123 | ); |
1124 | testconfig!( |
1125 | anchored, |
1126 | search_leftmost_first_anchored_dfa_start_both, |
1127 | AC_LEFTMOST_FIRST_ANCHORED, |
1128 | LeftmostFirst, |
1129 | |b: &mut AhoCorasickBuilder| { |
1130 | b.start_kind(StartKind::Both).kind(Some(AhoCorasickKind::DFA)); |
1131 | } |
1132 | ); |
1133 | testconfig!( |
1134 | anchored, |
1135 | search_leftmost_longest_anchored_default, |
1136 | AC_LEFTMOST_LONGEST_ANCHORED, |
1137 | LeftmostLongest, |
1138 | |b: &mut AhoCorasickBuilder| { |
1139 | b.start_kind(StartKind::Anchored); |
1140 | } |
1141 | ); |
1142 | testconfig!( |
1143 | anchored, |
1144 | search_leftmost_longest_anchored_nfa_noncontig_default, |
1145 | AC_LEFTMOST_LONGEST_ANCHORED, |
1146 | LeftmostLongest, |
1147 | |b: &mut AhoCorasickBuilder| { |
1148 | b.start_kind(StartKind::Anchored) |
1149 | .kind(Some(AhoCorasickKind::NoncontiguousNFA)); |
1150 | } |
1151 | ); |
1152 | testconfig!( |
1153 | anchored, |
1154 | search_leftmost_longest_anchored_nfa_contig_default, |
1155 | AC_LEFTMOST_LONGEST_ANCHORED, |
1156 | LeftmostLongest, |
1157 | |b: &mut AhoCorasickBuilder| { |
1158 | b.start_kind(StartKind::Anchored) |
1159 | .kind(Some(AhoCorasickKind::ContiguousNFA)); |
1160 | } |
1161 | ); |
1162 | testconfig!( |
1163 | anchored, |
1164 | search_leftmost_longest_anchored_dfa_default, |
1165 | AC_LEFTMOST_LONGEST_ANCHORED, |
1166 | LeftmostLongest, |
1167 | |b: &mut AhoCorasickBuilder| { |
1168 | b.start_kind(StartKind::Anchored).kind(Some(AhoCorasickKind::DFA)); |
1169 | } |
1170 | ); |
1171 | testconfig!( |
1172 | anchored, |
1173 | search_leftmost_longest_anchored_dfa_start_both, |
1174 | AC_LEFTMOST_LONGEST_ANCHORED, |
1175 | LeftmostLongest, |
1176 | |b: &mut AhoCorasickBuilder| { |
1177 | b.start_kind(StartKind::Both).kind(Some(AhoCorasickKind::DFA)); |
1178 | } |
1179 | ); |
1180 | |
1181 | // And also write out the test combinations for ASCII case insensitivity. |
1182 | testconfig!( |
1183 | acasei_standard_default, |
1184 | &[ASCII_CASE_INSENSITIVE], |
1185 | Standard, |
1186 | |b: &mut AhoCorasickBuilder| { |
1187 | b.prefilter(false).ascii_case_insensitive(true); |
1188 | } |
1189 | ); |
1190 | testconfig!( |
1191 | acasei_standard_nfa_noncontig_default, |
1192 | &[ASCII_CASE_INSENSITIVE], |
1193 | Standard, |
1194 | |b: &mut AhoCorasickBuilder| { |
1195 | b.kind(Some(AhoCorasickKind::NoncontiguousNFA)) |
1196 | .prefilter(false) |
1197 | .ascii_case_insensitive(true); |
1198 | } |
1199 | ); |
1200 | testconfig!( |
1201 | acasei_standard_nfa_contig_default, |
1202 | &[ASCII_CASE_INSENSITIVE], |
1203 | Standard, |
1204 | |b: &mut AhoCorasickBuilder| { |
1205 | b.kind(Some(AhoCorasickKind::ContiguousNFA)) |
1206 | .prefilter(false) |
1207 | .ascii_case_insensitive(true); |
1208 | } |
1209 | ); |
1210 | testconfig!( |
1211 | acasei_standard_dfa_default, |
1212 | &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING], |
1213 | Standard, |
1214 | |b: &mut AhoCorasickBuilder| { |
1215 | b.kind(Some(AhoCorasickKind::DFA)).ascii_case_insensitive(true); |
1216 | } |
1217 | ); |
1218 | testconfig!( |
1219 | overlapping, |
1220 | acasei_standard_overlapping_default, |
1221 | &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_OVERLAPPING], |
1222 | Standard, |
1223 | |b: &mut AhoCorasickBuilder| { |
1224 | b.ascii_case_insensitive(true); |
1225 | } |
1226 | ); |
1227 | testconfig!( |
1228 | overlapping, |
1229 | acasei_standard_overlapping_nfa_noncontig_default, |
1230 | &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_OVERLAPPING], |
1231 | Standard, |
1232 | |b: &mut AhoCorasickBuilder| { |
1233 | b.kind(Some(AhoCorasickKind::NoncontiguousNFA)) |
1234 | .ascii_case_insensitive(true); |
1235 | } |
1236 | ); |
1237 | testconfig!( |
1238 | overlapping, |
1239 | acasei_standard_overlapping_nfa_contig_default, |
1240 | &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_OVERLAPPING], |
1241 | Standard, |
1242 | |b: &mut AhoCorasickBuilder| { |
1243 | b.kind(Some(AhoCorasickKind::ContiguousNFA)) |
1244 | .ascii_case_insensitive(true); |
1245 | } |
1246 | ); |
1247 | testconfig!( |
1248 | overlapping, |
1249 | acasei_standard_overlapping_dfa_default, |
1250 | &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_OVERLAPPING], |
1251 | Standard, |
1252 | |b: &mut AhoCorasickBuilder| { |
1253 | b.kind(Some(AhoCorasickKind::DFA)).ascii_case_insensitive(true); |
1254 | } |
1255 | ); |
1256 | testconfig!( |
1257 | acasei_leftmost_first_default, |
1258 | &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING], |
1259 | LeftmostFirst, |
1260 | |b: &mut AhoCorasickBuilder| { |
1261 | b.ascii_case_insensitive(true); |
1262 | } |
1263 | ); |
1264 | testconfig!( |
1265 | acasei_leftmost_first_nfa_noncontig_default, |
1266 | &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING], |
1267 | LeftmostFirst, |
1268 | |b: &mut AhoCorasickBuilder| { |
1269 | b.kind(Some(AhoCorasickKind::NoncontiguousNFA)) |
1270 | .ascii_case_insensitive(true); |
1271 | } |
1272 | ); |
1273 | testconfig!( |
1274 | acasei_leftmost_first_nfa_contig_default, |
1275 | &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING], |
1276 | LeftmostFirst, |
1277 | |b: &mut AhoCorasickBuilder| { |
1278 | b.kind(Some(AhoCorasickKind::ContiguousNFA)) |
1279 | .ascii_case_insensitive(true); |
1280 | } |
1281 | ); |
1282 | testconfig!( |
1283 | acasei_leftmost_first_dfa_default, |
1284 | &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING], |
1285 | LeftmostFirst, |
1286 | |b: &mut AhoCorasickBuilder| { |
1287 | b.kind(Some(AhoCorasickKind::DFA)).ascii_case_insensitive(true); |
1288 | } |
1289 | ); |
1290 | testconfig!( |
1291 | acasei_leftmost_longest_default, |
1292 | &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING], |
1293 | LeftmostLongest, |
1294 | |b: &mut AhoCorasickBuilder| { |
1295 | b.ascii_case_insensitive(true); |
1296 | } |
1297 | ); |
1298 | testconfig!( |
1299 | acasei_leftmost_longest_nfa_noncontig_default, |
1300 | &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING], |
1301 | LeftmostLongest, |
1302 | |b: &mut AhoCorasickBuilder| { |
1303 | b.kind(Some(AhoCorasickKind::NoncontiguousNFA)) |
1304 | .ascii_case_insensitive(true); |
1305 | } |
1306 | ); |
1307 | testconfig!( |
1308 | acasei_leftmost_longest_nfa_contig_default, |
1309 | &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING], |
1310 | LeftmostLongest, |
1311 | |b: &mut AhoCorasickBuilder| { |
1312 | b.kind(Some(AhoCorasickKind::ContiguousNFA)) |
1313 | .ascii_case_insensitive(true); |
1314 | } |
1315 | ); |
1316 | testconfig!( |
1317 | acasei_leftmost_longest_dfa_default, |
1318 | &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING], |
1319 | LeftmostLongest, |
1320 | |b: &mut AhoCorasickBuilder| { |
1321 | b.kind(Some(AhoCorasickKind::DFA)).ascii_case_insensitive(true); |
1322 | } |
1323 | ); |
1324 | |
1325 | fn run_search_tests<F: FnMut(&SearchTest) -> Vec<Match>>( |
1326 | which: TestCollection, |
1327 | mut f: F, |
1328 | ) { |
1329 | let get_match_triples = |
1330 | |matches: Vec<Match>| -> Vec<(usize, usize, usize)> { |
1331 | matches |
1332 | .into_iter() |
1333 | .map(|m| (m.pattern().as_usize(), m.start(), m.end())) |
1334 | .collect() |
1335 | }; |
1336 | for &tests in which { |
1337 | for test in tests { |
1338 | assert_eq!( |
1339 | test.matches, |
1340 | get_match_triples(f(&test)).as_slice(), |
1341 | "test: {}, patterns: {:?}, haystack: {:?}" , |
1342 | test.name, |
1343 | test.patterns, |
1344 | test.haystack |
1345 | ); |
1346 | } |
1347 | } |
1348 | } |
1349 | |
1350 | // Like 'run_search_tests', but we skip any tests that contain the empty |
1351 | // pattern because stream searching doesn't support it. |
1352 | #[cfg (feature = "std" )] |
1353 | fn run_stream_search_tests<F: FnMut(&SearchTest) -> Vec<Match>>( |
1354 | which: TestCollection, |
1355 | mut f: F, |
1356 | ) { |
1357 | let get_match_triples = |
1358 | |matches: Vec<Match>| -> Vec<(usize, usize, usize)> { |
1359 | matches |
1360 | .into_iter() |
1361 | .map(|m| (m.pattern().as_usize(), m.start(), m.end())) |
1362 | .collect() |
1363 | }; |
1364 | for &tests in which { |
1365 | for test in tests { |
1366 | if test.patterns.iter().any(|p| p.is_empty()) { |
1367 | continue; |
1368 | } |
1369 | assert_eq!( |
1370 | test.matches, |
1371 | get_match_triples(f(&test)).as_slice(), |
1372 | "test: {}, patterns: {:?}, haystack: {:?}" , |
1373 | test.name, |
1374 | test.patterns, |
1375 | test.haystack |
1376 | ); |
1377 | } |
1378 | } |
1379 | } |
1380 | |
1381 | #[test] |
1382 | fn search_tests_have_unique_names() { |
1383 | let assert = |constname, tests: &[SearchTest]| { |
1384 | let mut seen = HashMap::new(); // map from test name to position |
1385 | for (i, test) in tests.iter().enumerate() { |
1386 | if !seen.contains_key(test.name) { |
1387 | seen.insert(test.name, i); |
1388 | } else { |
1389 | let last = seen[test.name]; |
1390 | panic!( |
1391 | "{} tests have duplicate names at positions {} and {}" , |
1392 | constname, last, i |
1393 | ); |
1394 | } |
1395 | } |
1396 | }; |
1397 | assert("BASICS" , BASICS); |
1398 | assert("STANDARD" , STANDARD); |
1399 | assert("LEFTMOST" , LEFTMOST); |
1400 | assert("LEFTMOST_FIRST" , LEFTMOST_FIRST); |
1401 | assert("LEFTMOST_LONGEST" , LEFTMOST_LONGEST); |
1402 | assert("NON_OVERLAPPING" , NON_OVERLAPPING); |
1403 | assert("OVERLAPPING" , OVERLAPPING); |
1404 | assert("REGRESSION" , REGRESSION); |
1405 | } |
1406 | |
1407 | #[cfg (feature = "std" )] |
1408 | #[test] |
1409 | #[should_panic ] |
1410 | fn stream_not_allowed_leftmost_first() { |
1411 | let fsm = AhoCorasick::builder() |
1412 | .match_kind(MatchKind::LeftmostFirst) |
1413 | .build(None::<String>) |
1414 | .unwrap(); |
1415 | assert_eq!(fsm.stream_find_iter(&b"" [..]).count(), 0); |
1416 | } |
1417 | |
1418 | #[cfg (feature = "std" )] |
1419 | #[test] |
1420 | #[should_panic ] |
1421 | fn stream_not_allowed_leftmost_longest() { |
1422 | let fsm = AhoCorasick::builder() |
1423 | .match_kind(MatchKind::LeftmostLongest) |
1424 | .build(None::<String>) |
1425 | .unwrap(); |
1426 | assert_eq!(fsm.stream_find_iter(&b"" [..]).count(), 0); |
1427 | } |
1428 | |
1429 | #[test] |
1430 | #[should_panic ] |
1431 | fn overlapping_not_allowed_leftmost_first() { |
1432 | let fsm = AhoCorasick::builder() |
1433 | .match_kind(MatchKind::LeftmostFirst) |
1434 | .build(None::<String>) |
1435 | .unwrap(); |
1436 | assert_eq!(fsm.find_overlapping_iter("" ).count(), 0); |
1437 | } |
1438 | |
1439 | #[test] |
1440 | #[should_panic ] |
1441 | fn overlapping_not_allowed_leftmost_longest() { |
1442 | let fsm = AhoCorasick::builder() |
1443 | .match_kind(MatchKind::LeftmostLongest) |
1444 | .build(None::<String>) |
1445 | .unwrap(); |
1446 | assert_eq!(fsm.find_overlapping_iter("" ).count(), 0); |
1447 | } |
1448 | |
1449 | // This tests that if we build an AC matcher with an "unanchored" start kind, |
1450 | // then we can't run an anchored search even if the underlying searcher |
1451 | // supports it. |
1452 | // |
1453 | // The key bit here is that both of the NFAs in this crate unconditionally |
1454 | // support both unanchored and anchored searches, but the DFA does not because |
1455 | // of the added cost of doing so. To avoid the top-level AC matcher sometimes |
1456 | // supporting anchored and sometimes not (depending on which searcher it |
1457 | // chooses to use internally), we ensure that the given 'StartKind' is always |
1458 | // respected. |
1459 | #[test] |
1460 | fn anchored_not_allowed_even_if_technically_available() { |
1461 | let ac = AhoCorasick::builder() |
1462 | .kind(Some(AhoCorasickKind::NoncontiguousNFA)) |
1463 | .start_kind(StartKind::Unanchored) |
1464 | .build(&["foo" ]) |
1465 | .unwrap(); |
1466 | assert!(ac.try_find(Input::new("foo" ).anchored(Anchored::Yes)).is_err()); |
1467 | |
1468 | let ac = AhoCorasick::builder() |
1469 | .kind(Some(AhoCorasickKind::ContiguousNFA)) |
1470 | .start_kind(StartKind::Unanchored) |
1471 | .build(&["foo" ]) |
1472 | .unwrap(); |
1473 | assert!(ac.try_find(Input::new("foo" ).anchored(Anchored::Yes)).is_err()); |
1474 | |
1475 | // For completeness, check that the DFA returns an error too. |
1476 | let ac = AhoCorasick::builder() |
1477 | .kind(Some(AhoCorasickKind::DFA)) |
1478 | .start_kind(StartKind::Unanchored) |
1479 | .build(&["foo" ]) |
1480 | .unwrap(); |
1481 | assert!(ac.try_find(Input::new("foo" ).anchored(Anchored::Yes)).is_err()); |
1482 | } |
1483 | |
1484 | // This is like the test aboved, but with unanchored and anchored flipped. That |
1485 | // is, we asked for an AC searcher with anchored support and we check that |
1486 | // unanchored searches return an error even if the underlying searcher would |
1487 | // technically support it. |
1488 | #[test] |
1489 | fn unanchored_not_allowed_even_if_technically_available() { |
1490 | let ac = AhoCorasick::builder() |
1491 | .kind(Some(AhoCorasickKind::NoncontiguousNFA)) |
1492 | .start_kind(StartKind::Anchored) |
1493 | .build(&["foo" ]) |
1494 | .unwrap(); |
1495 | assert!(ac.try_find(Input::new("foo" ).anchored(Anchored::No)).is_err()); |
1496 | |
1497 | let ac = AhoCorasick::builder() |
1498 | .kind(Some(AhoCorasickKind::ContiguousNFA)) |
1499 | .start_kind(StartKind::Anchored) |
1500 | .build(&["foo" ]) |
1501 | .unwrap(); |
1502 | assert!(ac.try_find(Input::new("foo" ).anchored(Anchored::No)).is_err()); |
1503 | |
1504 | // For completeness, check that the DFA returns an error too. |
1505 | let ac = AhoCorasick::builder() |
1506 | .kind(Some(AhoCorasickKind::DFA)) |
1507 | .start_kind(StartKind::Anchored) |
1508 | .build(&["foo" ]) |
1509 | .unwrap(); |
1510 | assert!(ac.try_find(Input::new("foo" ).anchored(Anchored::No)).is_err()); |
1511 | } |
1512 | |
1513 | // This tests that a prefilter does not cause a search to report a match |
1514 | // outside the bounds provided by the caller. |
1515 | // |
1516 | // This is a regression test for a bug I introduced during the rewrite of most |
1517 | // of the crate after 0.7. It was never released. The tricky part here is |
1518 | // ensuring we get a prefilter that can report matches on its own (such as the |
1519 | // packed searcher). Otherwise, prefilters that report false positives might |
1520 | // have searched past the bounds provided by the caller, but confirming the |
1521 | // match would subsequently fail. |
1522 | #[test] |
1523 | fn prefilter_stays_in_bounds() { |
1524 | let ac = AhoCorasick::builder() |
1525 | .match_kind(MatchKind::LeftmostFirst) |
1526 | .build(&["sam" , "frodo" , "pippin" , "merry" , "gandalf" , "sauron" ]) |
1527 | .unwrap(); |
1528 | let haystack = "foo gandalf" ; |
1529 | assert_eq!(None, ac.find(Input::new(haystack).range(0..10))); |
1530 | } |
1531 | |
1532 | // See: https://github.com/BurntSushi/aho-corasick/issues/44 |
1533 | // |
1534 | // In short, this test ensures that enabling ASCII case insensitivity does not |
1535 | // visit an exponential number of states when filling in failure transitions. |
1536 | #[test] |
1537 | fn regression_ascii_case_insensitive_no_exponential() { |
1538 | let ac = AhoCorasick::builder() |
1539 | .ascii_case_insensitive(true) |
1540 | .build(&["Tsubaki House-Triple Shot Vol01校花三姐妹" ]) |
1541 | .unwrap(); |
1542 | assert!(ac.find("" ).is_none()); |
1543 | } |
1544 | |
1545 | // See: https://github.com/BurntSushi/aho-corasick/issues/53 |
1546 | // |
1547 | // This test ensures that the rare byte prefilter works in a particular corner |
1548 | // case. In particular, the shift offset detected for '/' in the patterns below |
1549 | // was incorrect, leading to a false negative. |
1550 | #[test] |
1551 | fn regression_rare_byte_prefilter() { |
1552 | use crate::AhoCorasick; |
1553 | |
1554 | let ac = AhoCorasick::new(&["ab/j/" , "x/" ]).unwrap(); |
1555 | assert!(ac.is_match("ab/j/" )); |
1556 | } |
1557 | |
1558 | #[test] |
1559 | fn regression_case_insensitive_prefilter() { |
1560 | for c in b'a' ..b'z' { |
1561 | for c2 in b'a' ..b'z' { |
1562 | let c = c as char; |
1563 | let c2 = c2 as char; |
1564 | let needle = format!("{}{}" , c, c2).to_lowercase(); |
1565 | let haystack = needle.to_uppercase(); |
1566 | let ac = AhoCorasick::builder() |
1567 | .ascii_case_insensitive(true) |
1568 | .prefilter(true) |
1569 | .build(&[&needle]) |
1570 | .unwrap(); |
1571 | assert_eq!( |
1572 | 1, |
1573 | ac.find_iter(&haystack).count(), |
1574 | "failed to find {:?} in {:?} \n\nautomaton: \n{:?}" , |
1575 | needle, |
1576 | haystack, |
1577 | ac, |
1578 | ); |
1579 | } |
1580 | } |
1581 | } |
1582 | |
1583 | // See: https://github.com/BurntSushi/aho-corasick/issues/64 |
1584 | // |
1585 | // This occurs when the rare byte prefilter is active. |
1586 | #[cfg (feature = "std" )] |
1587 | #[test] |
1588 | fn regression_stream_rare_byte_prefilter() { |
1589 | use std::io::Read; |
1590 | |
1591 | // NOTE: The test only fails if this ends with j. |
1592 | const MAGIC: [u8; 5] = *b"1234j" ; |
1593 | |
1594 | // NOTE: The test fails for value in 8188..=8191 These value put the string |
1595 | // to search accross two call to read because the buffer size is 64KB by |
1596 | // default. |
1597 | const BEGIN: usize = 65_535; |
1598 | |
1599 | /// This is just a structure that implements Reader. The reader |
1600 | /// implementation will simulate a file filled with 0, except for the MAGIC |
1601 | /// string at offset BEGIN. |
1602 | #[derive(Default)] |
1603 | struct R { |
1604 | read: usize, |
1605 | } |
1606 | |
1607 | impl Read for R { |
1608 | fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> { |
1609 | if self.read > 100000 { |
1610 | return Ok(0); |
1611 | } |
1612 | let mut from = 0; |
1613 | if self.read < BEGIN { |
1614 | from = buf.len().min(BEGIN - self.read); |
1615 | for x in 0..from { |
1616 | buf[x] = 0; |
1617 | } |
1618 | self.read += from; |
1619 | } |
1620 | if self.read >= BEGIN && self.read <= BEGIN + MAGIC.len() { |
1621 | let to = buf.len().min(BEGIN + MAGIC.len() - self.read + from); |
1622 | if to > from { |
1623 | buf[from..to].copy_from_slice( |
1624 | &MAGIC |
1625 | [self.read - BEGIN..self.read - BEGIN + to - from], |
1626 | ); |
1627 | self.read += to - from; |
1628 | from = to; |
1629 | } |
1630 | } |
1631 | for x in from..buf.len() { |
1632 | buf[x] = 0; |
1633 | self.read += 1; |
1634 | } |
1635 | Ok(buf.len()) |
1636 | } |
1637 | } |
1638 | |
1639 | fn run() -> std::io::Result<()> { |
1640 | let aut = AhoCorasick::builder() |
1641 | // Enable byte classes to make debugging the automaton easier. It |
1642 | // should have no effect on the test result. |
1643 | .byte_classes(false) |
1644 | .build(&[&MAGIC]) |
1645 | .unwrap(); |
1646 | |
1647 | // While reading from a vector, it works: |
1648 | let mut buf = alloc::vec![]; |
1649 | R::default().read_to_end(&mut buf)?; |
1650 | let from_whole = aut.find_iter(&buf).next().unwrap().start(); |
1651 | |
1652 | // But using stream_find_iter fails! |
1653 | let mut file = std::io::BufReader::new(R::default()); |
1654 | let begin = aut |
1655 | .stream_find_iter(&mut file) |
1656 | .next() |
1657 | .expect("NOT FOUND!!!!" )? // Panic here |
1658 | .start(); |
1659 | assert_eq!(from_whole, begin); |
1660 | Ok(()) |
1661 | } |
1662 | |
1663 | run().unwrap() |
1664 | } |
1665 | |