1use std::{collections::HashMap, format, string::String, vec::Vec};
2
3use crate::{
4 AhoCorasick, AhoCorasickBuilder, AhoCorasickKind, Anchored, Input, Match,
5 MatchKind, StartKind,
6};
7
8/// A description of a single test against an Aho-Corasick automaton.
9///
10/// A single test may not necessarily pass on every configuration of an
11/// Aho-Corasick automaton. The tests are categorized and grouped appropriately
12/// below.
13#[derive(Clone, Debug, Eq, PartialEq)]
14struct SearchTest {
15 /// The name of this test, for debugging.
16 name: &'static str,
17 /// The patterns to search for.
18 patterns: &'static [&'static str],
19 /// The text to search.
20 haystack: &'static str,
21 /// Each match is a triple of (pattern_index, start, end), where
22 /// pattern_index is an index into `patterns` and `start`/`end` are indices
23 /// into `haystack`.
24 matches: &'static [(usize, usize, usize)],
25}
26
27/// Short-hand constructor for SearchTest. We use it a lot below.
28macro_rules! t {
29 ($name:ident, $patterns:expr, $haystack:expr, $matches:expr) => {
30 SearchTest {
31 name: stringify!($name),
32 patterns: $patterns,
33 haystack: $haystack,
34 matches: $matches,
35 }
36 };
37}
38
39/// A collection of test groups.
40type TestCollection = &'static [&'static [SearchTest]];
41
42// Define several collections corresponding to the different type of match
43// semantics supported by Aho-Corasick. These collections have some overlap,
44// but each collection should have some tests that no other collection has.
45
46/// Tests for Aho-Corasick's standard non-overlapping match semantics.
47const AC_STANDARD_NON_OVERLAPPING: TestCollection =
48 &[BASICS, NON_OVERLAPPING, STANDARD, REGRESSION];
49
50/// Tests for Aho-Corasick's anchored standard non-overlapping match semantics.
51const AC_STANDARD_ANCHORED_NON_OVERLAPPING: TestCollection =
52 &[ANCHORED_BASICS, ANCHORED_NON_OVERLAPPING, STANDARD_ANCHORED];
53
54/// Tests for Aho-Corasick's standard overlapping match semantics.
55const AC_STANDARD_OVERLAPPING: TestCollection =
56 &[BASICS, OVERLAPPING, REGRESSION];
57
58/*
59Iterators of anchored overlapping searches were removed from the API in
60after 0.7, but we leave the tests commented out for posterity.
61/// Tests for Aho-Corasick's anchored standard overlapping match semantics.
62const AC_STANDARD_ANCHORED_OVERLAPPING: TestCollection =
63 &[ANCHORED_BASICS, ANCHORED_OVERLAPPING];
64*/
65
66/// Tests for Aho-Corasick's leftmost-first match semantics.
67const AC_LEFTMOST_FIRST: TestCollection =
68 &[BASICS, NON_OVERLAPPING, LEFTMOST, LEFTMOST_FIRST, REGRESSION];
69
70/// Tests for Aho-Corasick's anchored leftmost-first match semantics.
71const AC_LEFTMOST_FIRST_ANCHORED: TestCollection = &[
72 ANCHORED_BASICS,
73 ANCHORED_NON_OVERLAPPING,
74 ANCHORED_LEFTMOST,
75 ANCHORED_LEFTMOST_FIRST,
76];
77
78/// Tests for Aho-Corasick's leftmost-longest match semantics.
79const AC_LEFTMOST_LONGEST: TestCollection =
80 &[BASICS, NON_OVERLAPPING, LEFTMOST, LEFTMOST_LONGEST, REGRESSION];
81
82/// Tests for Aho-Corasick's anchored leftmost-longest match semantics.
83const AC_LEFTMOST_LONGEST_ANCHORED: TestCollection = &[
84 ANCHORED_BASICS,
85 ANCHORED_NON_OVERLAPPING,
86 ANCHORED_LEFTMOST,
87 ANCHORED_LEFTMOST_LONGEST,
88];
89
90// Now define the individual tests that make up the collections above.
91
92/// A collection of tests for the Aho-Corasick algorithm that should always be
93/// true regardless of match semantics. That is, all combinations of
94/// leftmost-{shortest, first, longest} x {overlapping, non-overlapping}
95/// should produce the same answer.
96const BASICS: &'static [SearchTest] = &[
97 t!(basic000, &[], "", &[]),
98 t!(basic001, &[""], "a", &[(0, 0, 0), (0, 1, 1)]),
99 t!(basic002, &["a"], "", &[]),
100 t!(basic010, &["a"], "a", &[(0, 0, 1)]),
101 t!(basic020, &["a"], "aa", &[(0, 0, 1), (0, 1, 2)]),
102 t!(basic030, &["a"], "aaa", &[(0, 0, 1), (0, 1, 2), (0, 2, 3)]),
103 t!(basic040, &["a"], "aba", &[(0, 0, 1), (0, 2, 3)]),
104 t!(basic050, &["a"], "bba", &[(0, 2, 3)]),
105 t!(basic060, &["a"], "bbb", &[]),
106 t!(basic070, &["a"], "bababbbba", &[(0, 1, 2), (0, 3, 4), (0, 8, 9)]),
107 t!(basic100, &["aa"], "", &[]),
108 t!(basic110, &["aa"], "aa", &[(0, 0, 2)]),
109 t!(basic120, &["aa"], "aabbaa", &[(0, 0, 2), (0, 4, 6)]),
110 t!(basic130, &["aa"], "abbab", &[]),
111 t!(basic140, &["aa"], "abbabaa", &[(0, 5, 7)]),
112 t!(basic200, &["abc"], "abc", &[(0, 0, 3)]),
113 t!(basic210, &["abc"], "zazabzabcz", &[(0, 6, 9)]),
114 t!(basic220, &["abc"], "zazabczabcz", &[(0, 3, 6), (0, 7, 10)]),
115 t!(basic300, &["a", "b"], "", &[]),
116 t!(basic310, &["a", "b"], "z", &[]),
117 t!(basic320, &["a", "b"], "b", &[(1, 0, 1)]),
118 t!(basic330, &["a", "b"], "a", &[(0, 0, 1)]),
119 t!(
120 basic340,
121 &["a", "b"],
122 "abba",
123 &[(0, 0, 1), (1, 1, 2), (1, 2, 3), (0, 3, 4),]
124 ),
125 t!(
126 basic350,
127 &["b", "a"],
128 "abba",
129 &[(1, 0, 1), (0, 1, 2), (0, 2, 3), (1, 3, 4),]
130 ),
131 t!(basic360, &["abc", "bc"], "xbc", &[(1, 1, 3),]),
132 t!(basic400, &["foo", "bar"], "", &[]),
133 t!(basic410, &["foo", "bar"], "foobar", &[(0, 0, 3), (1, 3, 6),]),
134 t!(basic420, &["foo", "bar"], "barfoo", &[(1, 0, 3), (0, 3, 6),]),
135 t!(basic430, &["foo", "bar"], "foofoo", &[(0, 0, 3), (0, 3, 6),]),
136 t!(basic440, &["foo", "bar"], "barbar", &[(1, 0, 3), (1, 3, 6),]),
137 t!(basic450, &["foo", "bar"], "bafofoo", &[(0, 4, 7),]),
138 t!(basic460, &["bar", "foo"], "bafofoo", &[(1, 4, 7),]),
139 t!(basic470, &["foo", "bar"], "fobabar", &[(1, 4, 7),]),
140 t!(basic480, &["bar", "foo"], "fobabar", &[(0, 4, 7),]),
141 t!(basic600, &[""], "", &[(0, 0, 0)]),
142 t!(basic610, &[""], "a", &[(0, 0, 0), (0, 1, 1)]),
143 t!(basic620, &[""], "abc", &[(0, 0, 0), (0, 1, 1), (0, 2, 2), (0, 3, 3)]),
144 t!(basic700, &["yabcdef", "abcdezghi"], "yabcdefghi", &[(0, 0, 7),]),
145 t!(basic710, &["yabcdef", "abcdezghi"], "yabcdezghi", &[(1, 1, 10),]),
146 t!(
147 basic720,
148 &["yabcdef", "bcdeyabc", "abcdezghi"],
149 "yabcdezghi",
150 &[(2, 1, 10),]
151 ),
152];
153
154/// A collection of *anchored* tests for the Aho-Corasick algorithm that should
155/// always be true regardless of match semantics. That is, all combinations of
156/// leftmost-{shortest, first, longest} x {overlapping, non-overlapping} should
157/// produce the same answer.
158const ANCHORED_BASICS: &'static [SearchTest] = &[
159 t!(abasic000, &[], "", &[]),
160 t!(abasic001, &[], "a", &[]),
161 t!(abasic002, &[], "abc", &[]),
162 t!(abasic010, &[""], "", &[(0, 0, 0)]),
163 t!(abasic020, &[""], "a", &[(0, 0, 0), (0, 1, 1)]),
164 t!(abasic030, &[""], "abc", &[(0, 0, 0), (0, 1, 1), (0, 2, 2), (0, 3, 3)]),
165 t!(abasic100, &["a"], "a", &[(0, 0, 1)]),
166 t!(abasic110, &["a"], "aa", &[(0, 0, 1), (0, 1, 2)]),
167 t!(abasic120, &["a", "b"], "ab", &[(0, 0, 1), (1, 1, 2)]),
168 t!(abasic130, &["a", "b"], "ba", &[(1, 0, 1), (0, 1, 2)]),
169 t!(abasic140, &["foo", "foofoo"], "foo", &[(0, 0, 3)]),
170 t!(abasic150, &["foofoo", "foo"], "foo", &[(1, 0, 3)]),
171 t!(abasic200, &["foo"], "foofoo foo", &[(0, 0, 3), (0, 3, 6)]),
172];
173
174/// Tests for non-overlapping standard match semantics.
175///
176/// These tests generally shouldn't pass for leftmost-{first,longest}, although
177/// some do in order to write clearer tests. For example, standard000 will
178/// pass with leftmost-first semantics, but standard010 will not. We write
179/// both to emphasize how the match semantics work.
180const STANDARD: &'static [SearchTest] = &[
181 t!(standard000, &["ab", "abcd"], "abcd", &[(0, 0, 2)]),
182 t!(standard010, &["abcd", "ab"], "abcd", &[(1, 0, 2)]),
183 t!(standard020, &["abcd", "ab", "abc"], "abcd", &[(1, 0, 2)]),
184 t!(standard030, &["abcd", "abc", "ab"], "abcd", &[(2, 0, 2)]),
185 t!(standard040, &["a", ""], "a", &[(1, 0, 0), (1, 1, 1)]),
186 t!(
187 standard400,
188 &["abcd", "bcd", "cd", "b"],
189 "abcd",
190 &[(3, 1, 2), (2, 2, 4),]
191 ),
192 t!(standard410, &["", "a"], "a", &[(0, 0, 0), (0, 1, 1),]),
193 t!(standard420, &["", "a"], "aa", &[(0, 0, 0), (0, 1, 1), (0, 2, 2),]),
194 t!(standard430, &["", "a", ""], "a", &[(0, 0, 0), (0, 1, 1),]),
195 t!(standard440, &["a", "", ""], "a", &[(1, 0, 0), (1, 1, 1),]),
196 t!(standard450, &["", "", "a"], "a", &[(0, 0, 0), (0, 1, 1),]),
197];
198
199/// Like STANDARD, but for anchored searches.
200const STANDARD_ANCHORED: &'static [SearchTest] = &[
201 t!(astandard000, &["ab", "abcd"], "abcd", &[(0, 0, 2)]),
202 t!(astandard010, &["abcd", "ab"], "abcd", &[(1, 0, 2)]),
203 t!(astandard020, &["abcd", "ab", "abc"], "abcd", &[(1, 0, 2)]),
204 t!(astandard030, &["abcd", "abc", "ab"], "abcd", &[(2, 0, 2)]),
205 t!(astandard040, &["a", ""], "a", &[(1, 0, 0), (1, 1, 1)]),
206 t!(astandard050, &["abcd", "bcd", "cd", "b"], "abcd", &[(0, 0, 4)]),
207 t!(astandard410, &["", "a"], "a", &[(0, 0, 0), (0, 1, 1)]),
208 t!(astandard420, &["", "a"], "aa", &[(0, 0, 0), (0, 1, 1), (0, 2, 2)]),
209 t!(astandard430, &["", "a", ""], "a", &[(0, 0, 0), (0, 1, 1)]),
210 t!(astandard440, &["a", "", ""], "a", &[(1, 0, 0), (1, 1, 1)]),
211 t!(astandard450, &["", "", "a"], "a", &[(0, 0, 0), (0, 1, 1)]),
212];
213
214/// Tests for non-overlapping leftmost match semantics. These should pass for
215/// both leftmost-first and leftmost-longest match kinds. Stated differently,
216/// among ambiguous matches, the longest match and the match that appeared
217/// first when constructing the automaton should always be the same.
218const LEFTMOST: &'static [SearchTest] = &[
219 t!(leftmost000, &["ab", "ab"], "abcd", &[(0, 0, 2)]),
220 t!(leftmost010, &["a", ""], "a", &[(0, 0, 1)]),
221 t!(leftmost011, &["a", ""], "ab", &[(0, 0, 1), (1, 2, 2)]),
222 t!(leftmost020, &["", ""], "a", &[(0, 0, 0), (0, 1, 1)]),
223 t!(leftmost030, &["a", "ab"], "aa", &[(0, 0, 1), (0, 1, 2)]),
224 t!(leftmost031, &["ab", "a"], "aa", &[(1, 0, 1), (1, 1, 2)]),
225 t!(leftmost032, &["ab", "a"], "xayabbbz", &[(1, 1, 2), (0, 3, 5)]),
226 t!(leftmost300, &["abcd", "bce", "b"], "abce", &[(1, 1, 4)]),
227 t!(leftmost310, &["abcd", "ce", "bc"], "abce", &[(2, 1, 3)]),
228 t!(leftmost320, &["abcd", "bce", "ce", "b"], "abce", &[(1, 1, 4)]),
229 t!(leftmost330, &["abcd", "bce", "cz", "bc"], "abcz", &[(3, 1, 3)]),
230 t!(leftmost340, &["bce", "cz", "bc"], "bcz", &[(2, 0, 2)]),
231 t!(leftmost350, &["abc", "bd", "ab"], "abd", &[(2, 0, 2)]),
232 t!(
233 leftmost360,
234 &["abcdefghi", "hz", "abcdefgh"],
235 "abcdefghz",
236 &[(2, 0, 8),]
237 ),
238 t!(
239 leftmost370,
240 &["abcdefghi", "cde", "hz", "abcdefgh"],
241 "abcdefghz",
242 &[(3, 0, 8),]
243 ),
244 t!(
245 leftmost380,
246 &["abcdefghi", "hz", "abcdefgh", "a"],
247 "abcdefghz",
248 &[(2, 0, 8),]
249 ),
250 t!(
251 leftmost390,
252 &["b", "abcdefghi", "hz", "abcdefgh"],
253 "abcdefghz",
254 &[(3, 0, 8),]
255 ),
256 t!(
257 leftmost400,
258 &["h", "abcdefghi", "hz", "abcdefgh"],
259 "abcdefghz",
260 &[(3, 0, 8),]
261 ),
262 t!(
263 leftmost410,
264 &["z", "abcdefghi", "hz", "abcdefgh"],
265 "abcdefghz",
266 &[(3, 0, 8), (0, 8, 9),]
267 ),
268];
269
270/// Like LEFTMOST, but for anchored searches.
271const ANCHORED_LEFTMOST: &'static [SearchTest] = &[
272 t!(aleftmost000, &["ab", "ab"], "abcd", &[(0, 0, 2)]),
273 // We shouldn't allow an empty match immediately following a match, right?
274 t!(aleftmost010, &["a", ""], "a", &[(0, 0, 1)]),
275 t!(aleftmost020, &["", ""], "a", &[(0, 0, 0), (0, 1, 1)]),
276 t!(aleftmost030, &["a", "ab"], "aa", &[(0, 0, 1), (0, 1, 2)]),
277 t!(aleftmost031, &["ab", "a"], "aa", &[(1, 0, 1), (1, 1, 2)]),
278 t!(aleftmost032, &["ab", "a"], "xayabbbz", &[]),
279 t!(aleftmost300, &["abcd", "bce", "b"], "abce", &[]),
280 t!(aleftmost301, &["abcd", "bcd", "cd", "b"], "abcd", &[(0, 0, 4)]),
281 t!(aleftmost310, &["abcd", "ce", "bc"], "abce", &[]),
282 t!(aleftmost320, &["abcd", "bce", "ce", "b"], "abce", &[]),
283 t!(aleftmost330, &["abcd", "bce", "cz", "bc"], "abcz", &[]),
284 t!(aleftmost340, &["bce", "cz", "bc"], "bcz", &[(2, 0, 2)]),
285 t!(aleftmost350, &["abc", "bd", "ab"], "abd", &[(2, 0, 2)]),
286 t!(
287 aleftmost360,
288 &["abcdefghi", "hz", "abcdefgh"],
289 "abcdefghz",
290 &[(2, 0, 8),]
291 ),
292 t!(
293 aleftmost370,
294 &["abcdefghi", "cde", "hz", "abcdefgh"],
295 "abcdefghz",
296 &[(3, 0, 8),]
297 ),
298 t!(
299 aleftmost380,
300 &["abcdefghi", "hz", "abcdefgh", "a"],
301 "abcdefghz",
302 &[(2, 0, 8),]
303 ),
304 t!(
305 aleftmost390,
306 &["b", "abcdefghi", "hz", "abcdefgh"],
307 "abcdefghz",
308 &[(3, 0, 8),]
309 ),
310 t!(
311 aleftmost400,
312 &["h", "abcdefghi", "hz", "abcdefgh"],
313 "abcdefghz",
314 &[(3, 0, 8),]
315 ),
316 t!(
317 aleftmost410,
318 &["z", "abcdefghi", "hz", "abcdefgh"],
319 "abcdefghzyz",
320 &[(3, 0, 8), (0, 8, 9)]
321 ),
322];
323
324/// Tests for non-overlapping leftmost-first match semantics. These tests
325/// should generally be specific to leftmost-first, which means they should
326/// generally fail under leftmost-longest semantics.
327const LEFTMOST_FIRST: &'static [SearchTest] = &[
328 t!(leftfirst000, &["ab", "abcd"], "abcd", &[(0, 0, 2)]),
329 t!(leftfirst010, &["", "a"], "a", &[(0, 0, 0), (0, 1, 1)]),
330 t!(leftfirst011, &["", "a", ""], "a", &[(0, 0, 0), (0, 1, 1),]),
331 t!(leftfirst012, &["a", "", ""], "a", &[(0, 0, 1)]),
332 t!(leftfirst013, &["", "", "a"], "a", &[(0, 0, 0), (0, 1, 1)]),
333 t!(leftfirst014, &["a", ""], "a", &[(0, 0, 1)]),
334 t!(leftfirst015, &["a", ""], "ab", &[(0, 0, 1), (1, 2, 2)]),
335 t!(leftfirst020, &["abcd", "ab"], "abcd", &[(0, 0, 4)]),
336 t!(leftfirst030, &["ab", "ab"], "abcd", &[(0, 0, 2)]),
337 t!(leftfirst040, &["a", "ab"], "xayabbbz", &[(0, 1, 2), (0, 3, 4)]),
338 t!(leftfirst100, &["abcdefg", "bcde", "bcdef"], "abcdef", &[(1, 1, 5)]),
339 t!(leftfirst110, &["abcdefg", "bcdef", "bcde"], "abcdef", &[(1, 1, 6)]),
340 t!(leftfirst300, &["abcd", "b", "bce"], "abce", &[(1, 1, 2)]),
341 t!(
342 leftfirst310,
343 &["abcd", "b", "bce", "ce"],
344 "abce",
345 &[(1, 1, 2), (3, 2, 4),]
346 ),
347 t!(
348 leftfirst320,
349 &["a", "abcdefghi", "hz", "abcdefgh"],
350 "abcdefghz",
351 &[(0, 0, 1), (2, 7, 9),]
352 ),
353 t!(leftfirst330, &["a", "abab"], "abab", &[(0, 0, 1), (0, 2, 3)]),
354 t!(leftfirst400, &["amwix", "samwise", "sam"], "Zsamwix", &[(2, 1, 4)]),
355];
356
357/// Like LEFTMOST_FIRST, but for anchored searches.
358const ANCHORED_LEFTMOST_FIRST: &'static [SearchTest] = &[
359 t!(aleftfirst000, &["ab", "abcd"], "abcd", &[(0, 0, 2)]),
360 t!(aleftfirst010, &["", "a"], "a", &[(0, 0, 0), (0, 1, 1)]),
361 t!(aleftfirst011, &["", "a", ""], "a", &[(0, 0, 0), (0, 1, 1)]),
362 t!(aleftfirst012, &["a", "", ""], "a", &[(0, 0, 1)]),
363 t!(aleftfirst013, &["", "", "a"], "a", &[(0, 0, 0), (0, 1, 1)]),
364 t!(aleftfirst020, &["abcd", "ab"], "abcd", &[(0, 0, 4)]),
365 t!(aleftfirst030, &["ab", "ab"], "abcd", &[(0, 0, 2)]),
366 t!(aleftfirst040, &["a", "ab"], "xayabbbz", &[]),
367 t!(aleftfirst100, &["abcdefg", "bcde", "bcdef"], "abcdef", &[]),
368 t!(aleftfirst110, &["abcdefg", "bcdef", "bcde"], "abcdef", &[]),
369 t!(aleftfirst300, &["abcd", "b", "bce"], "abce", &[]),
370 t!(aleftfirst310, &["abcd", "b", "bce", "ce"], "abce", &[]),
371 t!(
372 aleftfirst320,
373 &["a", "abcdefghi", "hz", "abcdefgh"],
374 "abcdefghz",
375 &[(0, 0, 1)]
376 ),
377 t!(aleftfirst330, &["a", "abab"], "abab", &[(0, 0, 1)]),
378 t!(aleftfirst400, &["wise", "samwise", "sam"], "samwix", &[(2, 0, 3)]),
379];
380
381/// Tests for non-overlapping leftmost-longest match semantics. These tests
382/// should generally be specific to leftmost-longest, which means they should
383/// generally fail under leftmost-first semantics.
384const LEFTMOST_LONGEST: &'static [SearchTest] = &[
385 t!(leftlong000, &["ab", "abcd"], "abcd", &[(1, 0, 4)]),
386 t!(leftlong010, &["abcd", "bcd", "cd", "b"], "abcd", &[(0, 0, 4),]),
387 t!(leftlong020, &["", "a"], "a", &[(1, 0, 1)]),
388 t!(leftlong021, &["", "a", ""], "a", &[(1, 0, 1)]),
389 t!(leftlong022, &["a", "", ""], "a", &[(0, 0, 1)]),
390 t!(leftlong023, &["", "", "a"], "a", &[(2, 0, 1)]),
391 t!(leftlong024, &["", "a"], "ab", &[(1, 0, 1), (0, 2, 2)]),
392 t!(leftlong030, &["", "a"], "aa", &[(1, 0, 1), (1, 1, 2)]),
393 t!(leftlong040, &["a", "ab"], "a", &[(0, 0, 1)]),
394 t!(leftlong050, &["a", "ab"], "ab", &[(1, 0, 2)]),
395 t!(leftlong060, &["ab", "a"], "a", &[(1, 0, 1)]),
396 t!(leftlong070, &["ab", "a"], "ab", &[(0, 0, 2)]),
397 t!(leftlong100, &["abcdefg", "bcde", "bcdef"], "abcdef", &[(2, 1, 6)]),
398 t!(leftlong110, &["abcdefg", "bcdef", "bcde"], "abcdef", &[(1, 1, 6)]),
399 t!(leftlong300, &["abcd", "b", "bce"], "abce", &[(2, 1, 4)]),
400 t!(
401 leftlong310,
402 &["a", "abcdefghi", "hz", "abcdefgh"],
403 "abcdefghz",
404 &[(3, 0, 8),]
405 ),
406 t!(leftlong320, &["a", "abab"], "abab", &[(1, 0, 4)]),
407 t!(leftlong330, &["abcd", "b", "ce"], "abce", &[(1, 1, 2), (2, 2, 4),]),
408 t!(leftlong340, &["a", "ab"], "xayabbbz", &[(0, 1, 2), (1, 3, 5)]),
409];
410
411/// Like LEFTMOST_LONGEST, but for anchored searches.
412const ANCHORED_LEFTMOST_LONGEST: &'static [SearchTest] = &[
413 t!(aleftlong000, &["ab", "abcd"], "abcd", &[(1, 0, 4)]),
414 t!(aleftlong010, &["abcd", "bcd", "cd", "b"], "abcd", &[(0, 0, 4),]),
415 t!(aleftlong020, &["", "a"], "a", &[(1, 0, 1)]),
416 t!(aleftlong021, &["", "a", ""], "a", &[(1, 0, 1)]),
417 t!(aleftlong022, &["a", "", ""], "a", &[(0, 0, 1)]),
418 t!(aleftlong023, &["", "", "a"], "a", &[(2, 0, 1)]),
419 t!(aleftlong030, &["", "a"], "aa", &[(1, 0, 1), (1, 1, 2)]),
420 t!(aleftlong040, &["a", "ab"], "a", &[(0, 0, 1)]),
421 t!(aleftlong050, &["a", "ab"], "ab", &[(1, 0, 2)]),
422 t!(aleftlong060, &["ab", "a"], "a", &[(1, 0, 1)]),
423 t!(aleftlong070, &["ab", "a"], "ab", &[(0, 0, 2)]),
424 t!(aleftlong100, &["abcdefg", "bcde", "bcdef"], "abcdef", &[]),
425 t!(aleftlong110, &["abcdefg", "bcdef", "bcde"], "abcdef", &[]),
426 t!(aleftlong300, &["abcd", "b", "bce"], "abce", &[]),
427 t!(
428 aleftlong310,
429 &["a", "abcdefghi", "hz", "abcdefgh"],
430 "abcdefghz",
431 &[(3, 0, 8),]
432 ),
433 t!(aleftlong320, &["a", "abab"], "abab", &[(1, 0, 4)]),
434 t!(aleftlong330, &["abcd", "b", "ce"], "abce", &[]),
435 t!(aleftlong340, &["a", "ab"], "xayabbbz", &[]),
436];
437
438/// Tests for non-overlapping match semantics.
439///
440/// Generally these tests shouldn't pass when using overlapping semantics.
441/// These should pass for both standard and leftmost match semantics.
442const NON_OVERLAPPING: &'static [SearchTest] = &[
443 t!(nover010, &["abcd", "bcd", "cd"], "abcd", &[(0, 0, 4),]),
444 t!(nover020, &["bcd", "cd", "abcd"], "abcd", &[(2, 0, 4),]),
445 t!(nover030, &["abc", "bc"], "zazabcz", &[(0, 3, 6),]),
446 t!(
447 nover100,
448 &["ab", "ba"],
449 "abababa",
450 &[(0, 0, 2), (0, 2, 4), (0, 4, 6),]
451 ),
452 t!(nover200, &["foo", "foo"], "foobarfoo", &[(0, 0, 3), (0, 6, 9),]),
453 t!(nover300, &["", ""], "", &[(0, 0, 0),]),
454 t!(nover310, &["", ""], "a", &[(0, 0, 0), (0, 1, 1),]),
455];
456
457/// Like NON_OVERLAPPING, but for anchored searches.
458const ANCHORED_NON_OVERLAPPING: &'static [SearchTest] = &[
459 t!(anover010, &["abcd", "bcd", "cd"], "abcd", &[(0, 0, 4),]),
460 t!(anover020, &["bcd", "cd", "abcd"], "abcd", &[(2, 0, 4),]),
461 t!(anover030, &["abc", "bc"], "zazabcz", &[]),
462 t!(
463 anover100,
464 &["ab", "ba"],
465 "abababa",
466 &[(0, 0, 2), (0, 2, 4), (0, 4, 6)]
467 ),
468 t!(anover200, &["foo", "foo"], "foobarfoo", &[(0, 0, 3)]),
469 t!(anover300, &["", ""], "", &[(0, 0, 0)]),
470 t!(anover310, &["", ""], "a", &[(0, 0, 0), (0, 1, 1)]),
471];
472
473/// Tests for overlapping match semantics.
474///
475/// This only supports standard match semantics, since leftmost-{first,longest}
476/// do not support overlapping matches.
477const OVERLAPPING: &'static [SearchTest] = &[
478 t!(
479 over000,
480 &["abcd", "bcd", "cd", "b"],
481 "abcd",
482 &[(3, 1, 2), (0, 0, 4), (1, 1, 4), (2, 2, 4),]
483 ),
484 t!(
485 over010,
486 &["bcd", "cd", "b", "abcd"],
487 "abcd",
488 &[(2, 1, 2), (3, 0, 4), (0, 1, 4), (1, 2, 4),]
489 ),
490 t!(
491 over020,
492 &["abcd", "bcd", "cd"],
493 "abcd",
494 &[(0, 0, 4), (1, 1, 4), (2, 2, 4),]
495 ),
496 t!(
497 over030,
498 &["bcd", "abcd", "cd"],
499 "abcd",
500 &[(1, 0, 4), (0, 1, 4), (2, 2, 4),]
501 ),
502 t!(
503 over040,
504 &["bcd", "cd", "abcd"],
505 "abcd",
506 &[(2, 0, 4), (0, 1, 4), (1, 2, 4),]
507 ),
508 t!(over050, &["abc", "bc"], "zazabcz", &[(0, 3, 6), (1, 4, 6),]),
509 t!(
510 over100,
511 &["ab", "ba"],
512 "abababa",
513 &[(0, 0, 2), (1, 1, 3), (0, 2, 4), (1, 3, 5), (0, 4, 6), (1, 5, 7),]
514 ),
515 t!(
516 over200,
517 &["foo", "foo"],
518 "foobarfoo",
519 &[(0, 0, 3), (1, 0, 3), (0, 6, 9), (1, 6, 9),]
520 ),
521 t!(over300, &["", ""], "", &[(0, 0, 0), (1, 0, 0),]),
522 t!(
523 over310,
524 &["", ""],
525 "a",
526 &[(0, 0, 0), (1, 0, 0), (0, 1, 1), (1, 1, 1),]
527 ),
528 t!(over320, &["", "a"], "a", &[(0, 0, 0), (1, 0, 1), (0, 1, 1),]),
529 t!(
530 over330,
531 &["", "a", ""],
532 "a",
533 &[(0, 0, 0), (2, 0, 0), (1, 0, 1), (0, 1, 1), (2, 1, 1),]
534 ),
535 t!(
536 over340,
537 &["a", "", ""],
538 "a",
539 &[(1, 0, 0), (2, 0, 0), (0, 0, 1), (1, 1, 1), (2, 1, 1),]
540 ),
541 t!(
542 over350,
543 &["", "", "a"],
544 "a",
545 &[(0, 0, 0), (1, 0, 0), (2, 0, 1), (0, 1, 1), (1, 1, 1),]
546 ),
547 t!(
548 over360,
549 &["foo", "foofoo"],
550 "foofoo",
551 &[(0, 0, 3), (1, 0, 6), (0, 3, 6)]
552 ),
553];
554
555/*
556Iterators of anchored overlapping searches were removed from the API in
557after 0.7, but we leave the tests commented out for posterity.
558/// Like OVERLAPPING, but for anchored searches.
559const ANCHORED_OVERLAPPING: &'static [SearchTest] = &[
560 t!(aover000, &["abcd", "bcd", "cd", "b"], "abcd", &[(0, 0, 4)]),
561 t!(aover010, &["bcd", "cd", "b", "abcd"], "abcd", &[(3, 0, 4)]),
562 t!(aover020, &["abcd", "bcd", "cd"], "abcd", &[(0, 0, 4)]),
563 t!(aover030, &["bcd", "abcd", "cd"], "abcd", &[(1, 0, 4)]),
564 t!(aover040, &["bcd", "cd", "abcd"], "abcd", &[(2, 0, 4)]),
565 t!(aover050, &["abc", "bc"], "zazabcz", &[]),
566 t!(aover100, &["ab", "ba"], "abababa", &[(0, 0, 2)]),
567 t!(aover200, &["foo", "foo"], "foobarfoo", &[(0, 0, 3), (1, 0, 3)]),
568 t!(aover300, &["", ""], "", &[(0, 0, 0), (1, 0, 0),]),
569 t!(aover310, &["", ""], "a", &[(0, 0, 0), (1, 0, 0)]),
570 t!(aover320, &["", "a"], "a", &[(0, 0, 0), (1, 0, 1)]),
571 t!(aover330, &["", "a", ""], "a", &[(0, 0, 0), (2, 0, 0), (1, 0, 1)]),
572 t!(aover340, &["a", "", ""], "a", &[(1, 0, 0), (2, 0, 0), (0, 0, 1)]),
573 t!(aover350, &["", "", "a"], "a", &[(0, 0, 0), (1, 0, 0), (2, 0, 1)]),
574 t!(aover360, &["foo", "foofoo"], "foofoo", &[(0, 0, 3), (1, 0, 6)]),
575];
576*/
577
578/// Tests for ASCII case insensitivity.
579///
580/// These tests should all have the same behavior regardless of match semantics
581/// or whether the search is overlapping.
582const ASCII_CASE_INSENSITIVE: &'static [SearchTest] = &[
583 t!(acasei000, &["a"], "A", &[(0, 0, 1)]),
584 t!(acasei010, &["Samwise"], "SAMWISE", &[(0, 0, 7)]),
585 t!(acasei011, &["Samwise"], "SAMWISE.abcd", &[(0, 0, 7)]),
586 t!(acasei020, &["fOoBaR"], "quux foobar baz", &[(0, 5, 11)]),
587];
588
589/// Like ASCII_CASE_INSENSITIVE, but specifically for non-overlapping tests.
590const ASCII_CASE_INSENSITIVE_NON_OVERLAPPING: &'static [SearchTest] = &[
591 t!(acasei000, &["foo", "FOO"], "fOo", &[(0, 0, 3)]),
592 t!(acasei000, &["FOO", "foo"], "fOo", &[(0, 0, 3)]),
593 t!(acasei010, &["abc", "def"], "abcdef", &[(0, 0, 3), (1, 3, 6)]),
594];
595
596/// Like ASCII_CASE_INSENSITIVE, but specifically for overlapping tests.
597const ASCII_CASE_INSENSITIVE_OVERLAPPING: &'static [SearchTest] = &[
598 t!(acasei000, &["foo", "FOO"], "fOo", &[(0, 0, 3), (1, 0, 3)]),
599 t!(acasei001, &["FOO", "foo"], "fOo", &[(0, 0, 3), (1, 0, 3)]),
600 // This is a regression test from:
601 // https://github.com/BurntSushi/aho-corasick/issues/68
602 // Previously, it was reporting a duplicate (1, 3, 6) match.
603 t!(
604 acasei010,
605 &["abc", "def", "abcdef"],
606 "abcdef",
607 &[(0, 0, 3), (2, 0, 6), (1, 3, 6)]
608 ),
609];
610
611/// Regression tests that are applied to all Aho-Corasick combinations.
612///
613/// If regression tests are needed for specific match semantics, then add them
614/// to the appropriate group above.
615const REGRESSION: &'static [SearchTest] = &[
616 t!(regression010, &["inf", "ind"], "infind", &[(0, 0, 3), (1, 3, 6),]),
617 t!(regression020, &["ind", "inf"], "infind", &[(1, 0, 3), (0, 3, 6),]),
618 t!(
619 regression030,
620 &["libcore/", "libstd/"],
621 "libcore/char/methods.rs",
622 &[(0, 0, 8),]
623 ),
624 t!(
625 regression040,
626 &["libstd/", "libcore/"],
627 "libcore/char/methods.rs",
628 &[(1, 0, 8),]
629 ),
630 t!(
631 regression050,
632 &["\x00\x00\x01", "\x00\x00\x00"],
633 "\x00\x00\x00",
634 &[(1, 0, 3),]
635 ),
636 t!(
637 regression060,
638 &["\x00\x00\x00", "\x00\x00\x01"],
639 "\x00\x00\x00",
640 &[(0, 0, 3),]
641 ),
642];
643
644// Now define a test for each combination of things above that we want to run.
645// Since there are a few different combinations for each collection of tests,
646// we define a couple of macros to avoid repetition drudgery. The testconfig
647// macro constructs the automaton from a given match kind, and runs the search
648// tests one-by-one over the given collection. The `with` parameter allows one
649// to configure the builder with additional parameters. The testcombo macro
650// invokes testconfig in precisely this way: it sets up several tests where
651// each one turns a different knob on AhoCorasickBuilder.
652
653macro_rules! testconfig {
654 (anchored, $name:ident, $collection:expr, $kind:ident, $with:expr) => {
655 #[test]
656 fn $name() {
657 run_search_tests($collection, |test| {
658 let mut builder = AhoCorasick::builder();
659 $with(&mut builder);
660 let input = Input::new(test.haystack).anchored(Anchored::Yes);
661 builder
662 .match_kind(MatchKind::$kind)
663 .build(test.patterns)
664 .unwrap()
665 .try_find_iter(input)
666 .unwrap()
667 .collect()
668 });
669 }
670 };
671 (overlapping, $name:ident, $collection:expr, $kind:ident, $with:expr) => {
672 #[test]
673 fn $name() {
674 run_search_tests($collection, |test| {
675 let mut builder = AhoCorasick::builder();
676 $with(&mut builder);
677 builder
678 .match_kind(MatchKind::$kind)
679 .build(test.patterns)
680 .unwrap()
681 .find_overlapping_iter(test.haystack)
682 .collect()
683 });
684 }
685 };
686 (stream, $name:ident, $collection:expr, $kind:ident, $with:expr) => {
687 #[test]
688 fn $name() {
689 run_stream_search_tests($collection, |test| {
690 let buf = std::io::BufReader::with_capacity(
691 1,
692 test.haystack.as_bytes(),
693 );
694 let mut builder = AhoCorasick::builder();
695 $with(&mut builder);
696 builder
697 .match_kind(MatchKind::$kind)
698 .build(test.patterns)
699 .unwrap()
700 .stream_find_iter(buf)
701 .map(|result| result.unwrap())
702 .collect()
703 });
704 }
705 };
706 ($name:ident, $collection:expr, $kind:ident, $with:expr) => {
707 #[test]
708 fn $name() {
709 run_search_tests($collection, |test| {
710 let mut builder = AhoCorasick::builder();
711 $with(&mut builder);
712 builder
713 .match_kind(MatchKind::$kind)
714 .build(test.patterns)
715 .unwrap()
716 .find_iter(test.haystack)
717 .collect()
718 });
719 }
720 };
721}
722
723macro_rules! testcombo {
724 ($name:ident, $collection:expr, $kind:ident) => {
725 mod $name {
726 use super::*;
727
728 testconfig!(default, $collection, $kind, |_| ());
729 testconfig!(
730 nfa_default,
731 $collection,
732 $kind,
733 |b: &mut AhoCorasickBuilder| {
734 b.kind(Some(AhoCorasickKind::NoncontiguousNFA));
735 }
736 );
737 testconfig!(
738 nfa_noncontig_no_prefilter,
739 $collection,
740 $kind,
741 |b: &mut AhoCorasickBuilder| {
742 b.kind(Some(AhoCorasickKind::NoncontiguousNFA))
743 .prefilter(false);
744 }
745 );
746 testconfig!(
747 nfa_noncontig_all_sparse,
748 $collection,
749 $kind,
750 |b: &mut AhoCorasickBuilder| {
751 b.kind(Some(AhoCorasickKind::NoncontiguousNFA))
752 .dense_depth(0);
753 }
754 );
755 testconfig!(
756 nfa_noncontig_all_dense,
757 $collection,
758 $kind,
759 |b: &mut AhoCorasickBuilder| {
760 b.kind(Some(AhoCorasickKind::NoncontiguousNFA))
761 .dense_depth(usize::MAX);
762 }
763 );
764 testconfig!(
765 nfa_contig_default,
766 $collection,
767 $kind,
768 |b: &mut AhoCorasickBuilder| {
769 b.kind(Some(AhoCorasickKind::ContiguousNFA));
770 }
771 );
772 testconfig!(
773 nfa_contig_no_prefilter,
774 $collection,
775 $kind,
776 |b: &mut AhoCorasickBuilder| {
777 b.kind(Some(AhoCorasickKind::ContiguousNFA))
778 .prefilter(false);
779 }
780 );
781 testconfig!(
782 nfa_contig_all_sparse,
783 $collection,
784 $kind,
785 |b: &mut AhoCorasickBuilder| {
786 b.kind(Some(AhoCorasickKind::ContiguousNFA))
787 .dense_depth(0);
788 }
789 );
790 testconfig!(
791 nfa_contig_all_dense,
792 $collection,
793 $kind,
794 |b: &mut AhoCorasickBuilder| {
795 b.kind(Some(AhoCorasickKind::ContiguousNFA))
796 .dense_depth(usize::MAX);
797 }
798 );
799 testconfig!(
800 nfa_contig_no_byte_class,
801 $collection,
802 $kind,
803 |b: &mut AhoCorasickBuilder| {
804 b.kind(Some(AhoCorasickKind::ContiguousNFA))
805 .byte_classes(false);
806 }
807 );
808 testconfig!(
809 dfa_default,
810 $collection,
811 $kind,
812 |b: &mut AhoCorasickBuilder| {
813 b.kind(Some(AhoCorasickKind::DFA));
814 }
815 );
816 testconfig!(
817 dfa_start_both,
818 $collection,
819 $kind,
820 |b: &mut AhoCorasickBuilder| {
821 b.kind(Some(AhoCorasickKind::DFA))
822 .start_kind(StartKind::Both);
823 }
824 );
825 testconfig!(
826 dfa_no_prefilter,
827 $collection,
828 $kind,
829 |b: &mut AhoCorasickBuilder| {
830 b.kind(Some(AhoCorasickKind::DFA)).prefilter(false);
831 }
832 );
833 testconfig!(
834 dfa_start_both_no_prefilter,
835 $collection,
836 $kind,
837 |b: &mut AhoCorasickBuilder| {
838 b.kind(Some(AhoCorasickKind::DFA))
839 .start_kind(StartKind::Both)
840 .prefilter(false);
841 }
842 );
843 testconfig!(
844 dfa_no_byte_class,
845 $collection,
846 $kind,
847 |b: &mut AhoCorasickBuilder| {
848 b.kind(Some(AhoCorasickKind::DFA)).byte_classes(false);
849 }
850 );
851 testconfig!(
852 dfa_start_both_no_byte_class,
853 $collection,
854 $kind,
855 |b: &mut AhoCorasickBuilder| {
856 b.kind(Some(AhoCorasickKind::DFA))
857 .start_kind(StartKind::Both)
858 .byte_classes(false);
859 }
860 );
861 }
862 };
863}
864
865// Write out the various combinations of match semantics given the variety of
866// configurations tested by 'testcombo!'.
867testcombo!(search_leftmost_longest, AC_LEFTMOST_LONGEST, LeftmostLongest);
868testcombo!(search_leftmost_first, AC_LEFTMOST_FIRST, LeftmostFirst);
869testcombo!(
870 search_standard_nonoverlapping,
871 AC_STANDARD_NON_OVERLAPPING,
872 Standard
873);
874
875// Write out the overlapping combo by hand since there is only one of them.
876testconfig!(
877 overlapping,
878 search_standard_overlapping_default,
879 AC_STANDARD_OVERLAPPING,
880 Standard,
881 |_| ()
882);
883testconfig!(
884 overlapping,
885 search_standard_overlapping_nfa_noncontig_default,
886 AC_STANDARD_OVERLAPPING,
887 Standard,
888 |b: &mut AhoCorasickBuilder| {
889 b.kind(Some(AhoCorasickKind::NoncontiguousNFA));
890 }
891);
892testconfig!(
893 overlapping,
894 search_standard_overlapping_nfa_noncontig_no_prefilter,
895 AC_STANDARD_OVERLAPPING,
896 Standard,
897 |b: &mut AhoCorasickBuilder| {
898 b.kind(Some(AhoCorasickKind::NoncontiguousNFA)).prefilter(false);
899 }
900);
901testconfig!(
902 overlapping,
903 search_standard_overlapping_nfa_contig_default,
904 AC_STANDARD_OVERLAPPING,
905 Standard,
906 |b: &mut AhoCorasickBuilder| {
907 b.kind(Some(AhoCorasickKind::ContiguousNFA));
908 }
909);
910testconfig!(
911 overlapping,
912 search_standard_overlapping_nfa_contig_no_prefilter,
913 AC_STANDARD_OVERLAPPING,
914 Standard,
915 |b: &mut AhoCorasickBuilder| {
916 b.kind(Some(AhoCorasickKind::ContiguousNFA)).prefilter(false);
917 }
918);
919testconfig!(
920 overlapping,
921 search_standard_overlapping_nfa_contig_all_sparse,
922 AC_STANDARD_OVERLAPPING,
923 Standard,
924 |b: &mut AhoCorasickBuilder| {
925 b.kind(Some(AhoCorasickKind::ContiguousNFA)).dense_depth(0);
926 }
927);
928testconfig!(
929 overlapping,
930 search_standard_overlapping_nfa_contig_all_dense,
931 AC_STANDARD_OVERLAPPING,
932 Standard,
933 |b: &mut AhoCorasickBuilder| {
934 b.kind(Some(AhoCorasickKind::ContiguousNFA)).dense_depth(usize::MAX);
935 }
936);
937testconfig!(
938 overlapping,
939 search_standard_overlapping_dfa_default,
940 AC_STANDARD_OVERLAPPING,
941 Standard,
942 |b: &mut AhoCorasickBuilder| {
943 b.kind(Some(AhoCorasickKind::DFA));
944 }
945);
946testconfig!(
947 overlapping,
948 search_standard_overlapping_dfa_start_both,
949 AC_STANDARD_OVERLAPPING,
950 Standard,
951 |b: &mut AhoCorasickBuilder| {
952 b.kind(Some(AhoCorasickKind::DFA)).start_kind(StartKind::Both);
953 }
954);
955testconfig!(
956 overlapping,
957 search_standard_overlapping_dfa_no_prefilter,
958 AC_STANDARD_OVERLAPPING,
959 Standard,
960 |b: &mut AhoCorasickBuilder| {
961 b.kind(Some(AhoCorasickKind::DFA)).prefilter(false);
962 }
963);
964testconfig!(
965 overlapping,
966 search_standard_overlapping_dfa_start_both_no_prefilter,
967 AC_STANDARD_OVERLAPPING,
968 Standard,
969 |b: &mut AhoCorasickBuilder| {
970 b.kind(Some(AhoCorasickKind::DFA))
971 .start_kind(StartKind::Both)
972 .prefilter(false);
973 }
974);
975testconfig!(
976 overlapping,
977 search_standard_overlapping_dfa_no_byte_class,
978 AC_STANDARD_OVERLAPPING,
979 Standard,
980 |b: &mut AhoCorasickBuilder| {
981 b.kind(Some(AhoCorasickKind::DFA)).byte_classes(false);
982 }
983);
984testconfig!(
985 overlapping,
986 search_standard_overlapping_dfa_start_both_no_byte_class,
987 AC_STANDARD_OVERLAPPING,
988 Standard,
989 |b: &mut AhoCorasickBuilder| {
990 b.kind(Some(AhoCorasickKind::DFA))
991 .start_kind(StartKind::Both)
992 .byte_classes(false);
993 }
994);
995
996// Also write out tests manually for streams, since we only test the standard
997// match semantics. We also don't bother testing different automaton
998// configurations, since those are well covered by tests above.
999#[cfg(feature = "std")]
1000testconfig!(
1001 stream,
1002 search_standard_stream_default,
1003 AC_STANDARD_NON_OVERLAPPING,
1004 Standard,
1005 |_| ()
1006);
1007#[cfg(feature = "std")]
1008testconfig!(
1009 stream,
1010 search_standard_stream_nfa_noncontig_default,
1011 AC_STANDARD_NON_OVERLAPPING,
1012 Standard,
1013 |b: &mut AhoCorasickBuilder| {
1014 b.kind(Some(AhoCorasickKind::NoncontiguousNFA));
1015 }
1016);
1017#[cfg(feature = "std")]
1018testconfig!(
1019 stream,
1020 search_standard_stream_nfa_contig_default,
1021 AC_STANDARD_NON_OVERLAPPING,
1022 Standard,
1023 |b: &mut AhoCorasickBuilder| {
1024 b.kind(Some(AhoCorasickKind::ContiguousNFA));
1025 }
1026);
1027#[cfg(feature = "std")]
1028testconfig!(
1029 stream,
1030 search_standard_stream_dfa_default,
1031 AC_STANDARD_NON_OVERLAPPING,
1032 Standard,
1033 |b: &mut AhoCorasickBuilder| {
1034 b.kind(Some(AhoCorasickKind::DFA));
1035 }
1036);
1037
1038// Same thing for anchored searches. Write them out manually.
1039testconfig!(
1040 anchored,
1041 search_standard_anchored_default,
1042 AC_STANDARD_ANCHORED_NON_OVERLAPPING,
1043 Standard,
1044 |b: &mut AhoCorasickBuilder| {
1045 b.start_kind(StartKind::Anchored);
1046 }
1047);
1048testconfig!(
1049 anchored,
1050 search_standard_anchored_nfa_noncontig_default,
1051 AC_STANDARD_ANCHORED_NON_OVERLAPPING,
1052 Standard,
1053 |b: &mut AhoCorasickBuilder| {
1054 b.start_kind(StartKind::Anchored)
1055 .kind(Some(AhoCorasickKind::NoncontiguousNFA));
1056 }
1057);
1058testconfig!(
1059 anchored,
1060 search_standard_anchored_nfa_contig_default,
1061 AC_STANDARD_ANCHORED_NON_OVERLAPPING,
1062 Standard,
1063 |b: &mut AhoCorasickBuilder| {
1064 b.start_kind(StartKind::Anchored)
1065 .kind(Some(AhoCorasickKind::ContiguousNFA));
1066 }
1067);
1068testconfig!(
1069 anchored,
1070 search_standard_anchored_dfa_default,
1071 AC_STANDARD_ANCHORED_NON_OVERLAPPING,
1072 Standard,
1073 |b: &mut AhoCorasickBuilder| {
1074 b.start_kind(StartKind::Anchored).kind(Some(AhoCorasickKind::DFA));
1075 }
1076);
1077testconfig!(
1078 anchored,
1079 search_standard_anchored_dfa_start_both,
1080 AC_STANDARD_ANCHORED_NON_OVERLAPPING,
1081 Standard,
1082 |b: &mut AhoCorasickBuilder| {
1083 b.start_kind(StartKind::Both).kind(Some(AhoCorasickKind::DFA));
1084 }
1085);
1086testconfig!(
1087 anchored,
1088 search_leftmost_first_anchored_default,
1089 AC_LEFTMOST_FIRST_ANCHORED,
1090 LeftmostFirst,
1091 |b: &mut AhoCorasickBuilder| {
1092 b.start_kind(StartKind::Anchored);
1093 }
1094);
1095testconfig!(
1096 anchored,
1097 search_leftmost_first_anchored_nfa_noncontig_default,
1098 AC_LEFTMOST_FIRST_ANCHORED,
1099 LeftmostFirst,
1100 |b: &mut AhoCorasickBuilder| {
1101 b.start_kind(StartKind::Anchored)
1102 .kind(Some(AhoCorasickKind::NoncontiguousNFA));
1103 }
1104);
1105testconfig!(
1106 anchored,
1107 search_leftmost_first_anchored_nfa_contig_default,
1108 AC_LEFTMOST_FIRST_ANCHORED,
1109 LeftmostFirst,
1110 |b: &mut AhoCorasickBuilder| {
1111 b.start_kind(StartKind::Anchored)
1112 .kind(Some(AhoCorasickKind::ContiguousNFA));
1113 }
1114);
1115testconfig!(
1116 anchored,
1117 search_leftmost_first_anchored_dfa_default,
1118 AC_LEFTMOST_FIRST_ANCHORED,
1119 LeftmostFirst,
1120 |b: &mut AhoCorasickBuilder| {
1121 b.start_kind(StartKind::Anchored).kind(Some(AhoCorasickKind::DFA));
1122 }
1123);
1124testconfig!(
1125 anchored,
1126 search_leftmost_first_anchored_dfa_start_both,
1127 AC_LEFTMOST_FIRST_ANCHORED,
1128 LeftmostFirst,
1129 |b: &mut AhoCorasickBuilder| {
1130 b.start_kind(StartKind::Both).kind(Some(AhoCorasickKind::DFA));
1131 }
1132);
1133testconfig!(
1134 anchored,
1135 search_leftmost_longest_anchored_default,
1136 AC_LEFTMOST_LONGEST_ANCHORED,
1137 LeftmostLongest,
1138 |b: &mut AhoCorasickBuilder| {
1139 b.start_kind(StartKind::Anchored);
1140 }
1141);
1142testconfig!(
1143 anchored,
1144 search_leftmost_longest_anchored_nfa_noncontig_default,
1145 AC_LEFTMOST_LONGEST_ANCHORED,
1146 LeftmostLongest,
1147 |b: &mut AhoCorasickBuilder| {
1148 b.start_kind(StartKind::Anchored)
1149 .kind(Some(AhoCorasickKind::NoncontiguousNFA));
1150 }
1151);
1152testconfig!(
1153 anchored,
1154 search_leftmost_longest_anchored_nfa_contig_default,
1155 AC_LEFTMOST_LONGEST_ANCHORED,
1156 LeftmostLongest,
1157 |b: &mut AhoCorasickBuilder| {
1158 b.start_kind(StartKind::Anchored)
1159 .kind(Some(AhoCorasickKind::ContiguousNFA));
1160 }
1161);
1162testconfig!(
1163 anchored,
1164 search_leftmost_longest_anchored_dfa_default,
1165 AC_LEFTMOST_LONGEST_ANCHORED,
1166 LeftmostLongest,
1167 |b: &mut AhoCorasickBuilder| {
1168 b.start_kind(StartKind::Anchored).kind(Some(AhoCorasickKind::DFA));
1169 }
1170);
1171testconfig!(
1172 anchored,
1173 search_leftmost_longest_anchored_dfa_start_both,
1174 AC_LEFTMOST_LONGEST_ANCHORED,
1175 LeftmostLongest,
1176 |b: &mut AhoCorasickBuilder| {
1177 b.start_kind(StartKind::Both).kind(Some(AhoCorasickKind::DFA));
1178 }
1179);
1180
1181// And also write out the test combinations for ASCII case insensitivity.
1182testconfig!(
1183 acasei_standard_default,
1184 &[ASCII_CASE_INSENSITIVE],
1185 Standard,
1186 |b: &mut AhoCorasickBuilder| {
1187 b.prefilter(false).ascii_case_insensitive(true);
1188 }
1189);
1190testconfig!(
1191 acasei_standard_nfa_noncontig_default,
1192 &[ASCII_CASE_INSENSITIVE],
1193 Standard,
1194 |b: &mut AhoCorasickBuilder| {
1195 b.kind(Some(AhoCorasickKind::NoncontiguousNFA))
1196 .prefilter(false)
1197 .ascii_case_insensitive(true);
1198 }
1199);
1200testconfig!(
1201 acasei_standard_nfa_contig_default,
1202 &[ASCII_CASE_INSENSITIVE],
1203 Standard,
1204 |b: &mut AhoCorasickBuilder| {
1205 b.kind(Some(AhoCorasickKind::ContiguousNFA))
1206 .prefilter(false)
1207 .ascii_case_insensitive(true);
1208 }
1209);
1210testconfig!(
1211 acasei_standard_dfa_default,
1212 &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING],
1213 Standard,
1214 |b: &mut AhoCorasickBuilder| {
1215 b.kind(Some(AhoCorasickKind::DFA)).ascii_case_insensitive(true);
1216 }
1217);
1218testconfig!(
1219 overlapping,
1220 acasei_standard_overlapping_default,
1221 &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_OVERLAPPING],
1222 Standard,
1223 |b: &mut AhoCorasickBuilder| {
1224 b.ascii_case_insensitive(true);
1225 }
1226);
1227testconfig!(
1228 overlapping,
1229 acasei_standard_overlapping_nfa_noncontig_default,
1230 &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_OVERLAPPING],
1231 Standard,
1232 |b: &mut AhoCorasickBuilder| {
1233 b.kind(Some(AhoCorasickKind::NoncontiguousNFA))
1234 .ascii_case_insensitive(true);
1235 }
1236);
1237testconfig!(
1238 overlapping,
1239 acasei_standard_overlapping_nfa_contig_default,
1240 &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_OVERLAPPING],
1241 Standard,
1242 |b: &mut AhoCorasickBuilder| {
1243 b.kind(Some(AhoCorasickKind::ContiguousNFA))
1244 .ascii_case_insensitive(true);
1245 }
1246);
1247testconfig!(
1248 overlapping,
1249 acasei_standard_overlapping_dfa_default,
1250 &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_OVERLAPPING],
1251 Standard,
1252 |b: &mut AhoCorasickBuilder| {
1253 b.kind(Some(AhoCorasickKind::DFA)).ascii_case_insensitive(true);
1254 }
1255);
1256testconfig!(
1257 acasei_leftmost_first_default,
1258 &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING],
1259 LeftmostFirst,
1260 |b: &mut AhoCorasickBuilder| {
1261 b.ascii_case_insensitive(true);
1262 }
1263);
1264testconfig!(
1265 acasei_leftmost_first_nfa_noncontig_default,
1266 &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING],
1267 LeftmostFirst,
1268 |b: &mut AhoCorasickBuilder| {
1269 b.kind(Some(AhoCorasickKind::NoncontiguousNFA))
1270 .ascii_case_insensitive(true);
1271 }
1272);
1273testconfig!(
1274 acasei_leftmost_first_nfa_contig_default,
1275 &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING],
1276 LeftmostFirst,
1277 |b: &mut AhoCorasickBuilder| {
1278 b.kind(Some(AhoCorasickKind::ContiguousNFA))
1279 .ascii_case_insensitive(true);
1280 }
1281);
1282testconfig!(
1283 acasei_leftmost_first_dfa_default,
1284 &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING],
1285 LeftmostFirst,
1286 |b: &mut AhoCorasickBuilder| {
1287 b.kind(Some(AhoCorasickKind::DFA)).ascii_case_insensitive(true);
1288 }
1289);
1290testconfig!(
1291 acasei_leftmost_longest_default,
1292 &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING],
1293 LeftmostLongest,
1294 |b: &mut AhoCorasickBuilder| {
1295 b.ascii_case_insensitive(true);
1296 }
1297);
1298testconfig!(
1299 acasei_leftmost_longest_nfa_noncontig_default,
1300 &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING],
1301 LeftmostLongest,
1302 |b: &mut AhoCorasickBuilder| {
1303 b.kind(Some(AhoCorasickKind::NoncontiguousNFA))
1304 .ascii_case_insensitive(true);
1305 }
1306);
1307testconfig!(
1308 acasei_leftmost_longest_nfa_contig_default,
1309 &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING],
1310 LeftmostLongest,
1311 |b: &mut AhoCorasickBuilder| {
1312 b.kind(Some(AhoCorasickKind::ContiguousNFA))
1313 .ascii_case_insensitive(true);
1314 }
1315);
1316testconfig!(
1317 acasei_leftmost_longest_dfa_default,
1318 &[ASCII_CASE_INSENSITIVE, ASCII_CASE_INSENSITIVE_NON_OVERLAPPING],
1319 LeftmostLongest,
1320 |b: &mut AhoCorasickBuilder| {
1321 b.kind(Some(AhoCorasickKind::DFA)).ascii_case_insensitive(true);
1322 }
1323);
1324
1325fn run_search_tests<F: FnMut(&SearchTest) -> Vec<Match>>(
1326 which: TestCollection,
1327 mut f: F,
1328) {
1329 let get_match_triples =
1330 |matches: Vec<Match>| -> Vec<(usize, usize, usize)> {
1331 matches
1332 .into_iter()
1333 .map(|m| (m.pattern().as_usize(), m.start(), m.end()))
1334 .collect()
1335 };
1336 for &tests in which {
1337 for test in tests {
1338 assert_eq!(
1339 test.matches,
1340 get_match_triples(f(&test)).as_slice(),
1341 "test: {}, patterns: {:?}, haystack: {:?}",
1342 test.name,
1343 test.patterns,
1344 test.haystack
1345 );
1346 }
1347 }
1348}
1349
1350// Like 'run_search_tests', but we skip any tests that contain the empty
1351// pattern because stream searching doesn't support it.
1352#[cfg(feature = "std")]
1353fn run_stream_search_tests<F: FnMut(&SearchTest) -> Vec<Match>>(
1354 which: TestCollection,
1355 mut f: F,
1356) {
1357 let get_match_triples =
1358 |matches: Vec<Match>| -> Vec<(usize, usize, usize)> {
1359 matches
1360 .into_iter()
1361 .map(|m| (m.pattern().as_usize(), m.start(), m.end()))
1362 .collect()
1363 };
1364 for &tests in which {
1365 for test in tests {
1366 if test.patterns.iter().any(|p| p.is_empty()) {
1367 continue;
1368 }
1369 assert_eq!(
1370 test.matches,
1371 get_match_triples(f(&test)).as_slice(),
1372 "test: {}, patterns: {:?}, haystack: {:?}",
1373 test.name,
1374 test.patterns,
1375 test.haystack
1376 );
1377 }
1378 }
1379}
1380
1381#[test]
1382fn search_tests_have_unique_names() {
1383 let assert = |constname, tests: &[SearchTest]| {
1384 let mut seen = HashMap::new(); // map from test name to position
1385 for (i, test) in tests.iter().enumerate() {
1386 if !seen.contains_key(test.name) {
1387 seen.insert(test.name, i);
1388 } else {
1389 let last = seen[test.name];
1390 panic!(
1391 "{} tests have duplicate names at positions {} and {}",
1392 constname, last, i
1393 );
1394 }
1395 }
1396 };
1397 assert("BASICS", BASICS);
1398 assert("STANDARD", STANDARD);
1399 assert("LEFTMOST", LEFTMOST);
1400 assert("LEFTMOST_FIRST", LEFTMOST_FIRST);
1401 assert("LEFTMOST_LONGEST", LEFTMOST_LONGEST);
1402 assert("NON_OVERLAPPING", NON_OVERLAPPING);
1403 assert("OVERLAPPING", OVERLAPPING);
1404 assert("REGRESSION", REGRESSION);
1405}
1406
1407#[cfg(feature = "std")]
1408#[test]
1409#[should_panic]
1410fn stream_not_allowed_leftmost_first() {
1411 let fsm = AhoCorasick::builder()
1412 .match_kind(MatchKind::LeftmostFirst)
1413 .build(None::<String>)
1414 .unwrap();
1415 assert_eq!(fsm.stream_find_iter(&b""[..]).count(), 0);
1416}
1417
1418#[cfg(feature = "std")]
1419#[test]
1420#[should_panic]
1421fn stream_not_allowed_leftmost_longest() {
1422 let fsm = AhoCorasick::builder()
1423 .match_kind(MatchKind::LeftmostLongest)
1424 .build(None::<String>)
1425 .unwrap();
1426 assert_eq!(fsm.stream_find_iter(&b""[..]).count(), 0);
1427}
1428
1429#[test]
1430#[should_panic]
1431fn overlapping_not_allowed_leftmost_first() {
1432 let fsm = AhoCorasick::builder()
1433 .match_kind(MatchKind::LeftmostFirst)
1434 .build(None::<String>)
1435 .unwrap();
1436 assert_eq!(fsm.find_overlapping_iter("").count(), 0);
1437}
1438
1439#[test]
1440#[should_panic]
1441fn overlapping_not_allowed_leftmost_longest() {
1442 let fsm = AhoCorasick::builder()
1443 .match_kind(MatchKind::LeftmostLongest)
1444 .build(None::<String>)
1445 .unwrap();
1446 assert_eq!(fsm.find_overlapping_iter("").count(), 0);
1447}
1448
1449// This tests that if we build an AC matcher with an "unanchored" start kind,
1450// then we can't run an anchored search even if the underlying searcher
1451// supports it.
1452//
1453// The key bit here is that both of the NFAs in this crate unconditionally
1454// support both unanchored and anchored searches, but the DFA does not because
1455// of the added cost of doing so. To avoid the top-level AC matcher sometimes
1456// supporting anchored and sometimes not (depending on which searcher it
1457// chooses to use internally), we ensure that the given 'StartKind' is always
1458// respected.
1459#[test]
1460fn anchored_not_allowed_even_if_technically_available() {
1461 let ac = AhoCorasick::builder()
1462 .kind(Some(AhoCorasickKind::NoncontiguousNFA))
1463 .start_kind(StartKind::Unanchored)
1464 .build(&["foo"])
1465 .unwrap();
1466 assert!(ac.try_find(Input::new("foo").anchored(Anchored::Yes)).is_err());
1467
1468 let ac = AhoCorasick::builder()
1469 .kind(Some(AhoCorasickKind::ContiguousNFA))
1470 .start_kind(StartKind::Unanchored)
1471 .build(&["foo"])
1472 .unwrap();
1473 assert!(ac.try_find(Input::new("foo").anchored(Anchored::Yes)).is_err());
1474
1475 // For completeness, check that the DFA returns an error too.
1476 let ac = AhoCorasick::builder()
1477 .kind(Some(AhoCorasickKind::DFA))
1478 .start_kind(StartKind::Unanchored)
1479 .build(&["foo"])
1480 .unwrap();
1481 assert!(ac.try_find(Input::new("foo").anchored(Anchored::Yes)).is_err());
1482}
1483
1484// This is like the test aboved, but with unanchored and anchored flipped. That
1485// is, we asked for an AC searcher with anchored support and we check that
1486// unanchored searches return an error even if the underlying searcher would
1487// technically support it.
1488#[test]
1489fn unanchored_not_allowed_even_if_technically_available() {
1490 let ac = AhoCorasick::builder()
1491 .kind(Some(AhoCorasickKind::NoncontiguousNFA))
1492 .start_kind(StartKind::Anchored)
1493 .build(&["foo"])
1494 .unwrap();
1495 assert!(ac.try_find(Input::new("foo").anchored(Anchored::No)).is_err());
1496
1497 let ac = AhoCorasick::builder()
1498 .kind(Some(AhoCorasickKind::ContiguousNFA))
1499 .start_kind(StartKind::Anchored)
1500 .build(&["foo"])
1501 .unwrap();
1502 assert!(ac.try_find(Input::new("foo").anchored(Anchored::No)).is_err());
1503
1504 // For completeness, check that the DFA returns an error too.
1505 let ac = AhoCorasick::builder()
1506 .kind(Some(AhoCorasickKind::DFA))
1507 .start_kind(StartKind::Anchored)
1508 .build(&["foo"])
1509 .unwrap();
1510 assert!(ac.try_find(Input::new("foo").anchored(Anchored::No)).is_err());
1511}
1512
1513// This tests that a prefilter does not cause a search to report a match
1514// outside the bounds provided by the caller.
1515//
1516// This is a regression test for a bug I introduced during the rewrite of most
1517// of the crate after 0.7. It was never released. The tricky part here is
1518// ensuring we get a prefilter that can report matches on its own (such as the
1519// packed searcher). Otherwise, prefilters that report false positives might
1520// have searched past the bounds provided by the caller, but confirming the
1521// match would subsequently fail.
1522#[test]
1523fn prefilter_stays_in_bounds() {
1524 let ac = AhoCorasick::builder()
1525 .match_kind(MatchKind::LeftmostFirst)
1526 .build(&["sam", "frodo", "pippin", "merry", "gandalf", "sauron"])
1527 .unwrap();
1528 let haystack = "foo gandalf";
1529 assert_eq!(None, ac.find(Input::new(haystack).range(0..10)));
1530}
1531
1532// See: https://github.com/BurntSushi/aho-corasick/issues/44
1533//
1534// In short, this test ensures that enabling ASCII case insensitivity does not
1535// visit an exponential number of states when filling in failure transitions.
1536#[test]
1537fn regression_ascii_case_insensitive_no_exponential() {
1538 let ac = AhoCorasick::builder()
1539 .ascii_case_insensitive(true)
1540 .build(&["Tsubaki House-Triple Shot Vol01校花三姐妹"])
1541 .unwrap();
1542 assert!(ac.find("").is_none());
1543}
1544
1545// See: https://github.com/BurntSushi/aho-corasick/issues/53
1546//
1547// This test ensures that the rare byte prefilter works in a particular corner
1548// case. In particular, the shift offset detected for '/' in the patterns below
1549// was incorrect, leading to a false negative.
1550#[test]
1551fn regression_rare_byte_prefilter() {
1552 use crate::AhoCorasick;
1553
1554 let ac = AhoCorasick::new(&["ab/j/", "x/"]).unwrap();
1555 assert!(ac.is_match("ab/j/"));
1556}
1557
1558#[test]
1559fn regression_case_insensitive_prefilter() {
1560 for c in b'a'..b'z' {
1561 for c2 in b'a'..b'z' {
1562 let c = c as char;
1563 let c2 = c2 as char;
1564 let needle = format!("{}{}", c, c2).to_lowercase();
1565 let haystack = needle.to_uppercase();
1566 let ac = AhoCorasick::builder()
1567 .ascii_case_insensitive(true)
1568 .prefilter(true)
1569 .build(&[&needle])
1570 .unwrap();
1571 assert_eq!(
1572 1,
1573 ac.find_iter(&haystack).count(),
1574 "failed to find {:?} in {:?}\n\nautomaton:\n{:?}",
1575 needle,
1576 haystack,
1577 ac,
1578 );
1579 }
1580 }
1581}
1582
1583// See: https://github.com/BurntSushi/aho-corasick/issues/64
1584//
1585// This occurs when the rare byte prefilter is active.
1586#[cfg(feature = "std")]
1587#[test]
1588fn regression_stream_rare_byte_prefilter() {
1589 use std::io::Read;
1590
1591 // NOTE: The test only fails if this ends with j.
1592 const MAGIC: [u8; 5] = *b"1234j";
1593
1594 // NOTE: The test fails for value in 8188..=8191 These value put the string
1595 // to search accross two call to read because the buffer size is 64KB by
1596 // default.
1597 const BEGIN: usize = 65_535;
1598
1599 /// This is just a structure that implements Reader. The reader
1600 /// implementation will simulate a file filled with 0, except for the MAGIC
1601 /// string at offset BEGIN.
1602 #[derive(Default)]
1603 struct R {
1604 read: usize,
1605 }
1606
1607 impl Read for R {
1608 fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
1609 if self.read > 100000 {
1610 return Ok(0);
1611 }
1612 let mut from = 0;
1613 if self.read < BEGIN {
1614 from = buf.len().min(BEGIN - self.read);
1615 for x in 0..from {
1616 buf[x] = 0;
1617 }
1618 self.read += from;
1619 }
1620 if self.read >= BEGIN && self.read <= BEGIN + MAGIC.len() {
1621 let to = buf.len().min(BEGIN + MAGIC.len() - self.read + from);
1622 if to > from {
1623 buf[from..to].copy_from_slice(
1624 &MAGIC
1625 [self.read - BEGIN..self.read - BEGIN + to - from],
1626 );
1627 self.read += to - from;
1628 from = to;
1629 }
1630 }
1631 for x in from..buf.len() {
1632 buf[x] = 0;
1633 self.read += 1;
1634 }
1635 Ok(buf.len())
1636 }
1637 }
1638
1639 fn run() -> std::io::Result<()> {
1640 let aut = AhoCorasick::builder()
1641 // Enable byte classes to make debugging the automaton easier. It
1642 // should have no effect on the test result.
1643 .byte_classes(false)
1644 .build(&[&MAGIC])
1645 .unwrap();
1646
1647 // While reading from a vector, it works:
1648 let mut buf = alloc::vec![];
1649 R::default().read_to_end(&mut buf)?;
1650 let from_whole = aut.find_iter(&buf).next().unwrap().start();
1651
1652 // But using stream_find_iter fails!
1653 let mut file = std::io::BufReader::new(R::default());
1654 let begin = aut
1655 .stream_find_iter(&mut file)
1656 .next()
1657 .expect("NOT FOUND!!!!")? // Panic here
1658 .start();
1659 assert_eq!(from_whole, begin);
1660 Ok(())
1661 }
1662
1663 run().unwrap()
1664}
1665