1use std::collections::HashMap;
2
3use alloc::{
4 format,
5 string::{String, ToString},
6 vec,
7 vec::Vec,
8};
9
10use crate::{
11 packed::{Config, MatchKind},
12 util::search::Match,
13};
14
15/// A description of a single test against a multi-pattern searcher.
16///
17/// A single test may not necessarily pass on every configuration of a
18/// searcher. The tests are categorized and grouped appropriately below.
19#[derive(Clone, Debug, Eq, PartialEq)]
20struct SearchTest {
21 /// The name of this test, for debugging.
22 name: &'static str,
23 /// The patterns to search for.
24 patterns: &'static [&'static str],
25 /// The text to search.
26 haystack: &'static str,
27 /// Each match is a triple of (pattern_index, start, end), where
28 /// pattern_index is an index into `patterns` and `start`/`end` are indices
29 /// into `haystack`.
30 matches: &'static [(usize, usize, usize)],
31}
32
33struct SearchTestOwned {
34 offset: usize,
35 name: String,
36 patterns: Vec<String>,
37 haystack: String,
38 matches: Vec<(usize, usize, usize)>,
39}
40
41impl SearchTest {
42 fn variations(&self) -> Vec<SearchTestOwned> {
43 let count = if cfg!(miri) { 1 } else { 261 };
44 let mut tests = vec![];
45 for i in 0..count {
46 tests.push(self.offset_prefix(i));
47 tests.push(self.offset_suffix(i));
48 tests.push(self.offset_both(i));
49 }
50 tests
51 }
52
53 fn offset_both(&self, off: usize) -> SearchTestOwned {
54 SearchTestOwned {
55 offset: off,
56 name: self.name.to_string(),
57 patterns: self.patterns.iter().map(|s| s.to_string()).collect(),
58 haystack: format!(
59 "{}{}{}",
60 "Z".repeat(off),
61 self.haystack,
62 "Z".repeat(off)
63 ),
64 matches: self
65 .matches
66 .iter()
67 .map(|&(id, s, e)| (id, s + off, e + off))
68 .collect(),
69 }
70 }
71
72 fn offset_prefix(&self, off: usize) -> SearchTestOwned {
73 SearchTestOwned {
74 offset: off,
75 name: self.name.to_string(),
76 patterns: self.patterns.iter().map(|s| s.to_string()).collect(),
77 haystack: format!("{}{}", "Z".repeat(off), self.haystack),
78 matches: self
79 .matches
80 .iter()
81 .map(|&(id, s, e)| (id, s + off, e + off))
82 .collect(),
83 }
84 }
85
86 fn offset_suffix(&self, off: usize) -> SearchTestOwned {
87 SearchTestOwned {
88 offset: off,
89 name: self.name.to_string(),
90 patterns: self.patterns.iter().map(|s| s.to_string()).collect(),
91 haystack: format!("{}{}", self.haystack, "Z".repeat(off)),
92 matches: self.matches.to_vec(),
93 }
94 }
95}
96
97/// Short-hand constructor for SearchTest. We use it a lot below.
98macro_rules! t {
99 ($name:ident, $patterns:expr, $haystack:expr, $matches:expr) => {
100 SearchTest {
101 name: stringify!($name),
102 patterns: $patterns,
103 haystack: $haystack,
104 matches: $matches,
105 }
106 };
107}
108
109/// A collection of test groups.
110type TestCollection = &'static [&'static [SearchTest]];
111
112// Define several collections corresponding to the different type of match
113// semantics supported. These collections have some overlap, but each
114// collection should have some tests that no other collection has.
115
116/// Tests for leftmost-first match semantics.
117const PACKED_LEFTMOST_FIRST: TestCollection =
118 &[BASICS, LEFTMOST, LEFTMOST_FIRST, REGRESSION, TEDDY];
119
120/// Tests for leftmost-longest match semantics.
121const PACKED_LEFTMOST_LONGEST: TestCollection =
122 &[BASICS, LEFTMOST, LEFTMOST_LONGEST, REGRESSION, TEDDY];
123
124// Now define the individual tests that make up the collections above.
125
126/// A collection of tests for the that should always be true regardless of
127/// match semantics. That is, all combinations of leftmost-{first, longest}
128/// should produce the same answer.
129const BASICS: &'static [SearchTest] = &[
130 t!(basic001, &["a"], "", &[]),
131 t!(basic010, &["a"], "a", &[(0, 0, 1)]),
132 t!(basic020, &["a"], "aa", &[(0, 0, 1), (0, 1, 2)]),
133 t!(basic030, &["a"], "aaa", &[(0, 0, 1), (0, 1, 2), (0, 2, 3)]),
134 t!(basic040, &["a"], "aba", &[(0, 0, 1), (0, 2, 3)]),
135 t!(basic050, &["a"], "bba", &[(0, 2, 3)]),
136 t!(basic060, &["a"], "bbb", &[]),
137 t!(basic070, &["a"], "bababbbba", &[(0, 1, 2), (0, 3, 4), (0, 8, 9)]),
138 t!(basic100, &["aa"], "", &[]),
139 t!(basic110, &["aa"], "aa", &[(0, 0, 2)]),
140 t!(basic120, &["aa"], "aabbaa", &[(0, 0, 2), (0, 4, 6)]),
141 t!(basic130, &["aa"], "abbab", &[]),
142 t!(basic140, &["aa"], "abbabaa", &[(0, 5, 7)]),
143 t!(basic150, &["aaa"], "aaa", &[(0, 0, 3)]),
144 t!(basic200, &["abc"], "abc", &[(0, 0, 3)]),
145 t!(basic210, &["abc"], "zazabzabcz", &[(0, 6, 9)]),
146 t!(basic220, &["abc"], "zazabczabcz", &[(0, 3, 6), (0, 7, 10)]),
147 t!(basic230, &["abcd"], "abcd", &[(0, 0, 4)]),
148 t!(basic240, &["abcd"], "zazabzabcdz", &[(0, 6, 10)]),
149 t!(basic250, &["abcd"], "zazabcdzabcdz", &[(0, 3, 7), (0, 8, 12)]),
150 t!(basic300, &["a", "b"], "", &[]),
151 t!(basic310, &["a", "b"], "z", &[]),
152 t!(basic320, &["a", "b"], "b", &[(1, 0, 1)]),
153 t!(basic330, &["a", "b"], "a", &[(0, 0, 1)]),
154 t!(
155 basic340,
156 &["a", "b"],
157 "abba",
158 &[(0, 0, 1), (1, 1, 2), (1, 2, 3), (0, 3, 4),]
159 ),
160 t!(
161 basic350,
162 &["b", "a"],
163 "abba",
164 &[(1, 0, 1), (0, 1, 2), (0, 2, 3), (1, 3, 4),]
165 ),
166 t!(basic360, &["abc", "bc"], "xbc", &[(1, 1, 3),]),
167 t!(basic400, &["foo", "bar"], "", &[]),
168 t!(basic410, &["foo", "bar"], "foobar", &[(0, 0, 3), (1, 3, 6),]),
169 t!(basic420, &["foo", "bar"], "barfoo", &[(1, 0, 3), (0, 3, 6),]),
170 t!(basic430, &["foo", "bar"], "foofoo", &[(0, 0, 3), (0, 3, 6),]),
171 t!(basic440, &["foo", "bar"], "barbar", &[(1, 0, 3), (1, 3, 6),]),
172 t!(basic450, &["foo", "bar"], "bafofoo", &[(0, 4, 7),]),
173 t!(basic460, &["bar", "foo"], "bafofoo", &[(1, 4, 7),]),
174 t!(basic470, &["foo", "bar"], "fobabar", &[(1, 4, 7),]),
175 t!(basic480, &["bar", "foo"], "fobabar", &[(0, 4, 7),]),
176 t!(basic700, &["yabcdef", "abcdezghi"], "yabcdefghi", &[(0, 0, 7),]),
177 t!(basic710, &["yabcdef", "abcdezghi"], "yabcdezghi", &[(1, 1, 10),]),
178 t!(
179 basic720,
180 &["yabcdef", "bcdeyabc", "abcdezghi"],
181 "yabcdezghi",
182 &[(2, 1, 10),]
183 ),
184 t!(basic810, &["abcd", "bcd", "cd"], "abcd", &[(0, 0, 4),]),
185 t!(basic820, &["bcd", "cd", "abcd"], "abcd", &[(2, 0, 4),]),
186 t!(basic830, &["abc", "bc"], "zazabcz", &[(0, 3, 6),]),
187 t!(
188 basic840,
189 &["ab", "ba"],
190 "abababa",
191 &[(0, 0, 2), (0, 2, 4), (0, 4, 6),]
192 ),
193 t!(basic850, &["foo", "foo"], "foobarfoo", &[(0, 0, 3), (0, 6, 9),]),
194];
195
196/// Tests for leftmost match semantics. These should pass for both
197/// leftmost-first and leftmost-longest match kinds. Stated differently, among
198/// ambiguous matches, the longest match and the match that appeared first when
199/// constructing the automaton should always be the same.
200const LEFTMOST: &'static [SearchTest] = &[
201 t!(leftmost000, &["ab", "ab"], "abcd", &[(0, 0, 2)]),
202 t!(leftmost030, &["a", "ab"], "aa", &[(0, 0, 1), (0, 1, 2)]),
203 t!(leftmost031, &["ab", "a"], "aa", &[(1, 0, 1), (1, 1, 2)]),
204 t!(leftmost032, &["ab", "a"], "xayabbbz", &[(1, 1, 2), (0, 3, 5)]),
205 t!(leftmost300, &["abcd", "bce", "b"], "abce", &[(1, 1, 4)]),
206 t!(leftmost310, &["abcd", "ce", "bc"], "abce", &[(2, 1, 3)]),
207 t!(leftmost320, &["abcd", "bce", "ce", "b"], "abce", &[(1, 1, 4)]),
208 t!(leftmost330, &["abcd", "bce", "cz", "bc"], "abcz", &[(3, 1, 3)]),
209 t!(leftmost340, &["bce", "cz", "bc"], "bcz", &[(2, 0, 2)]),
210 t!(leftmost350, &["abc", "bd", "ab"], "abd", &[(2, 0, 2)]),
211 t!(
212 leftmost360,
213 &["abcdefghi", "hz", "abcdefgh"],
214 "abcdefghz",
215 &[(2, 0, 8),]
216 ),
217 t!(
218 leftmost370,
219 &["abcdefghi", "cde", "hz", "abcdefgh"],
220 "abcdefghz",
221 &[(3, 0, 8),]
222 ),
223 t!(
224 leftmost380,
225 &["abcdefghi", "hz", "abcdefgh", "a"],
226 "abcdefghz",
227 &[(2, 0, 8),]
228 ),
229 t!(
230 leftmost390,
231 &["b", "abcdefghi", "hz", "abcdefgh"],
232 "abcdefghz",
233 &[(3, 0, 8),]
234 ),
235 t!(
236 leftmost400,
237 &["h", "abcdefghi", "hz", "abcdefgh"],
238 "abcdefghz",
239 &[(3, 0, 8),]
240 ),
241 t!(
242 leftmost410,
243 &["z", "abcdefghi", "hz", "abcdefgh"],
244 "abcdefghz",
245 &[(3, 0, 8), (0, 8, 9),]
246 ),
247];
248
249/// Tests for non-overlapping leftmost-first match semantics. These tests
250/// should generally be specific to leftmost-first, which means they should
251/// generally fail under leftmost-longest semantics.
252const LEFTMOST_FIRST: &'static [SearchTest] = &[
253 t!(leftfirst000, &["ab", "abcd"], "abcd", &[(0, 0, 2)]),
254 t!(leftfirst020, &["abcd", "ab"], "abcd", &[(0, 0, 4)]),
255 t!(leftfirst030, &["ab", "ab"], "abcd", &[(0, 0, 2)]),
256 t!(leftfirst040, &["a", "ab"], "xayabbbz", &[(0, 1, 2), (0, 3, 4)]),
257 t!(leftfirst100, &["abcdefg", "bcde", "bcdef"], "abcdef", &[(1, 1, 5)]),
258 t!(leftfirst110, &["abcdefg", "bcdef", "bcde"], "abcdef", &[(1, 1, 6)]),
259 t!(leftfirst300, &["abcd", "b", "bce"], "abce", &[(1, 1, 2)]),
260 t!(
261 leftfirst310,
262 &["abcd", "b", "bce", "ce"],
263 "abce",
264 &[(1, 1, 2), (3, 2, 4),]
265 ),
266 t!(
267 leftfirst320,
268 &["a", "abcdefghi", "hz", "abcdefgh"],
269 "abcdefghz",
270 &[(0, 0, 1), (2, 7, 9),]
271 ),
272 t!(leftfirst330, &["a", "abab"], "abab", &[(0, 0, 1), (0, 2, 3)]),
273 t!(
274 leftfirst340,
275 &["abcdef", "x", "x", "x", "x", "x", "x", "abcde"],
276 "abcdef",
277 &[(0, 0, 6)]
278 ),
279];
280
281/// Tests for non-overlapping leftmost-longest match semantics. These tests
282/// should generally be specific to leftmost-longest, which means they should
283/// generally fail under leftmost-first semantics.
284const LEFTMOST_LONGEST: &'static [SearchTest] = &[
285 t!(leftlong000, &["ab", "abcd"], "abcd", &[(1, 0, 4)]),
286 t!(leftlong010, &["abcd", "bcd", "cd", "b"], "abcd", &[(0, 0, 4),]),
287 t!(leftlong040, &["a", "ab"], "a", &[(0, 0, 1)]),
288 t!(leftlong050, &["a", "ab"], "ab", &[(1, 0, 2)]),
289 t!(leftlong060, &["ab", "a"], "a", &[(1, 0, 1)]),
290 t!(leftlong070, &["ab", "a"], "ab", &[(0, 0, 2)]),
291 t!(leftlong100, &["abcdefg", "bcde", "bcdef"], "abcdef", &[(2, 1, 6)]),
292 t!(leftlong110, &["abcdefg", "bcdef", "bcde"], "abcdef", &[(1, 1, 6)]),
293 t!(leftlong300, &["abcd", "b", "bce"], "abce", &[(2, 1, 4)]),
294 t!(
295 leftlong310,
296 &["a", "abcdefghi", "hz", "abcdefgh"],
297 "abcdefghz",
298 &[(3, 0, 8),]
299 ),
300 t!(leftlong320, &["a", "abab"], "abab", &[(1, 0, 4)]),
301 t!(leftlong330, &["abcd", "b", "ce"], "abce", &[(1, 1, 2), (2, 2, 4),]),
302 t!(leftlong340, &["a", "ab"], "xayabbbz", &[(0, 1, 2), (1, 3, 5)]),
303];
304
305/// Regression tests that are applied to all combinations.
306///
307/// If regression tests are needed for specific match semantics, then add them
308/// to the appropriate group above.
309const REGRESSION: &'static [SearchTest] = &[
310 t!(regression010, &["inf", "ind"], "infind", &[(0, 0, 3), (1, 3, 6),]),
311 t!(regression020, &["ind", "inf"], "infind", &[(1, 0, 3), (0, 3, 6),]),
312 t!(
313 regression030,
314 &["libcore/", "libstd/"],
315 "libcore/char/methods.rs",
316 &[(0, 0, 8),]
317 ),
318 t!(
319 regression040,
320 &["libstd/", "libcore/"],
321 "libcore/char/methods.rs",
322 &[(1, 0, 8),]
323 ),
324 t!(
325 regression050,
326 &["\x00\x00\x01", "\x00\x00\x00"],
327 "\x00\x00\x00",
328 &[(1, 0, 3),]
329 ),
330 t!(
331 regression060,
332 &["\x00\x00\x00", "\x00\x00\x01"],
333 "\x00\x00\x00",
334 &[(0, 0, 3),]
335 ),
336];
337
338const TEDDY: &'static [SearchTest] = &[
339 t!(
340 teddy010,
341 &["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k"],
342 "abcdefghijk",
343 &[
344 (0, 0, 1),
345 (1, 1, 2),
346 (2, 2, 3),
347 (3, 3, 4),
348 (4, 4, 5),
349 (5, 5, 6),
350 (6, 6, 7),
351 (7, 7, 8),
352 (8, 8, 9),
353 (9, 9, 10),
354 (10, 10, 11)
355 ]
356 ),
357 t!(
358 teddy020,
359 &["ab", "bc", "cd", "de", "ef", "fg", "gh", "hi", "ij", "jk", "kl"],
360 "abcdefghijk",
361 &[(0, 0, 2), (2, 2, 4), (4, 4, 6), (6, 6, 8), (8, 8, 10),]
362 ),
363 t!(
364 teddy030,
365 &["abc"],
366 "abcdefghijklmnopqrstuvwxyzabcdefghijk",
367 &[(0, 0, 3), (0, 26, 29)]
368 ),
369];
370
371// Now define a test for each combination of things above that we want to run.
372// Since there are a few different combinations for each collection of tests,
373// we define a couple of macros to avoid repetition drudgery. The testconfig
374// macro constructs the automaton from a given match kind, and runs the search
375// tests one-by-one over the given collection. The `with` parameter allows one
376// to configure the config with additional parameters. The testcombo macro
377// invokes testconfig in precisely this way: it sets up several tests where
378// each one turns a different knob on Config.
379
380macro_rules! testconfig {
381 ($name:ident, $collection:expr, $with:expr) => {
382 #[test]
383 fn $name() {
384 run_search_tests($collection, |test| {
385 let mut config = Config::new();
386 $with(&mut config);
387 let mut builder = config.builder();
388 builder.extend(test.patterns.iter().map(|p| p.as_bytes()));
389 let searcher = match builder.build() {
390 Some(searcher) => searcher,
391 None => {
392 // For x86-64 and aarch64, not building a searcher is
393 // probably a bug, so be loud.
394 if cfg!(any(
395 target_arch = "x86_64",
396 target_arch = "aarch64"
397 )) {
398 panic!("failed to build packed searcher")
399 }
400 return None;
401 }
402 };
403 Some(searcher.find_iter(&test.haystack).collect())
404 });
405 }
406 };
407}
408
409testconfig!(
410 search_default_leftmost_first,
411 PACKED_LEFTMOST_FIRST,
412 |_: &mut Config| {}
413);
414
415testconfig!(
416 search_default_leftmost_longest,
417 PACKED_LEFTMOST_LONGEST,
418 |c: &mut Config| {
419 c.match_kind(MatchKind::LeftmostLongest);
420 }
421);
422
423testconfig!(
424 search_teddy_leftmost_first,
425 PACKED_LEFTMOST_FIRST,
426 |c: &mut Config| {
427 c.only_teddy(true);
428 }
429);
430
431testconfig!(
432 search_teddy_leftmost_longest,
433 PACKED_LEFTMOST_LONGEST,
434 |c: &mut Config| {
435 c.only_teddy(true).match_kind(MatchKind::LeftmostLongest);
436 }
437);
438
439testconfig!(
440 search_teddy_ssse3_leftmost_first,
441 PACKED_LEFTMOST_FIRST,
442 |c: &mut Config| {
443 c.only_teddy(true);
444 #[cfg(target_arch = "x86_64")]
445 if std::is_x86_feature_detected!("ssse3") {
446 c.only_teddy_256bit(Some(false));
447 }
448 }
449);
450
451testconfig!(
452 search_teddy_ssse3_leftmost_longest,
453 PACKED_LEFTMOST_LONGEST,
454 |c: &mut Config| {
455 c.only_teddy(true).match_kind(MatchKind::LeftmostLongest);
456 #[cfg(target_arch = "x86_64")]
457 if std::is_x86_feature_detected!("ssse3") {
458 c.only_teddy_256bit(Some(false));
459 }
460 }
461);
462
463testconfig!(
464 search_teddy_avx2_leftmost_first,
465 PACKED_LEFTMOST_FIRST,
466 |c: &mut Config| {
467 c.only_teddy(true);
468 #[cfg(target_arch = "x86_64")]
469 if std::is_x86_feature_detected!("avx2") {
470 c.only_teddy_256bit(Some(true));
471 }
472 }
473);
474
475testconfig!(
476 search_teddy_avx2_leftmost_longest,
477 PACKED_LEFTMOST_LONGEST,
478 |c: &mut Config| {
479 c.only_teddy(true).match_kind(MatchKind::LeftmostLongest);
480 #[cfg(target_arch = "x86_64")]
481 if std::is_x86_feature_detected!("avx2") {
482 c.only_teddy_256bit(Some(true));
483 }
484 }
485);
486
487testconfig!(
488 search_teddy_fat_leftmost_first,
489 PACKED_LEFTMOST_FIRST,
490 |c: &mut Config| {
491 c.only_teddy(true);
492 #[cfg(target_arch = "x86_64")]
493 if std::is_x86_feature_detected!("avx2") {
494 c.only_teddy_fat(Some(true));
495 }
496 }
497);
498
499testconfig!(
500 search_teddy_fat_leftmost_longest,
501 PACKED_LEFTMOST_LONGEST,
502 |c: &mut Config| {
503 c.only_teddy(true).match_kind(MatchKind::LeftmostLongest);
504 #[cfg(target_arch = "x86_64")]
505 if std::is_x86_feature_detected!("avx2") {
506 c.only_teddy_fat(Some(true));
507 }
508 }
509);
510
511testconfig!(
512 search_rabinkarp_leftmost_first,
513 PACKED_LEFTMOST_FIRST,
514 |c: &mut Config| {
515 c.only_rabin_karp(true);
516 }
517);
518
519testconfig!(
520 search_rabinkarp_leftmost_longest,
521 PACKED_LEFTMOST_LONGEST,
522 |c: &mut Config| {
523 c.only_rabin_karp(true).match_kind(MatchKind::LeftmostLongest);
524 }
525);
526
527#[test]
528fn search_tests_have_unique_names() {
529 let assert = |constname, tests: &[SearchTest]| {
530 let mut seen = HashMap::new(); // map from test name to position
531 for (i, test) in tests.iter().enumerate() {
532 if !seen.contains_key(test.name) {
533 seen.insert(test.name, i);
534 } else {
535 let last = seen[test.name];
536 panic!(
537 "{} tests have duplicate names at positions {} and {}",
538 constname, last, i
539 );
540 }
541 }
542 };
543 assert("BASICS", BASICS);
544 assert("LEFTMOST", LEFTMOST);
545 assert("LEFTMOST_FIRST", LEFTMOST_FIRST);
546 assert("LEFTMOST_LONGEST", LEFTMOST_LONGEST);
547 assert("REGRESSION", REGRESSION);
548 assert("TEDDY", TEDDY);
549}
550
551fn run_search_tests<F: FnMut(&SearchTestOwned) -> Option<Vec<Match>>>(
552 which: TestCollection,
553 mut f: F,
554) {
555 let get_match_triples =
556 |matches: Vec<Match>| -> Vec<(usize, usize, usize)> {
557 matches
558 .into_iter()
559 .map(|m| (m.pattern().as_usize(), m.start(), m.end()))
560 .collect()
561 };
562 for &tests in which {
563 for spec in tests {
564 for test in spec.variations() {
565 let results = match f(&test) {
566 None => continue,
567 Some(results) => results,
568 };
569 assert_eq!(
570 test.matches,
571 get_match_triples(results).as_slice(),
572 "test: {}, patterns: {:?}, haystack(len={:?}): {:?}, \
573 offset: {:?}",
574 test.name,
575 test.patterns,
576 test.haystack.len(),
577 test.haystack,
578 test.offset,
579 );
580 }
581 }
582 }
583}
584