1use alloc::{boxed::Box, vec, vec::Vec};
2
3/// A set of "packed pair" test seeds. Each seed serves as the base for the
4/// generation of many other tests. In essence, the seed captures the pair of
5/// bytes we used for a predicate and first byte among our needle. The tests
6/// generated from each seed essentially vary the length of the needle and
7/// haystack, while using the rare/first byte configuration from the seed.
8///
9/// The purpose of this is to test many different needle/haystack lengths.
10/// In particular, some of the vector optimizations might only have bugs
11/// in haystacks of a certain size.
12const SEEDS: &[Seed] = &[
13 // Why not use different 'first' bytes? It seemed like a good idea to be
14 // able to configure it, but when I wrote the test generator below, it
15 // didn't seem necessary to use for reasons that I forget.
16 Seed { first: b'x', index1: b'y', index2: b'z' },
17 Seed { first: b'x', index1: b'x', index2: b'z' },
18 Seed { first: b'x', index1: b'y', index2: b'x' },
19 Seed { first: b'x', index1: b'x', index2: b'x' },
20 Seed { first: b'x', index1: b'y', index2: b'y' },
21];
22
23/// Runs a host of "packed pair" search tests.
24///
25/// These tests specifically look for the occurrence of a possible substring
26/// match based on a pair of bytes matching at the right offsets.
27pub(crate) struct Runner {
28 fwd: Option<
29 Box<
30 dyn FnMut(&[u8], &[u8], u8, u8) -> Option<Option<usize>> + 'static,
31 >,
32 >,
33}
34
35impl Runner {
36 /// Create a new test runner for "packed pair" substring search.
37 pub(crate) fn new() -> Runner {
38 Runner { fwd: None }
39 }
40
41 /// Run all tests. This panics on the first failure.
42 ///
43 /// If the implementation being tested returns `None` for a particular
44 /// haystack/needle combination, then that test is skipped.
45 ///
46 /// This runs tests on both the forward and reverse implementations given.
47 /// If either (or both) are missing, then tests for that implementation are
48 /// skipped.
49 pub(crate) fn run(self) {
50 if let Some(mut fwd) = self.fwd {
51 for seed in SEEDS.iter() {
52 for t in seed.generate() {
53 match fwd(&t.haystack, &t.needle, t.index1, t.index2) {
54 None => continue,
55 Some(result) => {
56 assert_eq!(
57 t.fwd, result,
58 "FORWARD, needle: {:?}, haystack: {:?}, \
59 index1: {:?}, index2: {:?}",
60 t.needle, t.haystack, t.index1, t.index2,
61 )
62 }
63 }
64 }
65 }
66 }
67 }
68
69 /// Set the implementation for forward "packed pair" substring search.
70 ///
71 /// If the closure returns `None`, then it is assumed that the given
72 /// test cannot be applied to the particular implementation and it is
73 /// skipped. For example, if a particular implementation only supports
74 /// needles or haystacks for some minimum length.
75 ///
76 /// If this is not set, then forward "packed pair" search is not tested.
77 pub(crate) fn fwd(
78 mut self,
79 search: impl FnMut(&[u8], &[u8], u8, u8) -> Option<Option<usize>> + 'static,
80 ) -> Runner {
81 self.fwd = Some(Box::new(search));
82 self
83 }
84}
85
86/// A test that represents the input and expected output to a "packed pair"
87/// search function. The test should be able to run with any "packed pair"
88/// implementation and get the expected output.
89struct Test {
90 haystack: Vec<u8>,
91 needle: Vec<u8>,
92 index1: u8,
93 index2: u8,
94 fwd: Option<usize>,
95}
96
97impl Test {
98 /// Create a new "packed pair" test from a seed and some given offsets to
99 /// the pair of bytes to use as a predicate in the seed's needle.
100 ///
101 /// If a valid test could not be constructed, then None is returned.
102 /// (Currently, we take the approach of massaging tests to be valid
103 /// instead of rejecting them outright.)
104 fn new(
105 seed: Seed,
106 index1: usize,
107 index2: usize,
108 haystack_len: usize,
109 needle_len: usize,
110 fwd: Option<usize>,
111 ) -> Option<Test> {
112 let mut index1: u8 = index1.try_into().unwrap();
113 let mut index2: u8 = index2.try_into().unwrap();
114 // The '#' byte is never used in a haystack (unless we're expecting
115 // a match), while the '@' byte is never used in a needle.
116 let mut haystack = vec![b'@'; haystack_len];
117 let mut needle = vec![b'#'; needle_len];
118 needle[0] = seed.first;
119 needle[index1 as usize] = seed.index1;
120 needle[index2 as usize] = seed.index2;
121 // If we're expecting a match, then make sure the needle occurs
122 // in the haystack at the expected position.
123 if let Some(i) = fwd {
124 haystack[i..i + needle.len()].copy_from_slice(&needle);
125 }
126 // If the operations above lead to rare offsets pointing to the
127 // non-first occurrence of a byte, then adjust it. This might lead
128 // to redundant tests, but it's simpler than trying to change the
129 // generation process I think.
130 if let Some(i) = crate::memchr(seed.index1, &needle) {
131 index1 = u8::try_from(i).unwrap();
132 }
133 if let Some(i) = crate::memchr(seed.index2, &needle) {
134 index2 = u8::try_from(i).unwrap();
135 }
136 Some(Test { haystack, needle, index1, index2, fwd })
137 }
138}
139
140/// Data that describes a single prefilter test seed.
141#[derive(Clone, Copy)]
142struct Seed {
143 first: u8,
144 index1: u8,
145 index2: u8,
146}
147
148impl Seed {
149 const NEEDLE_LENGTH_LIMIT: usize = {
150 #[cfg(not(miri))]
151 {
152 33
153 }
154 #[cfg(miri)]
155 {
156 5
157 }
158 };
159
160 const HAYSTACK_LENGTH_LIMIT: usize = {
161 #[cfg(not(miri))]
162 {
163 65
164 }
165 #[cfg(miri)]
166 {
167 8
168 }
169 };
170
171 /// Generate a series of prefilter tests from this seed.
172 fn generate(self) -> impl Iterator<Item = Test> {
173 let len_start = 2;
174 // The iterator below generates *a lot* of tests. The number of
175 // tests was chosen somewhat empirically to be "bearable" when
176 // running the test suite.
177 //
178 // We use an iterator here because the collective haystacks of all
179 // these test cases add up to enough memory to OOM a conservative
180 // sandbox or a small laptop.
181 (len_start..=Seed::NEEDLE_LENGTH_LIMIT).flat_map(move |needle_len| {
182 let index_start = len_start - 1;
183 (index_start..needle_len).flat_map(move |index1| {
184 (index1..needle_len).flat_map(move |index2| {
185 (needle_len..=Seed::HAYSTACK_LENGTH_LIMIT).flat_map(
186 move |haystack_len| {
187 Test::new(
188 self,
189 index1,
190 index2,
191 haystack_len,
192 needle_len,
193 None,
194 )
195 .into_iter()
196 .chain(
197 (0..=(haystack_len - needle_len)).flat_map(
198 move |output| {
199 Test::new(
200 self,
201 index1,
202 index2,
203 haystack_len,
204 needle_len,
205 Some(output),
206 )
207 },
208 ),
209 )
210 },
211 )
212 })
213 })
214 })
215 }
216}
217