1 | use alloc::{boxed::Box, vec, vec::Vec}; |
2 | |
3 | /// A set of "packed pair" test seeds. Each seed serves as the base for the |
4 | /// generation of many other tests. In essence, the seed captures the pair of |
5 | /// bytes we used for a predicate and first byte among our needle. The tests |
6 | /// generated from each seed essentially vary the length of the needle and |
7 | /// haystack, while using the rare/first byte configuration from the seed. |
8 | /// |
9 | /// The purpose of this is to test many different needle/haystack lengths. |
10 | /// In particular, some of the vector optimizations might only have bugs |
11 | /// in haystacks of a certain size. |
12 | const SEEDS: &[Seed] = &[ |
13 | // Why not use different 'first' bytes? It seemed like a good idea to be |
14 | // able to configure it, but when I wrote the test generator below, it |
15 | // didn't seem necessary to use for reasons that I forget. |
16 | Seed { first: b'x' , index1: b'y' , index2: b'z' }, |
17 | Seed { first: b'x' , index1: b'x' , index2: b'z' }, |
18 | Seed { first: b'x' , index1: b'y' , index2: b'x' }, |
19 | Seed { first: b'x' , index1: b'x' , index2: b'x' }, |
20 | Seed { first: b'x' , index1: b'y' , index2: b'y' }, |
21 | ]; |
22 | |
23 | /// Runs a host of "packed pair" search tests. |
24 | /// |
25 | /// These tests specifically look for the occurrence of a possible substring |
26 | /// match based on a pair of bytes matching at the right offsets. |
27 | pub(crate) struct Runner { |
28 | fwd: Option< |
29 | Box< |
30 | dyn FnMut(&[u8], &[u8], u8, u8) -> Option<Option<usize>> + 'static, |
31 | >, |
32 | >, |
33 | } |
34 | |
35 | impl Runner { |
36 | /// Create a new test runner for "packed pair" substring search. |
37 | pub(crate) fn new() -> Runner { |
38 | Runner { fwd: None } |
39 | } |
40 | |
41 | /// Run all tests. This panics on the first failure. |
42 | /// |
43 | /// If the implementation being tested returns `None` for a particular |
44 | /// haystack/needle combination, then that test is skipped. |
45 | /// |
46 | /// This runs tests on both the forward and reverse implementations given. |
47 | /// If either (or both) are missing, then tests for that implementation are |
48 | /// skipped. |
49 | pub(crate) fn run(self) { |
50 | if let Some(mut fwd) = self.fwd { |
51 | for seed in SEEDS.iter() { |
52 | for t in seed.generate() { |
53 | match fwd(&t.haystack, &t.needle, t.index1, t.index2) { |
54 | None => continue, |
55 | Some(result) => { |
56 | assert_eq!( |
57 | t.fwd, result, |
58 | "FORWARD, needle: {:?}, haystack: {:?}, \ |
59 | index1: {:?}, index2: {:?}" , |
60 | t.needle, t.haystack, t.index1, t.index2, |
61 | ) |
62 | } |
63 | } |
64 | } |
65 | } |
66 | } |
67 | } |
68 | |
69 | /// Set the implementation for forward "packed pair" substring search. |
70 | /// |
71 | /// If the closure returns `None`, then it is assumed that the given |
72 | /// test cannot be applied to the particular implementation and it is |
73 | /// skipped. For example, if a particular implementation only supports |
74 | /// needles or haystacks for some minimum length. |
75 | /// |
76 | /// If this is not set, then forward "packed pair" search is not tested. |
77 | pub(crate) fn fwd( |
78 | mut self, |
79 | search: impl FnMut(&[u8], &[u8], u8, u8) -> Option<Option<usize>> + 'static, |
80 | ) -> Runner { |
81 | self.fwd = Some(Box::new(search)); |
82 | self |
83 | } |
84 | } |
85 | |
86 | /// A test that represents the input and expected output to a "packed pair" |
87 | /// search function. The test should be able to run with any "packed pair" |
88 | /// implementation and get the expected output. |
89 | struct Test { |
90 | haystack: Vec<u8>, |
91 | needle: Vec<u8>, |
92 | index1: u8, |
93 | index2: u8, |
94 | fwd: Option<usize>, |
95 | } |
96 | |
97 | impl Test { |
98 | /// Create a new "packed pair" test from a seed and some given offsets to |
99 | /// the pair of bytes to use as a predicate in the seed's needle. |
100 | /// |
101 | /// If a valid test could not be constructed, then None is returned. |
102 | /// (Currently, we take the approach of massaging tests to be valid |
103 | /// instead of rejecting them outright.) |
104 | fn new( |
105 | seed: Seed, |
106 | index1: usize, |
107 | index2: usize, |
108 | haystack_len: usize, |
109 | needle_len: usize, |
110 | fwd: Option<usize>, |
111 | ) -> Option<Test> { |
112 | let mut index1: u8 = index1.try_into().unwrap(); |
113 | let mut index2: u8 = index2.try_into().unwrap(); |
114 | // The '#' byte is never used in a haystack (unless we're expecting |
115 | // a match), while the '@' byte is never used in a needle. |
116 | let mut haystack = vec![b'@' ; haystack_len]; |
117 | let mut needle = vec![b'#' ; needle_len]; |
118 | needle[0] = seed.first; |
119 | needle[index1 as usize] = seed.index1; |
120 | needle[index2 as usize] = seed.index2; |
121 | // If we're expecting a match, then make sure the needle occurs |
122 | // in the haystack at the expected position. |
123 | if let Some(i) = fwd { |
124 | haystack[i..i + needle.len()].copy_from_slice(&needle); |
125 | } |
126 | // If the operations above lead to rare offsets pointing to the |
127 | // non-first occurrence of a byte, then adjust it. This might lead |
128 | // to redundant tests, but it's simpler than trying to change the |
129 | // generation process I think. |
130 | if let Some(i) = crate::memchr(seed.index1, &needle) { |
131 | index1 = u8::try_from(i).unwrap(); |
132 | } |
133 | if let Some(i) = crate::memchr(seed.index2, &needle) { |
134 | index2 = u8::try_from(i).unwrap(); |
135 | } |
136 | Some(Test { haystack, needle, index1, index2, fwd }) |
137 | } |
138 | } |
139 | |
140 | /// Data that describes a single prefilter test seed. |
141 | #[derive(Clone, Copy)] |
142 | struct Seed { |
143 | first: u8, |
144 | index1: u8, |
145 | index2: u8, |
146 | } |
147 | |
148 | impl Seed { |
149 | const NEEDLE_LENGTH_LIMIT: usize = { |
150 | #[cfg (not(miri))] |
151 | { |
152 | 33 |
153 | } |
154 | #[cfg (miri)] |
155 | { |
156 | 5 |
157 | } |
158 | }; |
159 | |
160 | const HAYSTACK_LENGTH_LIMIT: usize = { |
161 | #[cfg (not(miri))] |
162 | { |
163 | 65 |
164 | } |
165 | #[cfg (miri)] |
166 | { |
167 | 8 |
168 | } |
169 | }; |
170 | |
171 | /// Generate a series of prefilter tests from this seed. |
172 | fn generate(self) -> impl Iterator<Item = Test> { |
173 | let len_start = 2; |
174 | // The iterator below generates *a lot* of tests. The number of |
175 | // tests was chosen somewhat empirically to be "bearable" when |
176 | // running the test suite. |
177 | // |
178 | // We use an iterator here because the collective haystacks of all |
179 | // these test cases add up to enough memory to OOM a conservative |
180 | // sandbox or a small laptop. |
181 | (len_start..=Seed::NEEDLE_LENGTH_LIMIT).flat_map(move |needle_len| { |
182 | let index_start = len_start - 1; |
183 | (index_start..needle_len).flat_map(move |index1| { |
184 | (index1..needle_len).flat_map(move |index2| { |
185 | (needle_len..=Seed::HAYSTACK_LENGTH_LIMIT).flat_map( |
186 | move |haystack_len| { |
187 | Test::new( |
188 | self, |
189 | index1, |
190 | index2, |
191 | haystack_len, |
192 | needle_len, |
193 | None, |
194 | ) |
195 | .into_iter() |
196 | .chain( |
197 | (0..=(haystack_len - needle_len)).flat_map( |
198 | move |output| { |
199 | Test::new( |
200 | self, |
201 | index1, |
202 | index2, |
203 | haystack_len, |
204 | needle_len, |
205 | Some(output), |
206 | ) |
207 | }, |
208 | ), |
209 | ) |
210 | }, |
211 | ) |
212 | }) |
213 | }) |
214 | }) |
215 | } |
216 | } |
217 | |