1 | use alloc::{ |
2 | string::{String, ToString}, |
3 | vec, |
4 | vec::Vec, |
5 | }; |
6 | |
7 | use crate::ext::Byte; |
8 | |
9 | pub(crate) mod naive; |
10 | #[macro_use ] |
11 | pub(crate) mod prop; |
12 | |
13 | const SEEDS: &'static [Seed] = &[ |
14 | Seed { haystack: "a" , needles: &[b'a' ], positions: &[0] }, |
15 | Seed { haystack: "aa" , needles: &[b'a' ], positions: &[0, 1] }, |
16 | Seed { haystack: "aaa" , needles: &[b'a' ], positions: &[0, 1, 2] }, |
17 | Seed { haystack: "" , needles: &[b'a' ], positions: &[] }, |
18 | Seed { haystack: "z" , needles: &[b'a' ], positions: &[] }, |
19 | Seed { haystack: "zz" , needles: &[b'a' ], positions: &[] }, |
20 | Seed { haystack: "zza" , needles: &[b'a' ], positions: &[2] }, |
21 | Seed { haystack: "zaza" , needles: &[b'a' ], positions: &[1, 3] }, |
22 | Seed { haystack: "zzza" , needles: &[b'a' ], positions: &[3] }, |
23 | Seed { haystack: " \x00a" , needles: &[b'a' ], positions: &[1] }, |
24 | Seed { haystack: " \x00" , needles: &[b' \x00' ], positions: &[0] }, |
25 | Seed { haystack: " \x00\x00" , needles: &[b' \x00' ], positions: &[0, 1] }, |
26 | Seed { haystack: " \x00a \x00" , needles: &[b' \x00' ], positions: &[0, 2] }, |
27 | Seed { haystack: "zzzzzzzzzzzzzzzza" , needles: &[b'a' ], positions: &[16] }, |
28 | Seed { |
29 | haystack: "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzza" , |
30 | needles: &[b'a' ], |
31 | positions: &[32], |
32 | }, |
33 | // two needles (applied to memchr2 + memchr3) |
34 | Seed { haystack: "az" , needles: &[b'a' , b'z' ], positions: &[0, 1] }, |
35 | Seed { haystack: "az" , needles: &[b'a' , b'z' ], positions: &[0, 1] }, |
36 | Seed { haystack: "az" , needles: &[b'x' , b'y' ], positions: &[] }, |
37 | Seed { haystack: "az" , needles: &[b'a' , b'y' ], positions: &[0] }, |
38 | Seed { haystack: "az" , needles: &[b'x' , b'z' ], positions: &[1] }, |
39 | Seed { haystack: "yyyyaz" , needles: &[b'a' , b'z' ], positions: &[4, 5] }, |
40 | Seed { haystack: "yyyyaz" , needles: &[b'z' , b'a' ], positions: &[4, 5] }, |
41 | // three needles (applied to memchr3) |
42 | Seed { |
43 | haystack: "xyz" , |
44 | needles: &[b'x' , b'y' , b'z' ], |
45 | positions: &[0, 1, 2], |
46 | }, |
47 | Seed { |
48 | haystack: "zxy" , |
49 | needles: &[b'x' , b'y' , b'z' ], |
50 | positions: &[0, 1, 2], |
51 | }, |
52 | Seed { haystack: "zxy" , needles: &[b'x' , b'a' , b'z' ], positions: &[0, 1] }, |
53 | Seed { haystack: "zxy" , needles: &[b't' , b'a' , b'z' ], positions: &[0] }, |
54 | Seed { haystack: "yxz" , needles: &[b't' , b'a' , b'z' ], positions: &[2] }, |
55 | ]; |
56 | |
57 | /// Runs a host of substring search tests. |
58 | /// |
59 | /// This has support for "partial" substring search implementations only work |
60 | /// for a subset of needles/haystacks. For example, the "packed pair" substring |
61 | /// search implementation only works for haystacks of some minimum length based |
62 | /// of the pair of bytes selected and the size of the vector used. |
63 | pub(crate) struct Runner { |
64 | needle_len: usize, |
65 | } |
66 | |
67 | impl Runner { |
68 | /// Create a new test runner for forward and reverse byte search |
69 | /// implementations. |
70 | /// |
71 | /// The `needle_len` given must be at most `3` and at least `1`. It |
72 | /// corresponds to the number of needle bytes to search for. |
73 | pub(crate) fn new(needle_len: usize) -> Runner { |
74 | assert!(needle_len >= 1, "needle_len must be at least 1" ); |
75 | assert!(needle_len <= 3, "needle_len must be at most 3" ); |
76 | Runner { needle_len } |
77 | } |
78 | |
79 | /// Run all tests. This panics on the first failure. |
80 | /// |
81 | /// If the implementation being tested returns `None` for a particular |
82 | /// haystack/needle combination, then that test is skipped. |
83 | pub(crate) fn forward_iter<F>(self, mut test: F) |
84 | where |
85 | F: FnMut(&[u8], &[u8]) -> Option<Vec<usize>> + 'static, |
86 | { |
87 | for seed in SEEDS.iter() { |
88 | if seed.needles.len() > self.needle_len { |
89 | continue; |
90 | } |
91 | for t in seed.generate() { |
92 | let results = match test(t.haystack.as_bytes(), &t.needles) { |
93 | None => continue, |
94 | Some(results) => results, |
95 | }; |
96 | assert_eq!( |
97 | t.expected, |
98 | results, |
99 | "needles: {:?}, haystack: {:?}" , |
100 | t.needles |
101 | .iter() |
102 | .map(|&b| b.to_char()) |
103 | .collect::<Vec<char>>(), |
104 | t.haystack, |
105 | ); |
106 | } |
107 | } |
108 | } |
109 | |
110 | /// Run all tests in the reverse direction. This panics on the first |
111 | /// failure. |
112 | /// |
113 | /// If the implementation being tested returns `None` for a particular |
114 | /// haystack/needle combination, then that test is skipped. |
115 | pub(crate) fn reverse_iter<F>(self, mut test: F) |
116 | where |
117 | F: FnMut(&[u8], &[u8]) -> Option<Vec<usize>> + 'static, |
118 | { |
119 | for seed in SEEDS.iter() { |
120 | if seed.needles.len() > self.needle_len { |
121 | continue; |
122 | } |
123 | for t in seed.generate() { |
124 | let mut results = match test(t.haystack.as_bytes(), &t.needles) |
125 | { |
126 | None => continue, |
127 | Some(results) => results, |
128 | }; |
129 | results.reverse(); |
130 | assert_eq!( |
131 | t.expected, |
132 | results, |
133 | "needles: {:?}, haystack: {:?}" , |
134 | t.needles |
135 | .iter() |
136 | .map(|&b| b.to_char()) |
137 | .collect::<Vec<char>>(), |
138 | t.haystack, |
139 | ); |
140 | } |
141 | } |
142 | } |
143 | |
144 | /// Run all tests as counting tests. This panics on the first failure. |
145 | /// |
146 | /// That is, this only checks that the number of matches is correct and |
147 | /// not whether the offsets of each match are. |
148 | pub(crate) fn count_iter<F>(self, mut test: F) |
149 | where |
150 | F: FnMut(&[u8], &[u8]) -> Option<usize> + 'static, |
151 | { |
152 | for seed in SEEDS.iter() { |
153 | if seed.needles.len() > self.needle_len { |
154 | continue; |
155 | } |
156 | for t in seed.generate() { |
157 | let got = match test(t.haystack.as_bytes(), &t.needles) { |
158 | None => continue, |
159 | Some(got) => got, |
160 | }; |
161 | assert_eq!( |
162 | t.expected.len(), |
163 | got, |
164 | "needles: {:?}, haystack: {:?}" , |
165 | t.needles |
166 | .iter() |
167 | .map(|&b| b.to_char()) |
168 | .collect::<Vec<char>>(), |
169 | t.haystack, |
170 | ); |
171 | } |
172 | } |
173 | } |
174 | |
175 | /// Like `Runner::forward`, but for a function that returns only the next |
176 | /// match and not all matches. |
177 | /// |
178 | /// If the function returns `None`, then it is skipped. |
179 | pub(crate) fn forward_oneshot<F>(self, mut test: F) |
180 | where |
181 | F: FnMut(&[u8], &[u8]) -> Option<Option<usize>> + 'static, |
182 | { |
183 | self.forward_iter(move |haystack, needles| { |
184 | let mut start = 0; |
185 | let mut results = vec![]; |
186 | while let Some(i) = test(&haystack[start..], needles)? { |
187 | results.push(start + i); |
188 | start += i + 1; |
189 | } |
190 | Some(results) |
191 | }) |
192 | } |
193 | |
194 | /// Like `Runner::reverse`, but for a function that returns only the last |
195 | /// match and not all matches. |
196 | /// |
197 | /// If the function returns `None`, then it is skipped. |
198 | pub(crate) fn reverse_oneshot<F>(self, mut test: F) |
199 | where |
200 | F: FnMut(&[u8], &[u8]) -> Option<Option<usize>> + 'static, |
201 | { |
202 | self.reverse_iter(move |haystack, needles| { |
203 | let mut end = haystack.len(); |
204 | let mut results = vec![]; |
205 | while let Some(i) = test(&haystack[..end], needles)? { |
206 | results.push(i); |
207 | end = i; |
208 | } |
209 | Some(results) |
210 | }) |
211 | } |
212 | } |
213 | |
214 | /// A single test for memr?chr{,2,3}. |
215 | #[derive(Clone, Debug)] |
216 | struct Test { |
217 | /// The string to search in. |
218 | haystack: String, |
219 | /// The needles to look for. |
220 | needles: Vec<u8>, |
221 | /// The offsets that are expected to be found for all needles in the |
222 | /// forward direction. |
223 | expected: Vec<usize>, |
224 | } |
225 | |
226 | impl Test { |
227 | fn new(seed: &Seed) -> Test { |
228 | Test { |
229 | haystack: seed.haystack.to_string(), |
230 | needles: seed.needles.to_vec(), |
231 | expected: seed.positions.to_vec(), |
232 | } |
233 | } |
234 | } |
235 | |
236 | /// Data that can be expanded into many memchr tests by padding out the corpus. |
237 | #[derive(Clone, Debug)] |
238 | struct Seed { |
239 | /// The thing to search. We use `&str` instead of `&[u8]` because they |
240 | /// are nicer to write in tests, and we don't miss much since memchr |
241 | /// doesn't care about UTF-8. |
242 | /// |
243 | /// Corpora cannot contain either '%' or '#'. We use these bytes when |
244 | /// expanding test cases into many test cases, and we assume they are not |
245 | /// used. If they are used, `memchr_tests` will panic. |
246 | haystack: &'static str, |
247 | /// The needles to search for. This is intended to be an alternation of |
248 | /// needles. The number of needles may cause this test to be skipped for |
249 | /// some memchr variants. For example, a test with 2 needles cannot be used |
250 | /// to test `memchr`, but can be used to test `memchr2` and `memchr3`. |
251 | /// However, a test with only 1 needle can be used to test all of `memchr`, |
252 | /// `memchr2` and `memchr3`. We achieve this by filling in the needles with |
253 | /// bytes that we never used in the corpus (such as '#'). |
254 | needles: &'static [u8], |
255 | /// The positions expected to match for all of the needles. |
256 | positions: &'static [usize], |
257 | } |
258 | |
259 | impl Seed { |
260 | /// Controls how much we expand the haystack on either side for each test. |
261 | /// We lower this on Miri because otherwise running the tests would take |
262 | /// forever. |
263 | const EXPAND_LEN: usize = { |
264 | #[cfg (not(miri))] |
265 | { |
266 | 515 |
267 | } |
268 | #[cfg (miri)] |
269 | { |
270 | 6 |
271 | } |
272 | }; |
273 | |
274 | /// Expand this test into many variations of the same test. |
275 | /// |
276 | /// In particular, this will generate more tests with larger corpus sizes. |
277 | /// The expected positions are updated to maintain the integrity of the |
278 | /// test. |
279 | /// |
280 | /// This is important in testing a memchr implementation, because there are |
281 | /// often different cases depending on the length of the corpus. |
282 | /// |
283 | /// Note that we extend the corpus by adding `%` bytes, which we |
284 | /// don't otherwise use as a needle. |
285 | fn generate(&self) -> impl Iterator<Item = Test> { |
286 | let mut more = vec![]; |
287 | |
288 | // Add bytes to the start of the corpus. |
289 | for i in 0..Seed::EXPAND_LEN { |
290 | let mut t = Test::new(self); |
291 | let mut new: String = core::iter::repeat('%' ).take(i).collect(); |
292 | new.push_str(&t.haystack); |
293 | t.haystack = new; |
294 | t.expected = t.expected.into_iter().map(|p| p + i).collect(); |
295 | more.push(t); |
296 | } |
297 | // Add bytes to the end of the corpus. |
298 | for i in 1..Seed::EXPAND_LEN { |
299 | let mut t = Test::new(self); |
300 | let padding: String = core::iter::repeat('%' ).take(i).collect(); |
301 | t.haystack.push_str(&padding); |
302 | more.push(t); |
303 | } |
304 | |
305 | more.into_iter() |
306 | } |
307 | } |
308 | |