1use alloc::{
2 string::{String, ToString},
3 vec,
4 vec::Vec,
5};
6
7use crate::ext::Byte;
8
9pub(crate) mod naive;
10#[macro_use]
11pub(crate) mod prop;
12
13const SEEDS: &'static [Seed] = &[
14 Seed { haystack: "a", needles: &[b'a'], positions: &[0] },
15 Seed { haystack: "aa", needles: &[b'a'], positions: &[0, 1] },
16 Seed { haystack: "aaa", needles: &[b'a'], positions: &[0, 1, 2] },
17 Seed { haystack: "", needles: &[b'a'], positions: &[] },
18 Seed { haystack: "z", needles: &[b'a'], positions: &[] },
19 Seed { haystack: "zz", needles: &[b'a'], positions: &[] },
20 Seed { haystack: "zza", needles: &[b'a'], positions: &[2] },
21 Seed { haystack: "zaza", needles: &[b'a'], positions: &[1, 3] },
22 Seed { haystack: "zzza", needles: &[b'a'], positions: &[3] },
23 Seed { haystack: "\x00a", needles: &[b'a'], positions: &[1] },
24 Seed { haystack: "\x00", needles: &[b'\x00'], positions: &[0] },
25 Seed { haystack: "\x00\x00", needles: &[b'\x00'], positions: &[0, 1] },
26 Seed { haystack: "\x00a\x00", needles: &[b'\x00'], positions: &[0, 2] },
27 Seed { haystack: "zzzzzzzzzzzzzzzza", needles: &[b'a'], positions: &[16] },
28 Seed {
29 haystack: "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzza",
30 needles: &[b'a'],
31 positions: &[32],
32 },
33 // two needles (applied to memchr2 + memchr3)
34 Seed { haystack: "az", needles: &[b'a', b'z'], positions: &[0, 1] },
35 Seed { haystack: "az", needles: &[b'a', b'z'], positions: &[0, 1] },
36 Seed { haystack: "az", needles: &[b'x', b'y'], positions: &[] },
37 Seed { haystack: "az", needles: &[b'a', b'y'], positions: &[0] },
38 Seed { haystack: "az", needles: &[b'x', b'z'], positions: &[1] },
39 Seed { haystack: "yyyyaz", needles: &[b'a', b'z'], positions: &[4, 5] },
40 Seed { haystack: "yyyyaz", needles: &[b'z', b'a'], positions: &[4, 5] },
41 // three needles (applied to memchr3)
42 Seed {
43 haystack: "xyz",
44 needles: &[b'x', b'y', b'z'],
45 positions: &[0, 1, 2],
46 },
47 Seed {
48 haystack: "zxy",
49 needles: &[b'x', b'y', b'z'],
50 positions: &[0, 1, 2],
51 },
52 Seed { haystack: "zxy", needles: &[b'x', b'a', b'z'], positions: &[0, 1] },
53 Seed { haystack: "zxy", needles: &[b't', b'a', b'z'], positions: &[0] },
54 Seed { haystack: "yxz", needles: &[b't', b'a', b'z'], positions: &[2] },
55];
56
57/// Runs a host of substring search tests.
58///
59/// This has support for "partial" substring search implementations only work
60/// for a subset of needles/haystacks. For example, the "packed pair" substring
61/// search implementation only works for haystacks of some minimum length based
62/// of the pair of bytes selected and the size of the vector used.
63pub(crate) struct Runner {
64 needle_len: usize,
65}
66
67impl Runner {
68 /// Create a new test runner for forward and reverse byte search
69 /// implementations.
70 ///
71 /// The `needle_len` given must be at most `3` and at least `1`. It
72 /// corresponds to the number of needle bytes to search for.
73 pub(crate) fn new(needle_len: usize) -> Runner {
74 assert!(needle_len >= 1, "needle_len must be at least 1");
75 assert!(needle_len <= 3, "needle_len must be at most 3");
76 Runner { needle_len }
77 }
78
79 /// Run all tests. This panics on the first failure.
80 ///
81 /// If the implementation being tested returns `None` for a particular
82 /// haystack/needle combination, then that test is skipped.
83 pub(crate) fn forward_iter<F>(self, mut test: F)
84 where
85 F: FnMut(&[u8], &[u8]) -> Option<Vec<usize>> + 'static,
86 {
87 for seed in SEEDS.iter() {
88 if seed.needles.len() > self.needle_len {
89 continue;
90 }
91 for t in seed.generate() {
92 let results = match test(t.haystack.as_bytes(), &t.needles) {
93 None => continue,
94 Some(results) => results,
95 };
96 assert_eq!(
97 t.expected,
98 results,
99 "needles: {:?}, haystack: {:?}",
100 t.needles
101 .iter()
102 .map(|&b| b.to_char())
103 .collect::<Vec<char>>(),
104 t.haystack,
105 );
106 }
107 }
108 }
109
110 /// Run all tests in the reverse direction. This panics on the first
111 /// failure.
112 ///
113 /// If the implementation being tested returns `None` for a particular
114 /// haystack/needle combination, then that test is skipped.
115 pub(crate) fn reverse_iter<F>(self, mut test: F)
116 where
117 F: FnMut(&[u8], &[u8]) -> Option<Vec<usize>> + 'static,
118 {
119 for seed in SEEDS.iter() {
120 if seed.needles.len() > self.needle_len {
121 continue;
122 }
123 for t in seed.generate() {
124 let mut results = match test(t.haystack.as_bytes(), &t.needles)
125 {
126 None => continue,
127 Some(results) => results,
128 };
129 results.reverse();
130 assert_eq!(
131 t.expected,
132 results,
133 "needles: {:?}, haystack: {:?}",
134 t.needles
135 .iter()
136 .map(|&b| b.to_char())
137 .collect::<Vec<char>>(),
138 t.haystack,
139 );
140 }
141 }
142 }
143
144 /// Run all tests as counting tests. This panics on the first failure.
145 ///
146 /// That is, this only checks that the number of matches is correct and
147 /// not whether the offsets of each match are.
148 pub(crate) fn count_iter<F>(self, mut test: F)
149 where
150 F: FnMut(&[u8], &[u8]) -> Option<usize> + 'static,
151 {
152 for seed in SEEDS.iter() {
153 if seed.needles.len() > self.needle_len {
154 continue;
155 }
156 for t in seed.generate() {
157 let got = match test(t.haystack.as_bytes(), &t.needles) {
158 None => continue,
159 Some(got) => got,
160 };
161 assert_eq!(
162 t.expected.len(),
163 got,
164 "needles: {:?}, haystack: {:?}",
165 t.needles
166 .iter()
167 .map(|&b| b.to_char())
168 .collect::<Vec<char>>(),
169 t.haystack,
170 );
171 }
172 }
173 }
174
175 /// Like `Runner::forward`, but for a function that returns only the next
176 /// match and not all matches.
177 ///
178 /// If the function returns `None`, then it is skipped.
179 pub(crate) fn forward_oneshot<F>(self, mut test: F)
180 where
181 F: FnMut(&[u8], &[u8]) -> Option<Option<usize>> + 'static,
182 {
183 self.forward_iter(move |haystack, needles| {
184 let mut start = 0;
185 let mut results = vec![];
186 while let Some(i) = test(&haystack[start..], needles)? {
187 results.push(start + i);
188 start += i + 1;
189 }
190 Some(results)
191 })
192 }
193
194 /// Like `Runner::reverse`, but for a function that returns only the last
195 /// match and not all matches.
196 ///
197 /// If the function returns `None`, then it is skipped.
198 pub(crate) fn reverse_oneshot<F>(self, mut test: F)
199 where
200 F: FnMut(&[u8], &[u8]) -> Option<Option<usize>> + 'static,
201 {
202 self.reverse_iter(move |haystack, needles| {
203 let mut end = haystack.len();
204 let mut results = vec![];
205 while let Some(i) = test(&haystack[..end], needles)? {
206 results.push(i);
207 end = i;
208 }
209 Some(results)
210 })
211 }
212}
213
214/// A single test for memr?chr{,2,3}.
215#[derive(Clone, Debug)]
216struct Test {
217 /// The string to search in.
218 haystack: String,
219 /// The needles to look for.
220 needles: Vec<u8>,
221 /// The offsets that are expected to be found for all needles in the
222 /// forward direction.
223 expected: Vec<usize>,
224}
225
226impl Test {
227 fn new(seed: &Seed) -> Test {
228 Test {
229 haystack: seed.haystack.to_string(),
230 needles: seed.needles.to_vec(),
231 expected: seed.positions.to_vec(),
232 }
233 }
234}
235
236/// Data that can be expanded into many memchr tests by padding out the corpus.
237#[derive(Clone, Debug)]
238struct Seed {
239 /// The thing to search. We use `&str` instead of `&[u8]` because they
240 /// are nicer to write in tests, and we don't miss much since memchr
241 /// doesn't care about UTF-8.
242 ///
243 /// Corpora cannot contain either '%' or '#'. We use these bytes when
244 /// expanding test cases into many test cases, and we assume they are not
245 /// used. If they are used, `memchr_tests` will panic.
246 haystack: &'static str,
247 /// The needles to search for. This is intended to be an alternation of
248 /// needles. The number of needles may cause this test to be skipped for
249 /// some memchr variants. For example, a test with 2 needles cannot be used
250 /// to test `memchr`, but can be used to test `memchr2` and `memchr3`.
251 /// However, a test with only 1 needle can be used to test all of `memchr`,
252 /// `memchr2` and `memchr3`. We achieve this by filling in the needles with
253 /// bytes that we never used in the corpus (such as '#').
254 needles: &'static [u8],
255 /// The positions expected to match for all of the needles.
256 positions: &'static [usize],
257}
258
259impl Seed {
260 /// Controls how much we expand the haystack on either side for each test.
261 /// We lower this on Miri because otherwise running the tests would take
262 /// forever.
263 const EXPAND_LEN: usize = {
264 #[cfg(not(miri))]
265 {
266 515
267 }
268 #[cfg(miri)]
269 {
270 6
271 }
272 };
273
274 /// Expand this test into many variations of the same test.
275 ///
276 /// In particular, this will generate more tests with larger corpus sizes.
277 /// The expected positions are updated to maintain the integrity of the
278 /// test.
279 ///
280 /// This is important in testing a memchr implementation, because there are
281 /// often different cases depending on the length of the corpus.
282 ///
283 /// Note that we extend the corpus by adding `%` bytes, which we
284 /// don't otherwise use as a needle.
285 fn generate(&self) -> impl Iterator<Item = Test> {
286 let mut more = vec![];
287
288 // Add bytes to the start of the corpus.
289 for i in 0..Seed::EXPAND_LEN {
290 let mut t = Test::new(self);
291 let mut new: String = core::iter::repeat('%').take(i).collect();
292 new.push_str(&t.haystack);
293 t.haystack = new;
294 t.expected = t.expected.into_iter().map(|p| p + i).collect();
295 more.push(t);
296 }
297 // Add bytes to the end of the corpus.
298 for i in 1..Seed::EXPAND_LEN {
299 let mut t = Test::new(self);
300 let padding: String = core::iter::repeat('%').take(i).collect();
301 t.haystack.push_str(&padding);
302 more.push(t);
303 }
304
305 more.into_iter()
306 }
307}
308