mod.rs - Codebrowser

1	use alloc::{
2	string::{String, ToString},
3	vec,
4	vec::Vec,
5	};
6
7	use crate::ext::Byte;
8
9	pub(crate) mod naive;
10	#[macro_use]
11	pub(crate) mod prop;
12
13	const SEEDS: &'static [Seed] = &[
14	Seed { haystack: "a", needles: &[b'a'], positions: &[`0`] },
15	Seed { haystack: "aa", needles: &[b'a'], positions: &[`0`, `1`] },
16	Seed { haystack: "aaa", needles: &[b'a'], positions: &[`0`, `1`, `2`] },
17	Seed { haystack: "", needles: &[b'a'], positions: &[] },
18	Seed { haystack: "z", needles: &[b'a'], positions: &[] },
19	Seed { haystack: "zz", needles: &[b'a'], positions: &[] },
20	Seed { haystack: "zza", needles: &[b'a'], positions: &[`2`] },
21	Seed { haystack: "zaza", needles: &[b'a'], positions: &[`1`, `3`] },
22	Seed { haystack: "zzza", needles: &[b'a'], positions: &[`3`] },
23	Seed { haystack: "`\x00`a", needles: &[b'a'], positions: &[`1`] },
24	Seed { haystack: "`\x00`", needles: &[b'`\x00`'], positions: &[`0`] },
25	Seed { haystack: "`\x00\x00`", needles: &[b'`\x00`'], positions: &[`0`, `1`] },
26	Seed { haystack: "`\x00`a`\x00`", needles: &[b'`\x00`'], positions: &[`0`, `2`] },
27	Seed { haystack: "zzzzzzzzzzzzzzzza", needles: &[b'a'], positions: &[`16`] },
28	Seed {
29	haystack: "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzza",
30	needles: &[b'a'],
31	positions: &[`32`],
32	},
33	// two needles (applied to memchr2 + memchr3)
34	Seed { haystack: "az", needles: &[b'a', b'z'], positions: &[`0`, `1`] },
35	Seed { haystack: "az", needles: &[b'a', b'z'], positions: &[`0`, `1`] },
36	Seed { haystack: "az", needles: &[b'x', b'y'], positions: &[] },
37	Seed { haystack: "az", needles: &[b'a', b'y'], positions: &[`0`] },
38	Seed { haystack: "az", needles: &[b'x', b'z'], positions: &[`1`] },
39	Seed { haystack: "yyyyaz", needles: &[b'a', b'z'], positions: &[`4`, `5`] },
40	Seed { haystack: "yyyyaz", needles: &[b'z', b'a'], positions: &[`4`, `5`] },
41	// three needles (applied to memchr3)
42	Seed {
43	haystack: "xyz",
44	needles: &[b'x', b'y', b'z'],
45	positions: &[`0`, `1`, `2`],
46	},
47	Seed {
48	haystack: "zxy",
49	needles: &[b'x', b'y', b'z'],
50	positions: &[`0`, `1`, `2`],
51	},
52	Seed { haystack: "zxy", needles: &[b'x', b'a', b'z'], positions: &[`0`, `1`] },
53	Seed { haystack: "zxy", needles: &[b't', b'a', b'z'], positions: &[`0`] },
54	Seed { haystack: "yxz", needles: &[b't', b'a', b'z'], positions: &[`2`] },
55	];
56
57	/// Runs a host of substring search tests.
58	///
59	/// This has support for "partial" substring search implementations only work
60	/// for a subset of needles/haystacks. For example, the "packed pair" substring
61	/// search implementation only works for haystacks of some minimum length based
62	/// of the pair of bytes selected and the size of the vector used.
63	pub(crate) struct Runner {
64	needle_len: usize,
65	}
66
67	impl Runner {
68	/// Create a new test runner for forward and reverse byte search
69	/// implementations.
70	///
71	/// The `needle_len` given must be at most `3` and at least `1`. It
72	/// corresponds to the number of needle bytes to search for.
73	pub(crate) fn new(needle_len: usize) -> Runner {
74	assert!(needle_len >= `1`, "needle_len must be at least 1");
75	assert!(needle_len <= `3`, "needle_len must be at most 3");
76	Runner { needle_len }
77	}
78
79	/// Run all tests. This panics on the first failure.
80	///
81	/// If the implementation being tested returns `None` for a particular
82	/// haystack/needle combination, then that test is skipped.
83	pub(crate) fn forward_iter<F>(self, mut test: F)
84	where
85	F: FnMut(&[u8], &[u8]) -> Option<Vec<usize>> + 'static,
86	{
87	for seed in SEEDS.iter() {
88	if seed.needles.len() > self.needle_len {
89	continue;
90	}
91	for t in seed.generate() {
92	let results = match test(t.haystack.as_bytes(), &t.needles) {
93	None => continue,
94	Some(results) => results,
95	};
96	assert_eq!(
97	t.expected,
98	results,
99	"needles: {:?}, haystack: {:?}",
100	t.needles
101	.iter()
102	.map(\|&b\| b.to_char())
103	.collect::<Vec<char>>(),
104	t.haystack,
105	);
106	}
107	}
108	}
109
110	/// Run all tests in the reverse direction. This panics on the first
111	/// failure.
112	///
113	/// If the implementation being tested returns `None` for a particular
114	/// haystack/needle combination, then that test is skipped.
115	pub(crate) fn reverse_iter<F>(self, mut test: F)
116	where
117	F: FnMut(&[u8], &[u8]) -> Option<Vec<usize>> + 'static,
118	{
119	for seed in SEEDS.iter() {
120	if seed.needles.len() > self.needle_len {
121	continue;
122	}
123	for t in seed.generate() {
124	let mut results = match test(t.haystack.as_bytes(), &t.needles)
125	{
126	None => continue,
127	Some(results) => results,
128	};
129	results.reverse();
130	assert_eq!(
131	t.expected,
132	results,
133	"needles: {:?}, haystack: {:?}",
134	t.needles
135	.iter()
136	.map(\|&b\| b.to_char())
137	.collect::<Vec<char>>(),
138	t.haystack,
139	);
140	}
141	}
142	}
143
144	/// Run all tests as counting tests. This panics on the first failure.
145	///
146	/// That is, this only checks that the number of matches is correct and
147	/// not whether the offsets of each match are.
148	pub(crate) fn count_iter<F>(self, mut test: F)
149	where
150	F: FnMut(&[u8], &[u8]) -> Option<usize> + 'static,
151	{
152	for seed in SEEDS.iter() {
153	if seed.needles.len() > self.needle_len {
154	continue;
155	}
156	for t in seed.generate() {
157	let got = match test(t.haystack.as_bytes(), &t.needles) {
158	None => continue,
159	Some(got) => got,
160	};
161	assert_eq!(
162	t.expected.len(),
163	got,
164	"needles: {:?}, haystack: {:?}",
165	t.needles
166	.iter()
167	.map(\|&b\| b.to_char())
168	.collect::<Vec<char>>(),
169	t.haystack,
170	);
171	}
172	}
173	}
174
175	/// Like `Runner::forward`, but for a function that returns only the next
176	/// match and not all matches.
177	///
178	/// If the function returns `None`, then it is skipped.
179	pub(crate) fn forward_oneshot<F>(self, mut test: F)
180	where
181	F: FnMut(&[u8], &[u8]) -> Option<Option<usize>> + 'static,
182	{
183	self.forward_iter(move \|haystack, needles\| {
184	let mut start = `0`;
185	let mut results = vec![];
186	while let Some(i) = test(&haystack[start..], needles)? {
187	results.push(start + i);
188	start += i + `1`;
189	}
190	Some(results)
191	})
192	}
193
194	/// Like `Runner::reverse`, but for a function that returns only the last
195	/// match and not all matches.
196	///
197	/// If the function returns `None`, then it is skipped.
198	pub(crate) fn reverse_oneshot<F>(self, mut test: F)
199	where
200	F: FnMut(&[u8], &[u8]) -> Option<Option<usize>> + 'static,
201	{
202	self.reverse_iter(move \|haystack, needles\| {
203	let mut end = haystack.len();
204	let mut results = vec![];
205	while let Some(i) = test(&haystack[..end], needles)? {
206	results.push(i);
207	end = i;
208	}
209	Some(results)
210	})
211	}
212	}
213
214	/// A single test for memr?chr{,2,3}.
215	#[derive(Clone, Debug)]
216	struct Test {
217	/// The string to search in.
218	haystack: String,
219	/// The needles to look for.
220	needles: Vec<u8>,
221	/// The offsets that are expected to be found for all needles in the
222	/// forward direction.
223	expected: Vec<usize>,
224	}
225
226	impl Test {
227	fn new(seed: &Seed) -> Test {
228	Test {
229	haystack: seed.haystack.to_string(),
230	needles: seed.needles.to_vec(),
231	expected: seed.positions.to_vec(),
232	}
233	}
234	}
235
236	/// Data that can be expanded into many memchr tests by padding out the corpus.
237	#[derive(Clone, Debug)]
238	struct Seed {
239	/// The thing to search. We use `&str` instead of `&[u8]` because they
240	/// are nicer to write in tests, and we don't miss much since memchr
241	/// doesn't care about UTF-8.
242	///
243	/// Corpora cannot contain either '%' or '#'. We use these bytes when
244	/// expanding test cases into many test cases, and we assume they are not
245	/// used. If they are used, `memchr_tests` will panic.
246	haystack: &'static str,
247	/// The needles to search for. This is intended to be an alternation of
248	/// needles. The number of needles may cause this test to be skipped for
249	/// some memchr variants. For example, a test with 2 needles cannot be used
250	/// to test `memchr`, but can be used to test `memchr2` and `memchr3`.
251	/// However, a test with only 1 needle can be used to test all of `memchr`,
252	/// `memchr2` and `memchr3`. We achieve this by filling in the needles with
253	/// bytes that we never used in the corpus (such as '#').
254	needles: &'static [u8],
255	/// The positions expected to match for all of the needles.
256	positions: &'static [usize],
257	}
258
259	impl Seed {
260	/// Controls how much we expand the haystack on either side for each test.
261	/// We lower this on Miri because otherwise running the tests would take
262	/// forever.
263	const EXPAND_LEN: usize = {
264	#[cfg(not(miri))]
265	{
266	`515`
267	}
268	#[cfg(miri)]
269	{
270	`6`
271	}
272	};
273
274	/// Expand this test into many variations of the same test.
275	///
276	/// In particular, this will generate more tests with larger corpus sizes.
277	/// The expected positions are updated to maintain the integrity of the
278	/// test.
279	///
280	/// This is important in testing a memchr implementation, because there are
281	/// often different cases depending on the length of the corpus.
282	///
283	/// Note that we extend the corpus by adding `%` bytes, which we
284	/// don't otherwise use as a needle.
285	fn generate(&self) -> impl Iterator<Item = Test> {
286	let mut more = vec![];
287
288	// Add bytes to the start of the corpus.
289	for i in `0`..Seed::EXPAND_LEN {
290	let mut t = Test::new(self);
291	let mut new: String = core::iter::repeat('%').take(i).collect();
292	new.push_str(&t.haystack);
293	t.haystack = new;
294	t.expected = t.expected.into_iter().map(\|p\| p + i).collect();
295	more.push(t);
296	}
297	// Add bytes to the end of the corpus.
298	for i in `1`..Seed::EXPAND_LEN {
299	let mut t = Test::new(self);
300	let padding: String = core::iter::repeat('%').take(i).collect();
301	t.haystack.push_str(&padding);
302	more.push(t);
303	}
304
305	more.into_iter()
306	}
307	}
308