packedpair.rs - Codebrowser

1	use alloc::{boxed::Box, vec, vec::Vec};
2
3	/// A set of "packed pair" test seeds. Each seed serves as the base for the
4	/// generation of many other tests. In essence, the seed captures the pair of
5	/// bytes we used for a predicate and first byte among our needle. The tests
6	/// generated from each seed essentially vary the length of the needle and
7	/// haystack, while using the rare/first byte configuration from the seed.
8	///
9	/// The purpose of this is to test many different needle/haystack lengths.
10	/// In particular, some of the vector optimizations might only have bugs
11	/// in haystacks of a certain size.
12	const SEEDS: &[Seed] = &[
13	// Why not use different 'first' bytes? It seemed like a good idea to be
14	// able to configure it, but when I wrote the test generator below, it
15	// didn't seem necessary to use for reasons that I forget.
16	Seed { first: b'x', index1: b'y', index2: b'z' },
17	Seed { first: b'x', index1: b'x', index2: b'z' },
18	Seed { first: b'x', index1: b'y', index2: b'x' },
19	Seed { first: b'x', index1: b'x', index2: b'x' },
20	Seed { first: b'x', index1: b'y', index2: b'y' },
21	];
22
23	/// Runs a host of "packed pair" search tests.
24	///
25	/// These tests specifically look for the occurrence of a possible substring
26	/// match based on a pair of bytes matching at the right offsets.
27	pub(crate) struct Runner {
28	fwd: Option<
29	Box<
30	dyn FnMut(&[u8], &[u8], u8, u8) -> Option<Option<usize>> + 'static,
31	>,
32	>,
33	}
34
35	impl Runner {
36	/// Create a new test runner for "packed pair" substring search.
37	pub(crate) fn new() -> Runner {
38	Runner { fwd: None }
39	}
40
41	/// Run all tests. This panics on the first failure.
42	///
43	/// If the implementation being tested returns `None` for a particular
44	/// haystack/needle combination, then that test is skipped.
45	///
46	/// This runs tests on both the forward and reverse implementations given.
47	/// If either (or both) are missing, then tests for that implementation are
48	/// skipped.
49	pub(crate) fn run(self) {
50	if let Some(mut fwd) = self.fwd {
51	for seed in SEEDS.iter() {
52	for t in seed.generate() {
53	match fwd(&t.haystack, &t.needle, t.index1, t.index2) {
54	None => continue,
55	Some(result) => {
56	assert_eq!(
57	t.fwd, result,
58	"FORWARD, needle: {:?}, haystack: {:?}, \
59	index1: {:?}, index2: {:?}",
60	t.needle, t.haystack, t.index1, t.index2,
61	)
62	}
63	}
64	}
65	}
66	}
67	}
68
69	/// Set the implementation for forward "packed pair" substring search.
70	///
71	/// If the closure returns `None`, then it is assumed that the given
72	/// test cannot be applied to the particular implementation and it is
73	/// skipped. For example, if a particular implementation only supports
74	/// needles or haystacks for some minimum length.
75	///
76	/// If this is not set, then forward "packed pair" search is not tested.
77	pub(crate) fn fwd(
78	mut self,
79	search: impl FnMut(&[u8], &[u8], u8, u8) -> Option<Option<usize>> + 'static,
80	) -> Runner {
81	self.fwd = Some(Box::new(search));
82	self
83	}
84	}
85
86	/// A test that represents the input and expected output to a "packed pair"
87	/// search function. The test should be able to run with any "packed pair"
88	/// implementation and get the expected output.
89	struct Test {
90	haystack: Vec<u8>,
91	needle: Vec<u8>,
92	index1: u8,
93	index2: u8,
94	fwd: Option<usize>,
95	}
96
97	impl Test {
98	/// Create a new "packed pair" test from a seed and some given offsets to
99	/// the pair of bytes to use as a predicate in the seed's needle.
100	///
101	/// If a valid test could not be constructed, then None is returned.
102	/// (Currently, we take the approach of massaging tests to be valid
103	/// instead of rejecting them outright.)
104	fn new(
105	seed: Seed,
106	index1: usize,
107	index2: usize,
108	haystack_len: usize,
109	needle_len: usize,
110	fwd: Option<usize>,
111	) -> Option<Test> {
112	let mut index1: u8 = index1.try_into().unwrap();
113	let mut index2: u8 = index2.try_into().unwrap();
114	// The '#' byte is never used in a haystack (unless we're expecting
115	// a match), while the '@' byte is never used in a needle.
116	let mut haystack = vec![b'@'; haystack_len];
117	let mut needle = vec![b'#'; needle_len];
118	needle[`0`] = seed.first;
119	needle[index1 as usize] = seed.index1;
120	needle[index2 as usize] = seed.index2;
121	// If we're expecting a match, then make sure the needle occurs
122	// in the haystack at the expected position.
123	if let Some(i) = fwd {
124	haystack[i..i + needle.len()].copy_from_slice(&needle);
125	}
126	// If the operations above lead to rare offsets pointing to the
127	// non-first occurrence of a byte, then adjust it. This might lead
128	// to redundant tests, but it's simpler than trying to change the
129	// generation process I think.
130	if let Some(i) = crate::memchr(seed.index1, &needle) {
131	index1 = u8::try_from(i).unwrap();
132	}
133	if let Some(i) = crate::memchr(seed.index2, &needle) {
134	index2 = u8::try_from(i).unwrap();
135	}
136	Some(Test { haystack, needle, index1, index2, fwd })
137	}
138	}
139
140	/// Data that describes a single prefilter test seed.
141	#[derive(Clone, Copy)]
142	struct Seed {
143	first: u8,
144	index1: u8,
145	index2: u8,
146	}
147
148	impl Seed {
149	const NEEDLE_LENGTH_LIMIT: usize = {
150	#[cfg(not(miri))]
151	{
152	`33`
153	}
154	#[cfg(miri)]
155	{
156	`5`
157	}
158	};
159
160	const HAYSTACK_LENGTH_LIMIT: usize = {
161	#[cfg(not(miri))]
162	{
163	`65`
164	}
165	#[cfg(miri)]
166	{
167	`8`
168	}
169	};
170
171	/// Generate a series of prefilter tests from this seed.
172	fn generate(self) -> impl Iterator<Item = Test> {
173	let len_start = `2`;
174	// The iterator below generates a lot* of tests. The number of*
175	// tests was chosen somewhat empirically to be "bearable" when
176	// running the test suite.
177	//
178	// We use an iterator here because the collective haystacks of all
179	// these test cases add up to enough memory to OOM a conservative
180	// sandbox or a small laptop.
181	(len_start..=Seed::NEEDLE_LENGTH_LIMIT).flat_map(move \|needle_len\| {
182	let index_start = len_start - `1`;
183	(index_start..needle_len).flat_map(move \|index1\| {
184	(index1..needle_len).flat_map(move \|index2\| {
185	(needle_len..=Seed::HAYSTACK_LENGTH_LIMIT).flat_map(
186	move \|haystack_len\| {
187	Test::new(
188	self,
189	index1,
190	index2,
191	haystack_len,
192	needle_len,
193	None,
194	)
195	.into_iter()
196	.chain(
197	(`0`..=(haystack_len - needle_len)).flat_map(
198	move \|output\| {
199	Test::new(
200	self,
201	index1,
202	index2,
203	haystack_len,
204	needle_len,
205	Some(output),
206	)
207	},
208	),
209	)
210	},
211	)
212	})
213	})
214	})
215	}
216	}
217

Provided by KDAB

Definitions