| 1 | use alloc::{boxed::Box, vec, vec::Vec}; |
| 2 | |
| 3 | /// A set of "packed pair" test seeds. Each seed serves as the base for the |
| 4 | /// generation of many other tests. In essence, the seed captures the pair of |
| 5 | /// bytes we used for a predicate and first byte among our needle. The tests |
| 6 | /// generated from each seed essentially vary the length of the needle and |
| 7 | /// haystack, while using the rare/first byte configuration from the seed. |
| 8 | /// |
| 9 | /// The purpose of this is to test many different needle/haystack lengths. |
| 10 | /// In particular, some of the vector optimizations might only have bugs |
| 11 | /// in haystacks of a certain size. |
| 12 | const SEEDS: &[Seed] = &[ |
| 13 | // Why not use different 'first' bytes? It seemed like a good idea to be |
| 14 | // able to configure it, but when I wrote the test generator below, it |
| 15 | // didn't seem necessary to use for reasons that I forget. |
| 16 | Seed { first: b'x' , index1: b'y' , index2: b'z' }, |
| 17 | Seed { first: b'x' , index1: b'x' , index2: b'z' }, |
| 18 | Seed { first: b'x' , index1: b'y' , index2: b'x' }, |
| 19 | Seed { first: b'x' , index1: b'x' , index2: b'x' }, |
| 20 | Seed { first: b'x' , index1: b'y' , index2: b'y' }, |
| 21 | ]; |
| 22 | |
| 23 | /// Runs a host of "packed pair" search tests. |
| 24 | /// |
| 25 | /// These tests specifically look for the occurrence of a possible substring |
| 26 | /// match based on a pair of bytes matching at the right offsets. |
| 27 | pub(crate) struct Runner { |
| 28 | fwd: Option< |
| 29 | Box< |
| 30 | dyn FnMut(&[u8], &[u8], u8, u8) -> Option<Option<usize>> + 'static, |
| 31 | >, |
| 32 | >, |
| 33 | } |
| 34 | |
| 35 | impl Runner { |
| 36 | /// Create a new test runner for "packed pair" substring search. |
| 37 | pub(crate) fn new() -> Runner { |
| 38 | Runner { fwd: None } |
| 39 | } |
| 40 | |
| 41 | /// Run all tests. This panics on the first failure. |
| 42 | /// |
| 43 | /// If the implementation being tested returns `None` for a particular |
| 44 | /// haystack/needle combination, then that test is skipped. |
| 45 | /// |
| 46 | /// This runs tests on both the forward and reverse implementations given. |
| 47 | /// If either (or both) are missing, then tests for that implementation are |
| 48 | /// skipped. |
| 49 | pub(crate) fn run(self) { |
| 50 | if let Some(mut fwd) = self.fwd { |
| 51 | for seed in SEEDS.iter() { |
| 52 | for t in seed.generate() { |
| 53 | match fwd(&t.haystack, &t.needle, t.index1, t.index2) { |
| 54 | None => continue, |
| 55 | Some(result) => { |
| 56 | assert_eq!( |
| 57 | t.fwd, result, |
| 58 | "FORWARD, needle: {:?}, haystack: {:?}, \ |
| 59 | index1: {:?}, index2: {:?}" , |
| 60 | t.needle, t.haystack, t.index1, t.index2, |
| 61 | ) |
| 62 | } |
| 63 | } |
| 64 | } |
| 65 | } |
| 66 | } |
| 67 | } |
| 68 | |
| 69 | /// Set the implementation for forward "packed pair" substring search. |
| 70 | /// |
| 71 | /// If the closure returns `None`, then it is assumed that the given |
| 72 | /// test cannot be applied to the particular implementation and it is |
| 73 | /// skipped. For example, if a particular implementation only supports |
| 74 | /// needles or haystacks for some minimum length. |
| 75 | /// |
| 76 | /// If this is not set, then forward "packed pair" search is not tested. |
| 77 | pub(crate) fn fwd( |
| 78 | mut self, |
| 79 | search: impl FnMut(&[u8], &[u8], u8, u8) -> Option<Option<usize>> + 'static, |
| 80 | ) -> Runner { |
| 81 | self.fwd = Some(Box::new(search)); |
| 82 | self |
| 83 | } |
| 84 | } |
| 85 | |
| 86 | /// A test that represents the input and expected output to a "packed pair" |
| 87 | /// search function. The test should be able to run with any "packed pair" |
| 88 | /// implementation and get the expected output. |
| 89 | struct Test { |
| 90 | haystack: Vec<u8>, |
| 91 | needle: Vec<u8>, |
| 92 | index1: u8, |
| 93 | index2: u8, |
| 94 | fwd: Option<usize>, |
| 95 | } |
| 96 | |
| 97 | impl Test { |
| 98 | /// Create a new "packed pair" test from a seed and some given offsets to |
| 99 | /// the pair of bytes to use as a predicate in the seed's needle. |
| 100 | /// |
| 101 | /// If a valid test could not be constructed, then None is returned. |
| 102 | /// (Currently, we take the approach of massaging tests to be valid |
| 103 | /// instead of rejecting them outright.) |
| 104 | fn new( |
| 105 | seed: Seed, |
| 106 | index1: usize, |
| 107 | index2: usize, |
| 108 | haystack_len: usize, |
| 109 | needle_len: usize, |
| 110 | fwd: Option<usize>, |
| 111 | ) -> Option<Test> { |
| 112 | let mut index1: u8 = index1.try_into().unwrap(); |
| 113 | let mut index2: u8 = index2.try_into().unwrap(); |
| 114 | // The '#' byte is never used in a haystack (unless we're expecting |
| 115 | // a match), while the '@' byte is never used in a needle. |
| 116 | let mut haystack = vec![b'@' ; haystack_len]; |
| 117 | let mut needle = vec![b'#' ; needle_len]; |
| 118 | needle[0] = seed.first; |
| 119 | needle[index1 as usize] = seed.index1; |
| 120 | needle[index2 as usize] = seed.index2; |
| 121 | // If we're expecting a match, then make sure the needle occurs |
| 122 | // in the haystack at the expected position. |
| 123 | if let Some(i) = fwd { |
| 124 | haystack[i..i + needle.len()].copy_from_slice(&needle); |
| 125 | } |
| 126 | // If the operations above lead to rare offsets pointing to the |
| 127 | // non-first occurrence of a byte, then adjust it. This might lead |
| 128 | // to redundant tests, but it's simpler than trying to change the |
| 129 | // generation process I think. |
| 130 | if let Some(i) = crate::memchr(seed.index1, &needle) { |
| 131 | index1 = u8::try_from(i).unwrap(); |
| 132 | } |
| 133 | if let Some(i) = crate::memchr(seed.index2, &needle) { |
| 134 | index2 = u8::try_from(i).unwrap(); |
| 135 | } |
| 136 | Some(Test { haystack, needle, index1, index2, fwd }) |
| 137 | } |
| 138 | } |
| 139 | |
| 140 | /// Data that describes a single prefilter test seed. |
| 141 | #[derive(Clone, Copy)] |
| 142 | struct Seed { |
| 143 | first: u8, |
| 144 | index1: u8, |
| 145 | index2: u8, |
| 146 | } |
| 147 | |
| 148 | impl Seed { |
| 149 | const NEEDLE_LENGTH_LIMIT: usize = { |
| 150 | #[cfg (not(miri))] |
| 151 | { |
| 152 | 33 |
| 153 | } |
| 154 | #[cfg (miri)] |
| 155 | { |
| 156 | 5 |
| 157 | } |
| 158 | }; |
| 159 | |
| 160 | const HAYSTACK_LENGTH_LIMIT: usize = { |
| 161 | #[cfg (not(miri))] |
| 162 | { |
| 163 | 65 |
| 164 | } |
| 165 | #[cfg (miri)] |
| 166 | { |
| 167 | 8 |
| 168 | } |
| 169 | }; |
| 170 | |
| 171 | /// Generate a series of prefilter tests from this seed. |
| 172 | fn generate(self) -> impl Iterator<Item = Test> { |
| 173 | let len_start = 2; |
| 174 | // The iterator below generates *a lot* of tests. The number of |
| 175 | // tests was chosen somewhat empirically to be "bearable" when |
| 176 | // running the test suite. |
| 177 | // |
| 178 | // We use an iterator here because the collective haystacks of all |
| 179 | // these test cases add up to enough memory to OOM a conservative |
| 180 | // sandbox or a small laptop. |
| 181 | (len_start..=Seed::NEEDLE_LENGTH_LIMIT).flat_map(move |needle_len| { |
| 182 | let index_start = len_start - 1; |
| 183 | (index_start..needle_len).flat_map(move |index1| { |
| 184 | (index1..needle_len).flat_map(move |index2| { |
| 185 | (needle_len..=Seed::HAYSTACK_LENGTH_LIMIT).flat_map( |
| 186 | move |haystack_len| { |
| 187 | Test::new( |
| 188 | self, |
| 189 | index1, |
| 190 | index2, |
| 191 | haystack_len, |
| 192 | needle_len, |
| 193 | None, |
| 194 | ) |
| 195 | .into_iter() |
| 196 | .chain( |
| 197 | (0..=(haystack_len - needle_len)).flat_map( |
| 198 | move |output| { |
| 199 | Test::new( |
| 200 | self, |
| 201 | index1, |
| 202 | index2, |
| 203 | haystack_len, |
| 204 | needle_len, |
| 205 | Some(output), |
| 206 | ) |
| 207 | }, |
| 208 | ), |
| 209 | ) |
| 210 | }, |
| 211 | ) |
| 212 | }) |
| 213 | }) |
| 214 | }) |
| 215 | } |
| 216 | } |
| 217 | |