| 1 | use alloc::{ |
| 2 | string::{String, ToString}, |
| 3 | vec, |
| 4 | vec::Vec, |
| 5 | }; |
| 6 | |
| 7 | use crate::ext::Byte; |
| 8 | |
| 9 | pub(crate) mod naive; |
| 10 | #[macro_use ] |
| 11 | pub(crate) mod prop; |
| 12 | |
| 13 | const SEEDS: &'static [Seed] = &[ |
| 14 | Seed { haystack: "a" , needles: &[b'a' ], positions: &[0] }, |
| 15 | Seed { haystack: "aa" , needles: &[b'a' ], positions: &[0, 1] }, |
| 16 | Seed { haystack: "aaa" , needles: &[b'a' ], positions: &[0, 1, 2] }, |
| 17 | Seed { haystack: "" , needles: &[b'a' ], positions: &[] }, |
| 18 | Seed { haystack: "z" , needles: &[b'a' ], positions: &[] }, |
| 19 | Seed { haystack: "zz" , needles: &[b'a' ], positions: &[] }, |
| 20 | Seed { haystack: "zza" , needles: &[b'a' ], positions: &[2] }, |
| 21 | Seed { haystack: "zaza" , needles: &[b'a' ], positions: &[1, 3] }, |
| 22 | Seed { haystack: "zzza" , needles: &[b'a' ], positions: &[3] }, |
| 23 | Seed { haystack: " \x00a" , needles: &[b'a' ], positions: &[1] }, |
| 24 | Seed { haystack: " \x00" , needles: &[b' \x00' ], positions: &[0] }, |
| 25 | Seed { haystack: " \x00\x00" , needles: &[b' \x00' ], positions: &[0, 1] }, |
| 26 | Seed { haystack: " \x00a \x00" , needles: &[b' \x00' ], positions: &[0, 2] }, |
| 27 | Seed { haystack: "zzzzzzzzzzzzzzzza" , needles: &[b'a' ], positions: &[16] }, |
| 28 | Seed { |
| 29 | haystack: "zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzza" , |
| 30 | needles: &[b'a' ], |
| 31 | positions: &[32], |
| 32 | }, |
| 33 | // two needles (applied to memchr2 + memchr3) |
| 34 | Seed { haystack: "az" , needles: &[b'a' , b'z' ], positions: &[0, 1] }, |
| 35 | Seed { haystack: "az" , needles: &[b'a' , b'z' ], positions: &[0, 1] }, |
| 36 | Seed { haystack: "az" , needles: &[b'x' , b'y' ], positions: &[] }, |
| 37 | Seed { haystack: "az" , needles: &[b'a' , b'y' ], positions: &[0] }, |
| 38 | Seed { haystack: "az" , needles: &[b'x' , b'z' ], positions: &[1] }, |
| 39 | Seed { haystack: "yyyyaz" , needles: &[b'a' , b'z' ], positions: &[4, 5] }, |
| 40 | Seed { haystack: "yyyyaz" , needles: &[b'z' , b'a' ], positions: &[4, 5] }, |
| 41 | // three needles (applied to memchr3) |
| 42 | Seed { |
| 43 | haystack: "xyz" , |
| 44 | needles: &[b'x' , b'y' , b'z' ], |
| 45 | positions: &[0, 1, 2], |
| 46 | }, |
| 47 | Seed { |
| 48 | haystack: "zxy" , |
| 49 | needles: &[b'x' , b'y' , b'z' ], |
| 50 | positions: &[0, 1, 2], |
| 51 | }, |
| 52 | Seed { haystack: "zxy" , needles: &[b'x' , b'a' , b'z' ], positions: &[0, 1] }, |
| 53 | Seed { haystack: "zxy" , needles: &[b't' , b'a' , b'z' ], positions: &[0] }, |
| 54 | Seed { haystack: "yxz" , needles: &[b't' , b'a' , b'z' ], positions: &[2] }, |
| 55 | ]; |
| 56 | |
| 57 | /// Runs a host of substring search tests. |
| 58 | /// |
| 59 | /// This has support for "partial" substring search implementations only work |
| 60 | /// for a subset of needles/haystacks. For example, the "packed pair" substring |
| 61 | /// search implementation only works for haystacks of some minimum length based |
| 62 | /// of the pair of bytes selected and the size of the vector used. |
| 63 | pub(crate) struct Runner { |
| 64 | needle_len: usize, |
| 65 | } |
| 66 | |
| 67 | impl Runner { |
| 68 | /// Create a new test runner for forward and reverse byte search |
| 69 | /// implementations. |
| 70 | /// |
| 71 | /// The `needle_len` given must be at most `3` and at least `1`. It |
| 72 | /// corresponds to the number of needle bytes to search for. |
| 73 | pub(crate) fn new(needle_len: usize) -> Runner { |
| 74 | assert!(needle_len >= 1, "needle_len must be at least 1" ); |
| 75 | assert!(needle_len <= 3, "needle_len must be at most 3" ); |
| 76 | Runner { needle_len } |
| 77 | } |
| 78 | |
| 79 | /// Run all tests. This panics on the first failure. |
| 80 | /// |
| 81 | /// If the implementation being tested returns `None` for a particular |
| 82 | /// haystack/needle combination, then that test is skipped. |
| 83 | pub(crate) fn forward_iter<F>(self, mut test: F) |
| 84 | where |
| 85 | F: FnMut(&[u8], &[u8]) -> Option<Vec<usize>> + 'static, |
| 86 | { |
| 87 | for seed in SEEDS.iter() { |
| 88 | if seed.needles.len() > self.needle_len { |
| 89 | continue; |
| 90 | } |
| 91 | for t in seed.generate() { |
| 92 | let results = match test(t.haystack.as_bytes(), &t.needles) { |
| 93 | None => continue, |
| 94 | Some(results) => results, |
| 95 | }; |
| 96 | assert_eq!( |
| 97 | t.expected, |
| 98 | results, |
| 99 | "needles: {:?}, haystack: {:?}" , |
| 100 | t.needles |
| 101 | .iter() |
| 102 | .map(|&b| b.to_char()) |
| 103 | .collect::<Vec<char>>(), |
| 104 | t.haystack, |
| 105 | ); |
| 106 | } |
| 107 | } |
| 108 | } |
| 109 | |
| 110 | /// Run all tests in the reverse direction. This panics on the first |
| 111 | /// failure. |
| 112 | /// |
| 113 | /// If the implementation being tested returns `None` for a particular |
| 114 | /// haystack/needle combination, then that test is skipped. |
| 115 | pub(crate) fn reverse_iter<F>(self, mut test: F) |
| 116 | where |
| 117 | F: FnMut(&[u8], &[u8]) -> Option<Vec<usize>> + 'static, |
| 118 | { |
| 119 | for seed in SEEDS.iter() { |
| 120 | if seed.needles.len() > self.needle_len { |
| 121 | continue; |
| 122 | } |
| 123 | for t in seed.generate() { |
| 124 | let mut results = match test(t.haystack.as_bytes(), &t.needles) |
| 125 | { |
| 126 | None => continue, |
| 127 | Some(results) => results, |
| 128 | }; |
| 129 | results.reverse(); |
| 130 | assert_eq!( |
| 131 | t.expected, |
| 132 | results, |
| 133 | "needles: {:?}, haystack: {:?}" , |
| 134 | t.needles |
| 135 | .iter() |
| 136 | .map(|&b| b.to_char()) |
| 137 | .collect::<Vec<char>>(), |
| 138 | t.haystack, |
| 139 | ); |
| 140 | } |
| 141 | } |
| 142 | } |
| 143 | |
| 144 | /// Run all tests as counting tests. This panics on the first failure. |
| 145 | /// |
| 146 | /// That is, this only checks that the number of matches is correct and |
| 147 | /// not whether the offsets of each match are. |
| 148 | pub(crate) fn count_iter<F>(self, mut test: F) |
| 149 | where |
| 150 | F: FnMut(&[u8], &[u8]) -> Option<usize> + 'static, |
| 151 | { |
| 152 | for seed in SEEDS.iter() { |
| 153 | if seed.needles.len() > self.needle_len { |
| 154 | continue; |
| 155 | } |
| 156 | for t in seed.generate() { |
| 157 | let got = match test(t.haystack.as_bytes(), &t.needles) { |
| 158 | None => continue, |
| 159 | Some(got) => got, |
| 160 | }; |
| 161 | assert_eq!( |
| 162 | t.expected.len(), |
| 163 | got, |
| 164 | "needles: {:?}, haystack: {:?}" , |
| 165 | t.needles |
| 166 | .iter() |
| 167 | .map(|&b| b.to_char()) |
| 168 | .collect::<Vec<char>>(), |
| 169 | t.haystack, |
| 170 | ); |
| 171 | } |
| 172 | } |
| 173 | } |
| 174 | |
| 175 | /// Like `Runner::forward`, but for a function that returns only the next |
| 176 | /// match and not all matches. |
| 177 | /// |
| 178 | /// If the function returns `None`, then it is skipped. |
| 179 | pub(crate) fn forward_oneshot<F>(self, mut test: F) |
| 180 | where |
| 181 | F: FnMut(&[u8], &[u8]) -> Option<Option<usize>> + 'static, |
| 182 | { |
| 183 | self.forward_iter(move |haystack, needles| { |
| 184 | let mut start = 0; |
| 185 | let mut results = vec![]; |
| 186 | while let Some(i) = test(&haystack[start..], needles)? { |
| 187 | results.push(start + i); |
| 188 | start += i + 1; |
| 189 | } |
| 190 | Some(results) |
| 191 | }) |
| 192 | } |
| 193 | |
| 194 | /// Like `Runner::reverse`, but for a function that returns only the last |
| 195 | /// match and not all matches. |
| 196 | /// |
| 197 | /// If the function returns `None`, then it is skipped. |
| 198 | pub(crate) fn reverse_oneshot<F>(self, mut test: F) |
| 199 | where |
| 200 | F: FnMut(&[u8], &[u8]) -> Option<Option<usize>> + 'static, |
| 201 | { |
| 202 | self.reverse_iter(move |haystack, needles| { |
| 203 | let mut end = haystack.len(); |
| 204 | let mut results = vec![]; |
| 205 | while let Some(i) = test(&haystack[..end], needles)? { |
| 206 | results.push(i); |
| 207 | end = i; |
| 208 | } |
| 209 | Some(results) |
| 210 | }) |
| 211 | } |
| 212 | } |
| 213 | |
| 214 | /// A single test for memr?chr{,2,3}. |
| 215 | #[derive(Clone, Debug)] |
| 216 | struct Test { |
| 217 | /// The string to search in. |
| 218 | haystack: String, |
| 219 | /// The needles to look for. |
| 220 | needles: Vec<u8>, |
| 221 | /// The offsets that are expected to be found for all needles in the |
| 222 | /// forward direction. |
| 223 | expected: Vec<usize>, |
| 224 | } |
| 225 | |
| 226 | impl Test { |
| 227 | fn new(seed: &Seed) -> Test { |
| 228 | Test { |
| 229 | haystack: seed.haystack.to_string(), |
| 230 | needles: seed.needles.to_vec(), |
| 231 | expected: seed.positions.to_vec(), |
| 232 | } |
| 233 | } |
| 234 | } |
| 235 | |
| 236 | /// Data that can be expanded into many memchr tests by padding out the corpus. |
| 237 | #[derive(Clone, Debug)] |
| 238 | struct Seed { |
| 239 | /// The thing to search. We use `&str` instead of `&[u8]` because they |
| 240 | /// are nicer to write in tests, and we don't miss much since memchr |
| 241 | /// doesn't care about UTF-8. |
| 242 | /// |
| 243 | /// Corpora cannot contain either '%' or '#'. We use these bytes when |
| 244 | /// expanding test cases into many test cases, and we assume they are not |
| 245 | /// used. If they are used, `memchr_tests` will panic. |
| 246 | haystack: &'static str, |
| 247 | /// The needles to search for. This is intended to be an alternation of |
| 248 | /// needles. The number of needles may cause this test to be skipped for |
| 249 | /// some memchr variants. For example, a test with 2 needles cannot be used |
| 250 | /// to test `memchr`, but can be used to test `memchr2` and `memchr3`. |
| 251 | /// However, a test with only 1 needle can be used to test all of `memchr`, |
| 252 | /// `memchr2` and `memchr3`. We achieve this by filling in the needles with |
| 253 | /// bytes that we never used in the corpus (such as '#'). |
| 254 | needles: &'static [u8], |
| 255 | /// The positions expected to match for all of the needles. |
| 256 | positions: &'static [usize], |
| 257 | } |
| 258 | |
| 259 | impl Seed { |
| 260 | /// Controls how much we expand the haystack on either side for each test. |
| 261 | /// We lower this on Miri because otherwise running the tests would take |
| 262 | /// forever. |
| 263 | const EXPAND_LEN: usize = { |
| 264 | #[cfg (not(miri))] |
| 265 | { |
| 266 | 515 |
| 267 | } |
| 268 | #[cfg (miri)] |
| 269 | { |
| 270 | 6 |
| 271 | } |
| 272 | }; |
| 273 | |
| 274 | /// Expand this test into many variations of the same test. |
| 275 | /// |
| 276 | /// In particular, this will generate more tests with larger corpus sizes. |
| 277 | /// The expected positions are updated to maintain the integrity of the |
| 278 | /// test. |
| 279 | /// |
| 280 | /// This is important in testing a memchr implementation, because there are |
| 281 | /// often different cases depending on the length of the corpus. |
| 282 | /// |
| 283 | /// Note that we extend the corpus by adding `%` bytes, which we |
| 284 | /// don't otherwise use as a needle. |
| 285 | fn generate(&self) -> impl Iterator<Item = Test> { |
| 286 | let mut more = vec![]; |
| 287 | |
| 288 | // Add bytes to the start of the corpus. |
| 289 | for i in 0..Seed::EXPAND_LEN { |
| 290 | let mut t = Test::new(self); |
| 291 | let mut new: String = core::iter::repeat('%' ).take(i).collect(); |
| 292 | new.push_str(&t.haystack); |
| 293 | t.haystack = new; |
| 294 | t.expected = t.expected.into_iter().map(|p| p + i).collect(); |
| 295 | more.push(t); |
| 296 | } |
| 297 | // Add bytes to the end of the corpus. |
| 298 | for i in 1..Seed::EXPAND_LEN { |
| 299 | let mut t = Test::new(self); |
| 300 | let padding: String = core::iter::repeat('%' ).take(i).collect(); |
| 301 | t.haystack.push_str(&padding); |
| 302 | more.push(t); |
| 303 | } |
| 304 | |
| 305 | more.into_iter() |
| 306 | } |
| 307 | } |
| 308 | |