use crate::memmem::{rarebytes::RareNeedleBytes, NeedleInfo};

mod fallback;
#[cfg(memchr_runtime_simd)]
mod genericsimd;
#[cfg(all(not(miri), target_arch = "wasm32", memchr_runtime_simd))]
mod wasm;
#[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))]
mod x86;

/// The maximum frequency rank permitted for the fallback prefilter. If the
/// rarest byte in the needle has a frequency rank above this value, then no
/// prefilter is used if the fallback prefilter would otherwise be selected.
const MAX_FALLBACK_RANK: usize = 250;

/// A combination of prefilter effectiveness state, the prefilter function
/// and the needle info required to run a prefilter.
///
/// For the most part, these are grouped into a single type for convenience,
/// instead of needing to pass around all three as distinct function
/// parameters.
pub(crate) struct Pre<'a> {
    /// State that tracks the effectiveness of a prefilter.
    pub(crate) state: &'a mut PrefilterState,
    /// The actual prefilter function.
    pub(crate) prefn: PrefilterFn,
    /// Information about a needle, such as its RK hash and rare byte
    /// offsets.
    pub(crate) ninfo: &'a NeedleInfo,
}

impl<'a> Pre<'a> {
    /// Call this prefilter on the given haystack with the given needle.
    #[inline(always)]
    pub(crate) fn call(
        &mut self,
        haystack: &[u8],
        needle: &[u8],
    ) -> Option<usize> {
        self.prefn.call(self.state, self.ninfo, haystack, needle)
    }

    /// Return true if and only if this prefilter should be used.
    #[inline(always)]
    pub(crate) fn should_call(&mut self) -> bool {
        self.state.is_effective()
    }
}
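
// A minimal sketch of how a searcher is expected to drive `Pre` (the names
// `pre`, `haystack` and `needle` are illustrative, not taken from a specific
// call site):
//
//     if pre.should_call() {
//         if let Some(candidate) = pre.call(haystack, needle) {
//             // ... verify whether `candidate` starts a real match ...
//         }
//     }
//
// Checking `should_call` first lets a searcher stop paying for a prefilter
// once the effectiveness heuristic (see `PrefilterState` below) renders it
// inert.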

/// A prefilter function.
///
/// A prefilter function describes both forward and reverse searches.
/// (Although we don't currently implement prefilters for reverse searching.)
/// In the case of a forward search, the position returned corresponds to
/// the starting offset of a match (confirmed or possible). Its minimum
/// value is `0`, and its maximum value is `haystack.len() - 1`. In the case
/// of a reverse search, the position returned corresponds to the position
/// immediately after a match (confirmed or possible). Its minimum value is
/// `1` and its maximum value is `haystack.len()`.
///
/// In both cases, the position returned is the starting (or ending) point of
/// a _possible_ match. That is, returning a false positive is okay. A
/// prefilter, however, must never return any false negatives. That is, if a
/// match exists at a particular position `i`, then a prefilter _must_ return
/// that position. It cannot skip past it.
///
/// # Safety
///
/// A prefilter function is not safe to create, since not all prefilters are
/// safe to call in all contexts. (e.g., A prefilter that uses AVX
/// instructions may only be called on x86_64 CPUs with the relevant AVX
/// feature enabled.) Thus, callers must ensure that when a prefilter
/// function is created, it is safe to call for the current environment.
#[derive(Clone, Copy)]
pub(crate) struct PrefilterFn(PrefilterFnTy);

/// The type of a prefilter function. All prefilters must satisfy this
/// signature.
///
/// Using a function pointer like this does inhibit inlining, but it does
/// eliminate branching and the extra costs associated with copying a larger
/// enum. Note also that using `Box<dyn SomePrefilterTrait>` can't really
/// work here, since we want to work in contexts that don't have dynamic
/// memory allocation. Moreover, in the default configuration of this crate
/// on x86_64 CPUs released in the past ~decade, we will use an
/// AVX2-optimized prefilter, which generally won't be inlineable into the
/// surrounding code anyway. (Unless AVX2 is enabled at compile time, but
/// this is typically rare, since it produces a non-portable binary.)
pub(crate) type PrefilterFnTy = unsafe fn(
    prestate: &mut PrefilterState,
    ninfo: &NeedleInfo,
    haystack: &[u8],
    needle: &[u8],
) -> Option<usize>;

// If the haystack is too small for SSE2, then just run memchr on the
// rarest byte and be done with it. (It is likely that this code path is
// rarely exercised, since a higher level routine will probably dispatch to
// Rabin-Karp for such a small haystack.)
#[cfg(memchr_runtime_simd)]
fn simple_memchr_fallback(
    _prestate: &mut PrefilterState,
    ninfo: &NeedleInfo,
    haystack: &[u8],
    needle: &[u8],
) -> Option<usize> {
    let (rare, _) = ninfo.rarebytes.as_rare_ordered_usize();
    crate::memchr(needle[rare], haystack).map(|i| i.saturating_sub(rare))
}

impl PrefilterFn {
    /// Create a new prefilter function from the function pointer given.
    ///
    /// # Safety
    ///
    /// Callers must ensure that the given prefilter function is safe to call
    /// for all inputs in the current environment. For example, if the given
    /// prefilter function uses AVX instructions, then the caller must ensure
    /// that the appropriate AVX CPU features are enabled.
    pub(crate) unsafe fn new(prefn: PrefilterFnTy) -> PrefilterFn {
        PrefilterFn(prefn)
    }

    /// Call the underlying prefilter function with the given arguments.
    pub fn call(
        self,
        prestate: &mut PrefilterState,
        ninfo: &NeedleInfo,
        haystack: &[u8],
        needle: &[u8],
    ) -> Option<usize> {
        // SAFETY: Callers have the burden of ensuring that a prefilter
        // function is safe to call for all inputs in the current
        // environment.
        unsafe { (self.0)(prestate, ninfo, haystack, needle) }
    }
}
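
// A minimal sketch of the intended construction/calling pattern, assuming a
// prefilter that is safe in all environments (as `fallback::find` is):
//
//     let prefn = unsafe { PrefilterFn::new(fallback::find) };
//     let found = prefn.call(&mut prestate, &ninfo, haystack, needle);
//
// The `unsafe` obligation is discharged here because `fallback::find` has no
// CPU feature requirements. Feature-gated prefilters (e.g., AVX2) must only
// be wrapped after a runtime feature check, as `forward` below does.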

impl core::fmt::Debug for PrefilterFn {
    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
        "<prefilter-fn(...)>".fmt(f)
    }
}

/// Prefilter controls whether heuristics are used to accelerate searching.
///
/// A prefilter refers to the idea of detecting candidate matches very
/// quickly, and then confirming whether those candidates are full matches.
/// This idea can be quite effective since it's often the case that looking
/// for candidates can be a lot faster than running a complete substring
/// search over the entire input. Namely, looking for candidates can be done
/// with extremely fast vectorized code.
///
/// The downside of a prefilter is that it assumes false positives (which are
/// candidates generated by a prefilter that aren't matches) are somewhat
/// rare relative to the frequency of full matches. That is, if a lot of
/// false positives are generated, then it's possible for search time to be
/// worse than if the prefilter wasn't enabled in the first place.
///
/// Another downside of a prefilter is that it can result in highly variable
/// performance, where some cases are extraordinarily fast and others aren't.
/// Typically, variable performance isn't a problem, but it may be for your
/// use case.
///
/// The prefilters in this implementation use a heuristic to detect when a
/// prefilter might not be carrying its weight, and will dynamically disable
/// its use. Nevertheless, this configuration option gives callers the
/// ability to disable prefilters if they know that prefilters won't be
/// useful.
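///
/// # Example
///
/// A short sketch of disabling prefilters via the crate's public
/// `memmem::FinderBuilder` API. (The example is marked `ignore` because this
/// enum lives in an internal module.)
///
/// ```ignore
/// use memchr::memmem::{FinderBuilder, Prefilter};
///
/// let finder = FinderBuilder::new()
///     .prefilter(Prefilter::None)
///     .build_forward("needle");
/// assert_eq!(Some(2), finder.find(b"a needle"));
/// ```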
#[derive(Clone, Copy, Debug)]
#[non_exhaustive]
pub enum Prefilter {
    /// Never use a prefilter in substring search.
    None,
    /// Automatically detect whether a heuristic prefilter should be used. If
    /// it is used, then heuristics will be used to dynamically disable the
    /// prefilter if it is believed to not be carrying its weight.
    Auto,
}

impl Default for Prefilter {
    fn default() -> Prefilter {
        Prefilter::Auto
    }
}

impl Prefilter {
    pub(crate) fn is_none(&self) -> bool {
        match *self {
            Prefilter::None => true,
            _ => false,
        }
    }
}

/// PrefilterState tracks state associated with the effectiveness of a
/// prefilter. It is used to track how many bytes, on average, are skipped by
/// the prefilter. If this average dips below a certain threshold over time,
/// then the state renders the prefilter inert and stops using it.
///
/// A prefilter state should be created for each search. (Where creating an
/// iterator is treated as a single search.) A prefilter state can also be
/// created inert, e.g., when it has already been determined that no
/// prefilter should be used. An inert state always reports the prefilter as
/// ineffective.
#[derive(Clone, Debug)]
pub(crate) struct PrefilterState {
    /// The number of skips that have been executed. This is always 1 greater
    /// than the actual number of skips. The special sentinel value of 0
    /// indicates that the prefilter is inert. This is useful to avoid
    /// additional checks to determine whether the prefilter is still
    /// "effective." Once a prefilter becomes inert, it should no longer be
    /// used (according to our heuristics).
    skips: u32,
    /// The total number of bytes that have been skipped.
    skipped: u32,
}

impl PrefilterState {
    /// The minimum number of skip attempts to try before considering whether
    /// a prefilter is effective or not.
    const MIN_SKIPS: u32 = 50;

    /// The minimum number of bytes that skipping must average.
    ///
    /// This value was chosen based on varying it and checking
    /// the microbenchmarks. In particular, this can impact the
    /// pathological/repeated-{huge,small} benchmarks quite a bit if it's set
    /// too low.
    const MIN_SKIP_BYTES: u32 = 8;

    /// Create a fresh prefilter state.
    pub(crate) fn new() -> PrefilterState {
        PrefilterState { skips: 1, skipped: 0 }
    }

    /// Create a fresh prefilter state that is always inert.
    pub(crate) fn inert() -> PrefilterState {
        PrefilterState { skips: 0, skipped: 0 }
    }

    /// Update this state with the number of bytes skipped on the last
    /// invocation of the prefilter.
    #[inline]
    pub(crate) fn update(&mut self, skipped: usize) {
        self.skips = self.skips.saturating_add(1);
        // We need to do this dance since it's technically possible for
        // `skipped` to overflow a `u32`. (And we use a `u32` to reduce the
        // size of a prefilter state.)
        if skipped > core::u32::MAX as usize {
            self.skipped = core::u32::MAX;
        } else {
            self.skipped = self.skipped.saturating_add(skipped as u32);
        }
    }

    /// Return true if and only if this state indicates that a prefilter is
    /// still effective.
    #[inline]
    pub(crate) fn is_effective(&mut self) -> bool {
        if self.is_inert() {
            return false;
        }
        if self.skips() < PrefilterState::MIN_SKIPS {
            return true;
        }
        if self.skipped >= PrefilterState::MIN_SKIP_BYTES * self.skips() {
            return true;
        }

        // We're inert.
        self.skips = 0;
        false
    }

    #[inline]
    fn is_inert(&self) -> bool {
        self.skips == 0
    }

    #[inline]
    fn skips(&self) -> u32 {
        self.skips.saturating_sub(1)
    }
}
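
// A minimal sketch (not part of the crate's original test suite) showing how
// the effectiveness heuristic above behaves at its boundaries.
#[cfg(test)]
mod prefilter_state_sketch {
    use super::PrefilterState;

    #[test]
    fn effectiveness_heuristic() {
        // A fresh state is considered effective until there is enough
        // evidence otherwise.
        let mut state = PrefilterState::new();
        assert!(state.is_effective());

        // Simulate MIN_SKIPS invocations that each skip only 1 byte. The
        // average skip (1 byte) falls below MIN_SKIP_BYTES (8), so the
        // state renders the prefilter inert.
        for _ in 0..PrefilterState::MIN_SKIPS {
            state.update(1);
        }
        assert!(!state.is_effective());

        // By contrast, consistently large skips keep the prefilter active.
        let mut state = PrefilterState::new();
        for _ in 0..PrefilterState::MIN_SKIPS {
            state.update(1024);
        }
        assert!(state.is_effective());
    }
}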

/// Determine which prefilter function, if any, to use.
///
/// On x86_64 with runtime SIMD detection enabled (which is the default), we
/// try to use an AVX2 prefilter, falling back to SSE2. On wasm32 with SIMD
/// enabled, a vectorized prefilter is used as well. Otherwise, a generic
/// prefilter based on memchr is used, so long as the needle's rarest byte
/// is sufficiently rare.
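///
/// For example, in the default configuration on a modern x86_64 CPU with
/// the `std` feature enabled and `avx2` detected at runtime, this returns
/// the AVX2 prefilter. With `Prefilter::None`, or a needle of length 0 or
/// 1, this returns `None` (such needles are better served by memchr
/// directly, which higher level code is expected to use).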
#[inline(always)]
pub(crate) fn forward(
    config: &Prefilter,
    rare: &RareNeedleBytes,
    needle: &[u8],
) -> Option<PrefilterFn> {
    if config.is_none() || needle.len() <= 1 {
        return None;
    }

    #[cfg(all(not(miri), target_arch = "x86_64", memchr_runtime_simd))]
    {
        #[cfg(feature = "std")]
        {
            if cfg!(memchr_runtime_avx) {
                if is_x86_feature_detected!("avx2") {
                    // SAFETY: x86::avx::find only requires the avx2
                    // feature, which we've just checked above.
                    return unsafe { Some(PrefilterFn::new(x86::avx::find)) };
                }
            }
        }
        if cfg!(memchr_runtime_sse2) {
            // SAFETY: x86::sse::find only requires the sse2 feature, which
            // is guaranteed to be available on x86_64.
            return unsafe { Some(PrefilterFn::new(x86::sse::find)) };
        }
    }
    #[cfg(all(not(miri), target_arch = "wasm32", memchr_runtime_simd))]
    {
        // SAFETY: `wasm::find` is actually a safe function.
        //
        // Also note that the `if true` is here to prevent rustc, on wasm
        // with simd, from warning that the code below is dead code.
        if true {
            return unsafe { Some(PrefilterFn::new(wasm::find)) };
        }
    }
    // Check that our rarest byte has a reasonably low rank. The main issue
    // here is that the fallback prefilter can perform pretty poorly if it's
    // given common bytes. So we try to avoid the worst cases here.
    let (rare1_rank, _) = rare.as_ranks(needle);
    if rare1_rank <= MAX_FALLBACK_RANK {
        // SAFETY: fallback::find is safe to call in all environments.
        return unsafe { Some(PrefilterFn::new(fallback::find)) };
    }
    None
}

/// Return the minimum length of the haystack in which a prefilter should be
/// used. If the haystack is below this length, then it's probably not worth
/// the overhead of running the prefilter.
///
/// We used to look at the length of a haystack here. That is, if it was too
/// small, then don't bother with the prefilter. But two things changed:
/// the prefilter falls back to memchr for small haystacks, and, at the
/// meta-searcher level, Rabin-Karp is employed for tiny haystacks anyway.
///
/// We keep this routine around for now in case we want to bring it back.
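///
/// As a rough worked example of the computation below: for a 4-byte needle,
/// the minimum haystack length is max(16, 2 * 4) + 1 = 17, while for a
/// 16-byte needle it is max(16, 2 * 16) + 1 = 33.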
#[allow(dead_code)]
pub(crate) fn minimum_len(_haystack: &[u8], needle: &[u8]) -> usize {
    // If the haystack length isn't greater than needle.len() * FACTOR, then
    // no prefilter will be used. The presumption here is that since there
    // are so few bytes to check, it's not worth running the prefilter since
    // there will need to be a validation step anyway. Thus, the prefilter
    // is largely redundant work.
    //
    // Increasing the factor noticeably hurts the
    // memmem/krate/prebuilt/teeny-*/never-john-watson benchmarks.
    const PREFILTER_LENGTH_FACTOR: usize = 2;
    const VECTOR_MIN_LENGTH: usize = 16;
    let min = core::cmp::max(
        VECTOR_MIN_LENGTH,
        PREFILTER_LENGTH_FACTOR * needle.len(),
    );
    // For haystacks with length == min, we still want to avoid the
    // prefilter, so add 1.
    min + 1
}

#[cfg(all(test, feature = "std", not(miri)))]
pub(crate) mod tests {
    use std::convert::{TryFrom, TryInto};

    use super::*;
    use crate::memmem::{
        prefilter::PrefilterFnTy, rabinkarp, rarebytes::RareNeedleBytes,
    };

    // Below is a small jig that generates prefilter tests. The main purpose
    // of this jig is to generate tests of varying needle/haystack lengths
    // in order to try and exercise all code paths in our prefilters. And in
    // particular, this is especially important for vectorized prefilters
    // where certain code paths might only be exercised at certain lengths.

    /// A test that represents the input and expected output to a prefilter
    /// function. The test should be able to run with any prefilter function
    /// and get the expected output.
    pub(crate) struct PrefilterTest {
        // These fields represent the inputs and expected output of a
        // forwards prefilter function.
        pub(crate) ninfo: NeedleInfo,
        pub(crate) haystack: Vec<u8>,
        pub(crate) needle: Vec<u8>,
        pub(crate) output: Option<usize>,
    }

    impl PrefilterTest {
        /// Run all generated forward prefilter tests on the given prefn.
        ///
        /// # Safety
        ///
        /// Callers must ensure that the given prefilter function pointer is
        /// safe to call for all inputs in the current environment.
        pub(crate) unsafe fn run_all_tests(prefn: PrefilterFnTy) {
            PrefilterTest::run_all_tests_filter(prefn, |_| true)
        }

        /// Run all generated forward prefilter tests that pass the given
        /// predicate on the given prefn.
        ///
        /// # Safety
        ///
        /// Callers must ensure that the given prefilter function pointer is
        /// safe to call for all inputs in the current environment.
        pub(crate) unsafe fn run_all_tests_filter(
            prefn: PrefilterFnTy,
            mut predicate: impl FnMut(&PrefilterTest) -> bool,
        ) {
            for seed in PREFILTER_TEST_SEEDS {
                for test in seed.generate() {
                    if predicate(&test) {
                        test.run(prefn);
                    }
                }
            }
        }

        /// Create a new prefilter test from a seed and some chosen offsets
        /// to rare bytes in the seed's needle.
        ///
        /// If a valid test could not be constructed, then None is returned.
        /// (Currently, we take the approach of massaging tests to be valid
        /// instead of rejecting them outright.)
        fn new(
            seed: PrefilterTestSeed,
            rare1i: usize,
            rare2i: usize,
            haystack_len: usize,
            needle_len: usize,
            output: Option<usize>,
        ) -> Option<PrefilterTest> {
            let mut rare1i: u8 = rare1i.try_into().unwrap();
            let mut rare2i: u8 = rare2i.try_into().unwrap();
            // The '#' byte is never used in a haystack (unless we're
            // expecting a match), while the '@' byte is never used in a
            // needle.
            let mut haystack = vec![b'@'; haystack_len];
            let mut needle = vec![b'#'; needle_len];
            needle[0] = seed.first;
            needle[rare1i as usize] = seed.rare1;
            needle[rare2i as usize] = seed.rare2;
            // If we're expecting a match, then make sure the needle occurs
            // in the haystack at the expected position.
            if let Some(i) = output {
                haystack[i..i + needle.len()].copy_from_slice(&needle);
            }
            // If the operations above lead to rare offsets pointing to the
            // non-first occurrence of a byte, then adjust them. This might
            // lead to redundant tests, but it's simpler than trying to
            // change the generation process.
            if let Some(i) = crate::memchr(seed.rare1, &needle) {
                rare1i = u8::try_from(i).unwrap();
            }
            if let Some(i) = crate::memchr(seed.rare2, &needle) {
                rare2i = u8::try_from(i).unwrap();
            }
            let ninfo = NeedleInfo {
                rarebytes: RareNeedleBytes::new(rare1i, rare2i),
                nhash: rabinkarp::NeedleHash::forward(&needle),
            };
            Some(PrefilterTest { ninfo, haystack, needle, output })
        }

        /// Run this specific test on the given prefilter function. If the
        /// outputs do not match, then this routine panics with a failure
        /// message.
        ///
        /// # Safety
        ///
        /// Callers must ensure that the given prefilter function pointer is
        /// safe to call for all inputs in the current environment.
        unsafe fn run(&self, prefn: PrefilterFnTy) {
            let mut prestate = PrefilterState::new();
            assert_eq!(
                self.output,
                prefn(
                    &mut prestate,
                    &self.ninfo,
                    &self.haystack,
                    &self.needle
                ),
                "ninfo: {:?}, haystack(len={}): {:?}, needle(len={}): {:?}",
                self.ninfo,
                self.haystack.len(),
                std::str::from_utf8(&self.haystack).unwrap(),
                self.needle.len(),
                std::str::from_utf8(&self.needle).unwrap(),
            );
        }
    }

    /// A set of prefilter test seeds. Each seed serves as the base for the
    /// generation of many other tests. In essence, the seed captures the
    /// "rare" and first bytes among our needle. The tests generated from
    /// each seed essentially vary the length of the needle and haystack,
    /// while using the rare/first byte configuration from the seed.
    ///
    /// The purpose of this is to test many different needle/haystack
    /// lengths. In particular, some of the vector optimizations might only
    /// have bugs in haystacks of a certain size.
    const PREFILTER_TEST_SEEDS: &[PrefilterTestSeed] = &[
        PrefilterTestSeed { first: b'x', rare1: b'y', rare2: b'z' },
        PrefilterTestSeed { first: b'x', rare1: b'x', rare2: b'z' },
        PrefilterTestSeed { first: b'x', rare1: b'y', rare2: b'x' },
        PrefilterTestSeed { first: b'x', rare1: b'x', rare2: b'x' },
        PrefilterTestSeed { first: b'x', rare1: b'y', rare2: b'y' },
    ];

    /// Data that describes a single prefilter test seed.
    #[derive(Clone, Copy)]
    struct PrefilterTestSeed {
        first: u8,
        rare1: u8,
        rare2: u8,
    }

    impl PrefilterTestSeed {
        /// Generate a series of prefilter tests from this seed.
        fn generate(self) -> impl Iterator<Item = PrefilterTest> {
            let len_start = 2;
            // The iterator below generates *a lot* of tests. The number of
            // tests was chosen somewhat empirically to be "bearable" when
            // running the test suite.
            //
            // We use an iterator here because the collective haystacks of
            // all these test cases add up to enough memory to OOM a
            // conservative sandbox or a small laptop.
            (len_start..=40).flat_map(move |needle_len| {
                let rare_start = len_start - 1;
                (rare_start..needle_len).flat_map(move |rare1i| {
                    (rare1i..needle_len).flat_map(move |rare2i| {
                        (needle_len..=66).flat_map(move |haystack_len| {
                            PrefilterTest::new(
                                self,
                                rare1i,
                                rare2i,
                                haystack_len,
                                needle_len,
                                None,
                            )
                            .into_iter()
                            .chain(
                                (0..=(haystack_len - needle_len)).flat_map(
                                    move |output| {
                                        PrefilterTest::new(
                                            self,
                                            rare1i,
                                            rare2i,
                                            haystack_len,
                                            needle_len,
                                            Some(output),
                                        )
                                    },
                                ),
                            )
                        })
                    })
                })
            })
        }
    }
}