1/*!
2An implementation of the [Shift-Or substring search algorithm][shiftor].
3
4[shiftor]: https://en.wikipedia.org/wiki/Bitap_algorithm
5*/
6
7use alloc::boxed::Box;
8
9/// The type of our mask.
10///
11/// While we don't expose anyway to configure this in the public API, if one
12/// really needs less memory usage or support for longer needles, then it is
13/// suggested to copy the code from this module and modify it to fit your
14/// needs. The code below is written to be correct regardless of whether Mask
15/// is a u8, u16, u32, u64 or u128.
16type Mask = u16;
17
18/// A forward substring searcher using the Shift-Or algorithm.
19#[derive(Debug)]
20pub struct Finder {
21 masks: Box<[Mask; 256]>,
22 needle_len: usize,
23}
24
25impl Finder {
26 const MAX_NEEDLE_LEN: usize = (Mask::BITS - 1) as usize;
27
28 /// Create a new Shift-Or forward searcher for the given `needle`.
29 ///
30 /// The needle may be empty. The empty needle matches at every byte offset.
31 #[inline]
32 pub fn new(needle: &[u8]) -> Option<Finder> {
33 let needle_len = needle.len();
34 if needle_len > Finder::MAX_NEEDLE_LEN {
35 // A match is found when bit 7 is set in 'result' in the search
36 // routine below. So our needle can't be bigger than 7. We could
37 // permit bigger needles by using u16, u32 or u64 for our mask
38 // entries. But this is all we need for this example.
39 return None;
40 }
41 let mut searcher = Finder { masks: Box::from([!0; 256]), needle_len };
42 for (i, &byte) in needle.iter().enumerate() {
43 searcher.masks[usize::from(byte)] &= !(1 << i);
44 }
45 Some(searcher)
46 }
47
48 /// Return the first occurrence of the needle given to `Finder::new` in
49 /// the `haystack` given. If no such occurrence exists, then `None` is
50 /// returned.
51 ///
52 /// Unlike most other substring search implementations in this crate, this
53 /// finder does not require passing the needle at search time. A match can
54 /// be determined without the needle at all since the required information
55 /// is already encoded into this finder at construction time.
56 ///
57 /// The maximum value this can return is `haystack.len()`, which can only
58 /// occur when the needle and haystack both have length zero. Otherwise,
59 /// for non-empty haystacks, the maximum value is `haystack.len() - 1`.
60 #[inline]
61 pub fn find(&self, haystack: &[u8]) -> Option<usize> {
62 if self.needle_len == 0 {
63 return Some(0);
64 }
65 let mut result = !1;
66 for (i, &byte) in haystack.iter().enumerate() {
67 result |= self.masks[usize::from(byte)];
68 result <<= 1;
69 if result & (1 << self.needle_len) == 0 {
70 return Some(i + 1 - self.needle_len);
71 }
72 }
73 None
74 }
75}
76
77#[cfg(test)]
78mod tests {
79 use super::*;
80
81 define_substring_forward_quickcheck!(|h, n| Some(Finder::new(n)?.find(h)));
82
83 #[test]
84 fn forward() {
85 crate::tests::substring::Runner::new()
86 .fwd(|h, n| Some(Finder::new(n)?.find(h)))
87 .run();
88 }
89}
90