/// A copyable strategy for cutting a string in two at the first candidate position.
pub(crate) trait Splitter: Copy {
    /// Splits `haystack` at the first position where one of the needles may begin.
    ///
    /// On a hit, the result is `Some((before, rest))`, where `rest` begins at the hit.
    /// If none of the needles was found, the result is `None`.
    ///
    /// Only the first byte of each needle is inspected, so false positives are possible:
    /// always verify that `rest` really starts with the prefix you expect.
    fn split<'a>(&self, haystack: &'a str) -> Option<(&'a str, &'a str)>;
}
| 9 | |
| 10 | impl<T: Splitter> Splitter for &T { |
| 11 | #[inline ] |
| 12 | fn split<'a>(&self, haystack: &'a str) -> Option<(&'a str, &'a str)> { |
| 13 | T::split(self, haystack) |
| 14 | } |
| 15 | } |
| 16 | |
// Defines a string splitter type that searches with a `memchr::*` function and implements
// `Splitter` for it.
//
// Arguments, in order:
// * `$struct`: name of the generated splitter struct,
// * `$split_unchecked`: name of the generated free `unsafe fn` that performs the split,
// * `$memchr`: name of the function under `memchr::` used for searching,
// * `$field`...: one identifier per needle; each becomes a `u8` field holding the first byte
//   of that needle.
macro_rules! new_memchr_type {
    ($struct:ident $split_unchecked:ident $memchr:ident $($field:ident)*) => {
        /// Splits a haystack in front of the first byte that equals the first byte of any
        /// of the needles it was built from.
        #[derive(Debug, Clone, Copy)]
        pub(crate) struct $struct {
            $($field: u8,)*
        }

        impl $struct {
            /// Creates a splitter that remembers the first byte of every needle.
            ///
            /// # Panics
            ///
            /// Panics if any of the needles is empty.
            #[track_caller]
            pub(crate) fn new($($field: &str),*) -> Self {
                Self {
                    $($field: *$field
                        .as_bytes()
                        .first()
                        .expect("needle must not be empty"),)*
                }
            }

            /// Splits `haystack` in front of the first byte that matches a stored needle byte.
            ///
            /// Returns `None` if no such byte occurs. A hit only proves that the first byte
            /// of some needle matched, so the caller still has to verify the second half.
            #[inline]
            pub(crate) fn split<'a>(&self, haystack: &'a str) -> Option<(&'a str, &'a str)> {
                // SAFETY: `new` fills every field with the first byte of a `&str`. Such a
                // byte is never a UTF-8 continuation byte, so wherever it occurs in
                // `haystack`, that position is a char boundary.
                unsafe { $split_unchecked($(self.$field,)* haystack) }
            }
        }

        impl Splitter for $struct {
            #[inline]
            fn split<'a>(&self, haystack: &'a str) -> Option<(&'a str, &'a str)> {
                // Method resolution prefers the inherent `split` above over this trait
                // method, so this delegates instead of recursing.
                self.split(haystack)
            }
        }

        /// Splits `haystack` in front of the first occurrence of any of the given bytes.
        ///
        /// Returns `None` if none of the bytes occurs in `haystack`.
        ///
        /// # Safety
        ///
        /// Every needle byte must be one that can only occur at a char boundary of a valid
        /// UTF-8 string, i.e. it must not be a continuation byte (`0x80..=0xBF`). Otherwise
        /// the haystack could be sliced in the middle of a character.
        pub(crate) unsafe fn $split_unchecked(
            $($field: u8,)*
            haystack: &str,
        ) -> Option<(&str, &str)> {
            let idx = memchr::$memchr($($field,)* haystack.as_bytes())?;
            // SAFETY: `idx` is the position of a byte inside `haystack`, hence
            // `idx < haystack.len()`, and the caller guarantees that a needle byte can only
            // sit at a char boundary.
            unsafe { Some((haystack.get_unchecked(..idx), haystack.get_unchecked(idx..))) }
        }
    };
}
| 60 | |
// Instantiate the splitters for one, two and three needles. Each invocation names the
// generated struct, its unchecked helper function, the `memchr` function to search with,
// and one field per needle.
new_memchr_type!(Splitter1 split1_unchecked memchr a);
new_memchr_type!(Splitter2 split2_unchecked memchr2 a b);
new_memchr_type!(Splitter3 split3_unchecked memchr3 a b c);
| 64 | |
#[test]
fn candidate_finder() {
    // All ASCII cases search the same short haystack.
    let haystack = "abctefg";

    // One needle: only its first byte (`t`) has to occur for a hit.
    let single_hit = Splitter1::new("test").split(haystack);
    assert_eq!(single_hit, Some(("abc", "tefg")));
    let single_miss = Splitter1::new("xyz").split(haystack);
    assert_eq!(single_miss, None);

    // Two needles: the first byte of either one may trigger the split.
    let double_hit = Splitter2::new("xyz", "foo").split(haystack);
    assert_eq!(double_hit, Some(("abcte", "fg")));
    let double_miss = Splitter2::new("oof", "xyz").split(haystack);
    assert_eq!(double_miss, None);

    // Three needles: a hit at the very start yields an empty first half.
    let triple_hit = Splitter3::new("oof", "apples", "xyz").split(haystack);
    assert_eq!(triple_hit, Some(("", "abctefg")));
    let triple_miss = Splitter3::new("oof", "peaches", "xyz").split(haystack);
    assert_eq!(triple_miss, None);

    // Repeating the same needle behaves like the single-needle splitter.
    let repeated = Splitter3::new("test", "test", "test").split(haystack);
    assert_eq!(repeated, Some(("abc", "tefg")));

    // Needles starting with multi-byte characters split on a char boundary.
    let emoji = Splitter3::new("🧚♀️Life", "😀Laugh", "😻Love");
    let sentence = "sed diam nonumy eirmod tempor 🧚♀️Life ut labore et dolore magna aliquyam";
    let expected = (
        "sed diam nonumy eirmod tempor ",
        "🧚♀️Life ut labore et dolore magna aliquyam",
    );
    assert_eq!(emoji.split(sentence), Some(expected));
}
| 102 | |