/// A cheap, copyable strategy for cutting a haystack in front of a candidate needle.
pub(crate) trait Splitter: Copy {
    /// Cuts `haystack` in two at the first position where one of the needles may begin.
    ///
    /// On a hit, the first string of the returned pair is everything before that position and
    /// the second string starts at it. `None` means that no needle can occur in `haystack`.
    ///
    /// Only the first byte of each needle is inspected, so a hit may be a false positive:
    /// callers must still check that the second string really starts with the expected
    /// needle.
    fn split<'a>(&self, haystack: &'a str) -> Option<(&'a str, &'a str)>;
}
9 | |
10 | impl<T: Splitter> Splitter for &T { |
11 | #[inline ] |
12 | fn split<'a>(&self, haystack: &'a str) -> Option<(&'a str, &'a str)> { |
13 | T::split(self, haystack) |
14 | } |
15 | } |
16 | |
// Define and implement a string splitter on top of one of the `memchr` search functions.
//
// Arguments, in order:
//   * the name of the generated splitter struct,
//   * the name of the generated unchecked free function,
//   * the function in the `memchr` crate that performs the search,
//   * one field name per needle (must match the arity of the chosen search function).
macro_rules! new_memchr_type {
    ($struct:ident $split_unchecked:ident $memchr:ident $($field:ident)*) => {
        /// Splits a haystack in front of the first byte that equals the first byte of one of
        /// the needles given to `new`.
        ///
        /// Only the first byte of each needle is stored, so a hit is a candidate position and
        /// may be a false positive.
        #[derive(Debug, Clone, Copy)]
        pub(crate) struct $struct {
            $($field: u8,)*
        }

        impl $struct {
            /// Builds a splitter that remembers the first byte of every needle.
            ///
            /// # Panics
            ///
            /// Panics if any needle is empty. The panic is attributed to the caller.
            #[track_caller]
            pub(crate) fn new($($field: &str),*) -> Self {
                Self {
                    $($field: *$field
                        .as_bytes()
                        .first()
                        .expect("needle must not be empty"),)*
                }
            }

            /// Splits `haystack` in front of the first candidate byte, or returns `None` if
            /// none of the stored bytes occurs in `haystack`.
            #[inline]
            pub(crate) fn split<'a>(&self, haystack: &'a str) -> Option<(&'a str, &'a str)> {
                // SAFETY: Every stored byte is the first byte of a non-empty `&str`, i.e. an
                // ASCII byte or the leading byte of a multi-byte UTF-8 sequence. In valid
                // UTF-8 such bytes occur only at char boundaries, so any hit in `haystack`
                // is at a char boundary.
                unsafe { $split_unchecked($(self.$field,)* haystack) }
            }
        }

        impl Splitter for $struct {
            #[inline]
            fn split<'a>(&self, haystack: &'a str) -> Option<(&'a str, &'a str)> {
                // Inherent methods take precedence over trait methods, so this calls the
                // inherent `split` above and does not recurse.
                self.split(haystack)
            }
        }

        /// Splits `haystack` in front of the first byte that equals one of the needle bytes.
        ///
        /// Returns `None` if no needle byte occurs in `haystack`.
        ///
        /// # Safety
        ///
        /// The caller has to ensure that every needle byte can only be found at a char
        /// boundary of `haystack`; otherwise the returned slices would cut a UTF-8 sequence
        /// apart.
        pub(crate) unsafe fn $split_unchecked(
            $($field: u8,)*
            haystack: &str,
        ) -> Option<(&str, &str)> {
            let idx = memchr::$memchr($($field,)* haystack.as_bytes())?;
            // SAFETY: The caller ensures that the needles are at a char boundary.
            // The found index is `< haystack.len()`.
            let (head, tail) =
                unsafe { (haystack.get_unchecked(..idx), haystack.get_unchecked(idx..)) };
            Some((head, tail))
        }
    };
}
60 | |
// Instantiate the splitters for one, two and three needles. Each invocation names the
// generated struct, the generated unchecked free function, the `memchr` crate function to
// search with, and one field per needle.
new_memchr_type!(Splitter1 split1_unchecked memchr a);
new_memchr_type!(Splitter2 split2_unchecked memchr2 a b);
new_memchr_type!(Splitter3 split3_unchecked memchr3 a b c);
64 | |
/// Exercises the one-, two- and three-needle splitters on hits, misses, duplicate needles and
/// multi-byte needles.
#[test]
fn candidate_finder() {
    // Shared haystack for all ASCII cases.
    let haystack = "abctefg";

    // One needle: only its first byte (`t`) matters, so "test" hits in front of "tefg".
    let one_hit = Splitter1::new("test");
    assert_eq!(one_hit.split(haystack), Some(("abc", "tefg")));
    let one_miss = Splitter1::new("xyz");
    assert_eq!(one_miss.split(haystack), None);

    // Two needles: `f` (from "foo") is the only first byte present in the haystack.
    let two_hit = Splitter2::new("xyz", "foo");
    assert_eq!(two_hit.split(haystack), Some(("abcte", "fg")));
    let two_miss = Splitter2::new("oof", "xyz");
    assert_eq!(two_miss.split(haystack), None);

    // Three needles: a hit at index 0 yields an empty first half.
    let three_hit = Splitter3::new("oof", "apples", "xyz");
    assert_eq!(three_hit.split(haystack), Some(("", "abctefg")));
    let three_miss = Splitter3::new("oof", "peaches", "xyz");
    assert_eq!(three_miss.split(haystack), None);

    // Identical needles behave like a single needle.
    let repeated = Splitter3::new("test", "test", "test");
    assert_eq!(repeated.split(haystack), Some(("abc", "tefg")));

    // Multi-byte needles: the split has to land in front of the emoji, on a char boundary.
    let emoji = Splitter3::new("🧚♀️Life", "😀Laugh", "😻Love");
    let sentence = "sed diam nonumy eirmod tempor 🧚♀️Life ut labore et dolore magna aliquyam";
    let expected = (
        "sed diam nonumy eirmod tempor ",
        "🧚♀️Life ut labore et dolore magna aliquyam",
    );
    assert_eq!(emoji.split(sentence), Some(expected));
}
102 | |