/// A cheap, copyable searcher that cuts a haystack in two at the first place where one of its
/// needles may begin.
pub(crate) trait Splitter: Copy {
    /// Splits `haystack` at the first hit of any needle.
    ///
    /// Returns `None` if no needle was found. Otherwise the second string of the returned
    /// pair is the part of `haystack` in which the hit was reported.
    ///
    /// Only the first byte of each needle is inspected, so a hit can be a false-positive.
    /// Callers must always verify that the second string really starts with the needle
    /// they expect before relying on the match.
    fn split<'h>(&self, haystack: &'h str) -> Option<(&'h str, &'h str)>;
}
9
10impl<T: Splitter> Splitter for &T {
11 #[inline]
12 fn split<'a>(&self, haystack: &'a str) -> Option<(&'a str, &'a str)> {
13 T::split(self, haystack)
14 }
15}
16
// Define a needle-set type and implement a string splitter for it on top of `memchr`.
//
// Arguments, in order: the name of the generated struct, the name of the generated free
// `unsafe fn`, the function of the `memchr` crate to search with, and one field name per
// needle (the number of fields must match the number of bytes that function takes).
macro_rules! new_memchr_type {
    ($struct:ident $split_unchecked:ident $memchr:ident $($field:ident)*) => {
        /// Stores the first byte of every needle to search for.
        #[derive(Debug, Clone, Copy)]
        pub(crate) struct $struct {
            $($field: u8,)*
        }

        impl $struct {
            /// Creates a splitter that searches for the first byte of each needle.
            ///
            /// # Panics
            ///
            /// Panics if any needle is an empty string.
            #[track_caller]
            pub(crate) fn new($($field: &str),*) -> Self {
                Self {
                    // `expect` is `#[track_caller]` as well, so the panic is attributed to the
                    // caller of `new` and carries a message that names the actual problem.
                    $($field: *$field
                        .as_bytes()
                        .first()
                        .expect("a needle must not be an empty string"),)*
                }
            }

            /// Splits `haystack` in front of the first occurrence of any needle's first byte.
            ///
            /// Returns `None` if none of these bytes occurs in `haystack`. Because only the
            /// first byte of each needle is compared, the caller still has to check that the
            /// second string starts with the expected needle.
            #[inline]
            pub(crate) fn split<'a>(&self, haystack: &'a str) -> Option<(&'a str, &'a str)> {
                // SAFETY: Every stored byte was taken from the start of a `&str` in `new`, so
                // it is the first byte of a UTF-8 encoded character and never a continuation
                // byte. Wherever such a byte occurs in the (valid UTF-8) haystack, a character
                // begins, i.e. the position is a char boundary.
                unsafe { $split_unchecked($(self.$field,)* haystack) }
            }
        }

        impl Splitter for $struct {
            #[inline]
            fn split<'a>(&self, haystack: &'a str) -> Option<(&'a str, &'a str)> {
                // Resolves to the inherent method above, which takes precedence over the
                // trait method of the same name.
                self.split(haystack)
            }
        }

        /// Splits `haystack` in front of the first occurrence of any of the given bytes.
        ///
        /// Returns `None` if none of the bytes occurs in `haystack`.
        ///
        /// # Safety
        ///
        /// The caller has to ensure that every needle byte can only occur at a char boundary
        /// of `haystack`.
        pub(crate) unsafe fn $split_unchecked(
            $($field: u8,)*
            haystack: &str,
        ) -> Option<(&str, &str)> {
            let idx = memchr::$memchr($($field,)* haystack.as_bytes())?;
            // SAFETY: The caller ensures that the needles are at a char boundary, and the
            // found index is `< haystack.len()`, so both ranges are in bounds and cut the
            // string between two characters.
            let (head, tail) = unsafe {
                (haystack.get_unchecked(..idx), haystack.get_unchecked(idx..))
            };
            Some((head, tail))
        }
    };
}
60
// Instantiate the splitter types for one, two and three needles. Each invocation names the
// struct, its free `unsafe fn`, and the `memchr` function that receives that many needle
// bytes; the trailing identifiers become the struct's fields.
new_memchr_type!(Splitter1 split1_unchecked memchr a);
new_memchr_type!(Splitter2 split2_unchecked memchr2 a b);
new_memchr_type!(Splitter3 split3_unchecked memchr3 a b c);
64
/// Checks hits, misses and multi-byte needles for all three splitter types.
#[test]
fn candidate_finder() {
    const HAYSTACK: &str = "abctefg";

    // One needle: only its first byte (`t`) has to occur in the haystack.
    let one_hit = Splitter1::new("test");
    assert_eq!(one_hit.split(HAYSTACK), Some(("abc", "tefg")));
    let one_miss = Splitter1::new("xyz");
    assert_eq!(one_miss.split(HAYSTACK), None);

    // Two needles: the second one (`f`) is the one that is found.
    let two_hit = Splitter2::new("xyz", "foo");
    assert_eq!(two_hit.split(HAYSTACK), Some(("abcte", "fg")));
    let two_miss = Splitter2::new("oof", "xyz");
    assert_eq!(two_miss.split(HAYSTACK), None);

    // Three needles: a hit at the very start leaves the first string empty.
    let three_hit = Splitter3::new("oof", "apples", "xyz");
    assert_eq!(three_hit.split(HAYSTACK), Some(("", "abctefg")));
    let three_miss = Splitter3::new("oof", "peaches", "xyz");
    assert_eq!(three_miss.split(HAYSTACK), None);

    // Identical needles behave like a single needle.
    let repeated = Splitter3::new("test", "test", "test");
    assert_eq!(repeated.split(HAYSTACK), Some(("abc", "tefg")));

    // Needles that start with a multi-byte character split on a char boundary.
    let emoji = Splitter3::new("🧚‍♀️Life", "😀Laugh", "😻Love");
    let sentence = "sed diam nonumy eirmod tempor 🧚‍♀️Life ut labore et dolore magna aliquyam";
    let expected = (
        "sed diam nonumy eirmod tempor ",
        "🧚‍♀️Life ut labore et dolore magna aliquyam",
    );
    assert_eq!(emoji.split(sentence), Some(expected));
}
102