1 | /*! |
2 | Contains architecture independent routines. |
3 | |
4 | These routines are often used as a "fallback" implementation when the more |
5 | specialized architecture dependent routines are unavailable. |
6 | */ |
7 | |
8 | pub mod memchr; |
9 | pub mod packedpair; |
10 | pub mod rabinkarp; |
11 | #[cfg (feature = "alloc" )] |
12 | pub mod shiftor; |
13 | pub mod twoway; |
14 | |
15 | /// Returns true if and only if `needle` is a prefix of `haystack`. |
16 | /// |
17 | /// This uses a latency optimized variant of `memcmp` internally which *might* |
18 | /// make this faster for very short strings. |
19 | /// |
20 | /// # Inlining |
21 | /// |
22 | /// This routine is marked `inline(always)`. If you want to call this function |
23 | /// in a way that is not always inlined, you'll need to wrap a call to it in |
24 | /// another function that is marked as `inline(never)` or just `inline`. |
25 | #[inline (always)] |
26 | pub fn is_prefix(haystack: &[u8], needle: &[u8]) -> bool { |
27 | needle.len() <= haystack.len() |
28 | && is_equal(&haystack[..needle.len()], needle) |
29 | } |
30 | |
31 | /// Returns true if and only if `needle` is a suffix of `haystack`. |
32 | /// |
33 | /// This uses a latency optimized variant of `memcmp` internally which *might* |
34 | /// make this faster for very short strings. |
35 | /// |
36 | /// # Inlining |
37 | /// |
38 | /// This routine is marked `inline(always)`. If you want to call this function |
39 | /// in a way that is not always inlined, you'll need to wrap a call to it in |
40 | /// another function that is marked as `inline(never)` or just `inline`. |
41 | #[inline (always)] |
42 | pub fn is_suffix(haystack: &[u8], needle: &[u8]) -> bool { |
43 | needle.len() <= haystack.len() |
44 | && is_equal(&haystack[haystack.len() - needle.len()..], needle) |
45 | } |
46 | |
47 | /// Compare corresponding bytes in `x` and `y` for equality. |
48 | /// |
49 | /// That is, this returns true if and only if `x.len() == y.len()` and |
50 | /// `x[i] == y[i]` for all `0 <= i < x.len()`. |
51 | /// |
52 | /// # Inlining |
53 | /// |
54 | /// This routine is marked `inline(always)`. If you want to call this function |
55 | /// in a way that is not always inlined, you'll need to wrap a call to it in |
56 | /// another function that is marked as `inline(never)` or just `inline`. |
57 | /// |
58 | /// # Motivation |
59 | /// |
60 | /// Why not use slice equality instead? Well, slice equality usually results in |
61 | /// a call out to the current platform's `libc` which might not be inlineable |
62 | /// or have other overhead. This routine isn't guaranteed to be a win, but it |
63 | /// might be in some cases. |
64 | #[inline (always)] |
65 | pub fn is_equal(x: &[u8], y: &[u8]) -> bool { |
66 | if x.len() != y.len() { |
67 | return false; |
68 | } |
69 | // SAFETY: Our pointers are derived directly from borrowed slices which |
70 | // uphold all of our safety guarantees except for length. We account for |
71 | // length with the check above. |
72 | unsafe { is_equal_raw(x.as_ptr(), y.as_ptr(), x.len()) } |
73 | } |
74 | |
/// Compare `n` bytes at the given pointers for equality.
///
/// This returns true if and only if `*x.add(i) == *y.add(i)` for all
/// `0 <= i < n`.
///
/// # Inlining
///
/// This routine is marked `inline(always)`. If you want to call this function
/// in a way that is not always inlined, you'll need to wrap a call to it in
/// another function that is marked as `inline(never)` or just `inline`.
///
/// # Motivation
///
/// Why not use slice equality instead? Well, slice equality usually results in
/// a call out to the current platform's `libc` which might not be inlineable
/// or have other overhead. This routine isn't guaranteed to be a win, but it
/// might be in some cases.
///
/// # Safety
///
/// * Both `x` and `y` must be valid for reads of up to `n` bytes.
/// * Both `x` and `y` must point to an initialized value.
/// * Both `x` and `y` must each point to an allocated object and
/// must either be in bounds or at most one byte past the end of the
/// allocated object. `x` and `y` do not need to point to the same allocated
/// object, but they may.
/// * Both `x` and `y` must be _derived from_ a pointer to their respective
/// allocated objects.
/// * The distance between `x` and `x+n` must not overflow `isize`. Similarly
/// for `y` and `y+n`.
/// * The distance being in bounds must not rely on "wrapping around" the
/// address space.
#[inline(always)]
pub unsafe fn is_equal_raw(
    mut x: *const u8,
    mut y: *const u8,
    n: usize,
) -> bool {
    // If we don't have enough bytes to do 4-byte at a time loads, then
    // handle each possible length specially. Note that I used to have a
    // byte-at-a-time loop here and that turned out to be quite a bit slower
    // for the memmem/pathological/defeat-simple-vector-alphabet benchmark.
    if n < 4 {
        return match n {
            0 => true,
            1 => x.read() == y.read(),
            2 => {
                // Unaligned loads: the caller makes no alignment guarantee.
                x.cast::<u16>().read_unaligned()
                    == y.cast::<u16>().read_unaligned()
            }
            // I also tried copy_nonoverlapping here and it looks like the
            // codegen is the same.
            3 => x.cast::<[u8; 3]>().read() == y.cast::<[u8; 3]>().read(),
            // `n < 4` and the arms above cover 0..=3 exhaustively.
            _ => unreachable!(),
        };
    }
    // When we have 4 or more bytes to compare, then proceed in chunks of 4 at
    // a time using unaligned loads.
    //
    // Also, why do 4 byte loads instead of, say, 8 byte loads? The reason is
    // that this particular version of memcmp is likely to be called with tiny
    // needles. That means that if we do 8 byte loads, then a higher proportion
    // of memcmp calls will use the slower variant above. With that said, this
    // is a hypothesis and is only loosely supported by benchmarks. There's
    // likely some improvement that could be made here. The main thing here
    // though is to optimize for latency, not throughput.

    // SAFETY: The caller is responsible for ensuring the pointers we get are
    // valid and readable for at least `n` bytes. We also do unaligned loads,
    // so there's no need to ensure we're aligned. (This is justified by this
    // routine being specifically for short strings.)
    //
    // Note that `n >= 4` here, so `wrapping_sub(4)` cannot actually wrap and
    // `xend`/`yend` point at the start of the final 4-byte window of each
    // buffer (i.e. still in bounds).
    let xend = x.add(n.wrapping_sub(4));
    let yend = y.add(n.wrapping_sub(4));
    while x < xend {
        let vx = x.cast::<u32>().read_unaligned();
        let vy = y.cast::<u32>().read_unaligned();
        if vx != vy {
            return false;
        }
        x = x.add(4);
        y = y.add(4);
    }
    // Compare the final 4 bytes. When `n` is not a multiple of 4, this load
    // deliberately overlaps bytes already checked by the loop above, which is
    // harmless for an equality test and avoids a scalar tail loop.
    let vx = xend.cast::<u32>().read_unaligned();
    let vy = yend.cast::<u32>().read_unaligned();
    vx == vy
}
161 | |
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn equals_different_lengths() {
        // Length mismatch must always report inequality, regardless of
        // content.
        let pairs = [
            (&b""[..], &b"a"[..]),
            (&b"a"[..], &b""[..]),
            (&b"ab"[..], &b"a"[..]),
            (&b"a"[..], &b"ab"[..]),
        ];
        for (x, y) in pairs {
            assert!(!is_equal(x, y));
        }
    }

    #[test]
    fn equals_mismatch() {
        // For every length 1..=14, compare a prefix of the alphabet against
        // the same prefix with its final byte replaced by 'x'. This exercises
        // the specialized short paths (n < 4) as well as the 4-byte chunk
        // loop and its overlapping tail load.
        let alpha = *b"abcdefghijklmn";
        for len in 1..=alpha.len() {
            let mut other = alpha;
            other[len - 1] = b'x';
            let (x, y) = (&alpha[..len], &other[..len]);
            assert_eq!(x.len(), y.len(), "lengths should match");
            assert!(!is_equal(x, y));
            assert!(!is_equal(y, x));
        }
    }

    #[test]
    fn equals_yes() {
        // Identical content at every length 0..=9 must compare equal.
        let alpha = *b"abcdefghi";
        for len in 0..=alpha.len() {
            let copy = alpha;
            assert!(is_equal(&alpha[..len], &copy[..len]));
        }
    }

    #[test]
    fn prefix() {
        for (hay, needle) in [
            (&b""[..], &b""[..]),
            (&b"a"[..], &b""[..]),
            (&b"ab"[..], &b""[..]),
            (&b"foo"[..], &b"foo"[..]),
            (&b"foobar"[..], &b"foo"[..]),
        ] {
            assert!(is_prefix(hay, needle));
        }
        for (hay, needle) in
            [(&b"foo"[..], &b"fob"[..]), (&b"foobar"[..], &b"fob"[..])]
        {
            assert!(!is_prefix(hay, needle));
        }
    }

    #[test]
    fn suffix() {
        for (hay, needle) in [
            (&b""[..], &b""[..]),
            (&b"a"[..], &b""[..]),
            (&b"ab"[..], &b""[..]),
            (&b"foo"[..], &b"foo"[..]),
            (&b"foobar"[..], &b"bar"[..]),
        ] {
            assert!(is_suffix(hay, needle));
        }
        for (hay, needle) in
            [(&b"foo"[..], &b"goo"[..]), (&b"foobar"[..], &b"gar"[..])]
        {
            assert!(!is_suffix(hay, needle));
        }
    }
}
237 | |