/*!
Contains architecture independent routines.

These routines are often used as a "fallback" implementation when the more
specialized architecture dependent routines are unavailable.
*/
7 | |
8 | pub mod memchr; |
9 | pub mod packedpair; |
10 | pub mod rabinkarp; |
11 | #[cfg (feature = "alloc" )] |
12 | pub mod shiftor; |
13 | pub mod twoway; |
14 | |
15 | /// Returns true if and only if `needle` is a prefix of `haystack`. |
16 | /// |
17 | /// This uses a latency optimized variant of `memcmp` internally which *might* |
18 | /// make this faster for very short strings. |
19 | /// |
20 | /// # Inlining |
21 | /// |
22 | /// This routine is marked `inline(always)`. If you want to call this function |
23 | /// in a way that is not always inlined, you'll need to wrap a call to it in |
24 | /// another function that is marked as `inline(never)` or just `inline`. |
25 | #[inline (always)] |
26 | pub fn is_prefix(haystack: &[u8], needle: &[u8]) -> bool { |
27 | needle.len() <= haystack.len() |
28 | && is_equal(&haystack[..needle.len()], y:needle) |
29 | } |
30 | |
31 | /// Returns true if and only if `needle` is a suffix of `haystack`. |
32 | /// |
33 | /// This uses a latency optimized variant of `memcmp` internally which *might* |
34 | /// make this faster for very short strings. |
35 | /// |
36 | /// # Inlining |
37 | /// |
38 | /// This routine is marked `inline(always)`. If you want to call this function |
39 | /// in a way that is not always inlined, you'll need to wrap a call to it in |
40 | /// another function that is marked as `inline(never)` or just `inline`. |
41 | #[inline (always)] |
42 | pub fn is_suffix(haystack: &[u8], needle: &[u8]) -> bool { |
43 | needle.len() <= haystack.len() |
44 | && is_equal(&haystack[haystack.len() - needle.len()..], y:needle) |
45 | } |
46 | |
47 | /// Compare corresponding bytes in `x` and `y` for equality. |
48 | /// |
49 | /// That is, this returns true if and only if `x.len() == y.len()` and |
50 | /// `x[i] == y[i]` for all `0 <= i < x.len()`. |
51 | /// |
52 | /// # Inlining |
53 | /// |
54 | /// This routine is marked `inline(always)`. If you want to call this function |
55 | /// in a way that is not always inlined, you'll need to wrap a call to it in |
56 | /// another function that is marked as `inline(never)` or just `inline`. |
57 | /// |
58 | /// # Motivation |
59 | /// |
60 | /// Why not use slice equality instead? Well, slice equality usually results in |
61 | /// a call out to the current platform's `libc` which might not be inlineable |
62 | /// or have other overhead. This routine isn't guaranteed to be a win, but it |
63 | /// might be in some cases. |
64 | #[inline (always)] |
65 | pub fn is_equal(x: &[u8], y: &[u8]) -> bool { |
66 | if x.len() != y.len() { |
67 | return false; |
68 | } |
69 | // SAFETY: Our pointers are derived directly from borrowed slices which |
70 | // uphold all of our safety guarantees except for length. We account for |
71 | // length with the check above. |
72 | unsafe { is_equal_raw(x:x.as_ptr(), y:y.as_ptr(), n:x.len()) } |
73 | } |
74 | |
/// Compare `n` bytes at the given pointers for equality.
///
/// This returns true if and only if `*x.add(i) == *y.add(i)` for all
/// `0 <= i < n`.
///
/// # Inlining
///
/// This routine is marked `inline(always)`. If you want to call this function
/// in a way that is not always inlined, you'll need to wrap a call to it in
/// another function that is marked as `inline(never)` or just `inline`.
///
/// # Motivation
///
/// Why not use slice equality instead? Well, slice equality usually results in
/// a call out to the current platform's `libc` which might not be inlineable
/// or have other overhead. This routine isn't guaranteed to be a win, but it
/// might be in some cases.
///
/// # Safety
///
/// * Both `x` and `y` must be valid for reads of up to `n` bytes.
/// * Both `x` and `y` must point to an initialized value.
/// * Both `x` and `y` must each point to an allocated object and
///   must either be in bounds or at most one byte past the end of the
///   allocated object. `x` and `y` do not need to point to the same allocated
///   object, but they may.
/// * Both `x` and `y` must be _derived from_ a pointer to their respective
///   allocated objects.
/// * The distance between `x` and `x+n` must not overflow `isize`. Similarly
///   for `y` and `y+n`.
/// * The distance being in bounds must not rely on "wrapping around" the
///   address space.
#[inline(always)]
pub unsafe fn is_equal_raw(
    mut x: *const u8,
    mut y: *const u8,
    mut n: usize,
) -> bool {
    // While at least 4 bytes remain, compare them as unaligned 32-bit words.
    //
    // Why 4 byte loads instead of, say, 8 byte loads? This routine is geared
    // toward tiny needles, where wider loads would push a larger fraction of
    // calls onto the tail handling below. That rationale is a hypothesis only
    // loosely supported by benchmarks; the overriding goal is to optimize for
    // latency rather than throughput.

    // SAFETY: The caller guarantees both pointers are valid and readable for
    // at least `n` bytes. All loads are unaligned, so no alignment needs to
    // be established. (Unaligned loads are justified by this routine being
    // aimed at short strings.)
    while n >= 4 {
        let word_x = x.cast::<u32>().read_unaligned();
        let word_y = y.cast::<u32>().read_unaligned();
        if word_x != word_y {
            return false;
        }
        x = x.add(4);
        y = y.add(4);
        n -= 4;
    }
    // Fewer than 4 bytes remain: finish with one 16-bit load (if possible)
    // and then a final single-byte compare. A byte-at-a-time loop here used
    // to exist and was measurably slower on the
    // memmem/pathological/defeat-simple-vector-alphabet benchmark.
    if n >= 2 {
        let half_x = x.cast::<u16>().read_unaligned();
        let half_y = y.cast::<u16>().read_unaligned();
        if half_x != half_y {
            return false;
        }
        x = x.add(2);
        y = y.add(2);
        n -= 2;
    }
    // At most one byte is left; equal if nothing remains or the last
    // bytes match.
    n == 0 || x.read() == y.read()
}
159 | |
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn equals_different_lengths() {
        // A length mismatch must yield false regardless of contents.
        assert!(!is_equal(b"", b"a"));
        assert!(!is_equal(b"a", b""));
        assert!(!is_equal(b"ab", b"a"));
        assert!(!is_equal(b"a", b"ab"));
    }

    #[test]
    fn equals_mismatch() {
        // Equal-length pairs differing only in the last byte, covering
        // lengths 1 through 14 so that every load-width combination in the
        // comparison routine sees a mismatch. Checked in both argument
        // orders.
        let one_mismatch: &[(&[u8], &[u8])] = &[
            (b"a", b"x"),
            (b"ab", b"ax"),
            (b"abc", b"abx"),
            (b"abcd", b"abcx"),
            (b"abcde", b"abcdx"),
            (b"abcdef", b"abcdex"),
            (b"abcdefg", b"abcdefx"),
            (b"abcdefgh", b"abcdefgx"),
            (b"abcdefghi", b"abcdefghx"),
            (b"abcdefghij", b"abcdefghix"),
            (b"abcdefghijk", b"abcdefghijx"),
            (b"abcdefghijkl", b"abcdefghijkx"),
            (b"abcdefghijklm", b"abcdefghijklx"),
            (b"abcdefghijklmn", b"abcdefghijklmx"),
        ];
        for &(x, y) in one_mismatch {
            assert_eq!(x.len(), y.len(), "lengths should match");
            assert!(!is_equal(x, y));
            assert!(!is_equal(y, x));
        }
    }

    #[test]
    fn equals_yes() {
        // Identical inputs of lengths 0 through 9 must compare equal.
        let cases: &[&[u8]] = &[
            b"",
            b"a",
            b"ab",
            b"abc",
            b"abcd",
            b"abcde",
            b"abcdef",
            b"abcdefg",
            b"abcdefgh",
            b"abcdefghi",
        ];
        for &x in cases {
            assert!(is_equal(x, x));
        }
    }

    #[test]
    fn prefix() {
        // The empty needle is a prefix of everything; otherwise the first
        // needle.len() bytes must match exactly.
        assert!(is_prefix(b"", b""));
        assert!(is_prefix(b"a", b""));
        assert!(is_prefix(b"ab", b""));
        assert!(is_prefix(b"foo", b"foo"));
        assert!(is_prefix(b"foobar", b"foo"));

        assert!(!is_prefix(b"foo", b"fob"));
        assert!(!is_prefix(b"foobar", b"fob"));
    }

    #[test]
    fn suffix() {
        // The empty needle is a suffix of everything; otherwise the last
        // needle.len() bytes must match exactly.
        assert!(is_suffix(b"", b""));
        assert!(is_suffix(b"a", b""));
        assert!(is_suffix(b"ab", b""));
        assert!(is_suffix(b"foo", b"foo"));
        assert!(is_suffix(b"foobar", b"bar"));

        assert!(!is_suffix(b"foo", b"goo"));
        assert!(!is_suffix(b"foobar", b"gar"));
    }
}
235 | |