| 1 | /*! |
| 2 | Contains architecture independent routines. |
| 3 | |
| 4 | These routines are often used as a "fallback" implementation when the more |
| 5 | specialized architecture dependent routines are unavailable. |
| 6 | */ |
| 7 | |
| 8 | pub mod memchr; |
| 9 | pub mod packedpair; |
| 10 | pub mod rabinkarp; |
| 11 | #[cfg (feature = "alloc" )] |
| 12 | pub mod shiftor; |
| 13 | pub mod twoway; |
| 14 | |
| 15 | /// Returns true if and only if `needle` is a prefix of `haystack`. |
| 16 | /// |
| 17 | /// This uses a latency optimized variant of `memcmp` internally which *might* |
| 18 | /// make this faster for very short strings. |
| 19 | /// |
| 20 | /// # Inlining |
| 21 | /// |
| 22 | /// This routine is marked `inline(always)`. If you want to call this function |
| 23 | /// in a way that is not always inlined, you'll need to wrap a call to it in |
| 24 | /// another function that is marked as `inline(never)` or just `inline`. |
| 25 | #[inline (always)] |
| 26 | pub fn is_prefix(haystack: &[u8], needle: &[u8]) -> bool { |
| 27 | needle.len() <= haystack.len() |
| 28 | && is_equal(&haystack[..needle.len()], y:needle) |
| 29 | } |
| 30 | |
| 31 | /// Returns true if and only if `needle` is a suffix of `haystack`. |
| 32 | /// |
| 33 | /// This uses a latency optimized variant of `memcmp` internally which *might* |
| 34 | /// make this faster for very short strings. |
| 35 | /// |
| 36 | /// # Inlining |
| 37 | /// |
| 38 | /// This routine is marked `inline(always)`. If you want to call this function |
| 39 | /// in a way that is not always inlined, you'll need to wrap a call to it in |
| 40 | /// another function that is marked as `inline(never)` or just `inline`. |
| 41 | #[inline (always)] |
| 42 | pub fn is_suffix(haystack: &[u8], needle: &[u8]) -> bool { |
| 43 | needle.len() <= haystack.len() |
| 44 | && is_equal(&haystack[haystack.len() - needle.len()..], y:needle) |
| 45 | } |
| 46 | |
| 47 | /// Compare corresponding bytes in `x` and `y` for equality. |
| 48 | /// |
| 49 | /// That is, this returns true if and only if `x.len() == y.len()` and |
| 50 | /// `x[i] == y[i]` for all `0 <= i < x.len()`. |
| 51 | /// |
| 52 | /// # Inlining |
| 53 | /// |
| 54 | /// This routine is marked `inline(always)`. If you want to call this function |
| 55 | /// in a way that is not always inlined, you'll need to wrap a call to it in |
| 56 | /// another function that is marked as `inline(never)` or just `inline`. |
| 57 | /// |
| 58 | /// # Motivation |
| 59 | /// |
| 60 | /// Why not use slice equality instead? Well, slice equality usually results in |
| 61 | /// a call out to the current platform's `libc` which might not be inlineable |
| 62 | /// or have other overhead. This routine isn't guaranteed to be a win, but it |
| 63 | /// might be in some cases. |
| 64 | #[inline (always)] |
| 65 | pub fn is_equal(x: &[u8], y: &[u8]) -> bool { |
| 66 | if x.len() != y.len() { |
| 67 | return false; |
| 68 | } |
| 69 | // SAFETY: Our pointers are derived directly from borrowed slices which |
| 70 | // uphold all of our safety guarantees except for length. We account for |
| 71 | // length with the check above. |
| 72 | unsafe { is_equal_raw(x.as_ptr(), y.as_ptr(), n:x.len()) } |
| 73 | } |
| 74 | |
/// Compare `n` bytes at the given pointers for equality.
///
/// This returns true if and only if `*x.add(i) == *y.add(i)` for all
/// `0 <= i < n`.
///
/// # Inlining
///
/// This routine is marked `inline(always)`. If you want to call this function
/// in a way that is not always inlined, you'll need to wrap a call to it in
/// another function that is marked as `inline(never)` or just `inline`.
///
/// # Motivation
///
/// Why not use slice equality instead? Well, slice equality usually results in
/// a call out to the current platform's `libc` which might not be inlineable
/// or have other overhead. This routine isn't guaranteed to be a win, but it
/// might be in some cases.
///
/// # Safety
///
/// * Both `x` and `y` must be valid for reads of up to `n` bytes.
/// * Both `x` and `y` must point to an initialized value.
/// * Both `x` and `y` must each point to an allocated object and
/// must either be in bounds or at most one byte past the end of the
/// allocated object. `x` and `y` do not need to point to the same allocated
/// object, but they may.
/// * Both `x` and `y` must be _derived from_ a pointer to their respective
/// allocated objects.
/// * The distance between `x` and `x+n` must not overflow `isize`. Similarly
/// for `y` and `y+n`.
/// * The distance being in bounds must not rely on "wrapping around" the
/// address space.
#[inline(always)]
pub unsafe fn is_equal_raw(
    mut x: *const u8,
    mut y: *const u8,
    mut n: usize,
) -> bool {
    // Consume the input four bytes at a time via unaligned loads while at
    // least four bytes remain.
    //
    // Why 4-byte loads rather than, say, 8-byte loads? This particular
    // flavor of memcmp is expected to see tiny needles, so wider loads would
    // push a larger share of calls onto the slower tail handling below. That
    // said, this is a hypothesis only loosely supported by benchmarks; there
    // is likely room for improvement. The overriding goal here is latency,
    // not throughput.

    // SAFETY: The caller guarantees that both pointers are valid and
    // readable for at least `n` bytes. All loads are unaligned, so no
    // alignment obligation exists. (Unaligned loads are justified since this
    // routine targets short strings specifically.)
    while n >= 4 {
        let lhs = x.cast::<u32>().read_unaligned();
        let rhs = y.cast::<u32>().read_unaligned();
        if lhs != rhs {
            return false;
        }
        x = x.add(4);
        y = y.add(4);
        n -= 4;
    }
    // Fewer than four bytes remain, so fall back to partial loads. (A
    // byte-at-a-time loop used to live here, but it was measurably slower
    // on the memmem/pathological/defeat-simple-vector-alphabet benchmark.)
    if n >= 2 {
        let lhs = x.cast::<u16>().read_unaligned();
        let rhs = y.cast::<u16>().read_unaligned();
        if lhs != rhs {
            return false;
        }
        x = x.add(2);
        y = y.add(2);
        n -= 2;
    }
    // At most one byte is left; compare it if present.
    n == 0 || x.read() == y.read()
}
| 159 | |
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn equals_different_lengths() {
        // Any length mismatch must report inequality, regardless of content.
        let pairs: [(&[u8], &[u8]); 4] =
            [(b"", b"a"), (b"a", b""), (b"ab", b"a"), (b"a", b"ab")];
        for (x, y) in pairs {
            assert!(!is_equal(x, y));
        }
    }

    #[test]
    fn equals_mismatch() {
        // For every length 1..=14, compare a prefix of the alphabet against
        // a copy whose final byte is swapped for 'x'. This exercises the
        // 4-byte, 2-byte and 1-byte comparison paths with the mismatch in
        // every possible tail position.
        let alphabet = b"abcdefghijklmn";
        for len in 1..=alphabet.len() {
            let x = &alphabet[..len];
            let mut buf = [0u8; 14];
            buf[..len].copy_from_slice(x);
            buf[len - 1] = b'x';
            let y = &buf[..len];
            assert_eq!(x.len(), y.len(), "lengths should match");
            assert!(!is_equal(x, y));
            assert!(!is_equal(y, x));
        }
    }

    #[test]
    fn equals_yes() {
        // Every slice must compare equal to itself across lengths that hit
        // all combinations of the 4/2/1-byte comparison paths.
        let alphabet = b"abcdefghi";
        for len in 0..=alphabet.len() {
            assert!(is_equal(&alphabet[..len], &alphabet[..len]));
        }
    }

    #[test]
    fn prefix() {
        let yes: [(&[u8], &[u8]); 5] = [
            (b"", b""),
            (b"a", b""),
            (b"ab", b""),
            (b"foo", b"foo"),
            (b"foobar", b"foo"),
        ];
        for (haystack, needle) in yes {
            assert!(is_prefix(haystack, needle));
        }

        let no: [(&[u8], &[u8]); 2] = [(b"foo", b"fob"), (b"foobar", b"fob")];
        for (haystack, needle) in no {
            assert!(!is_prefix(haystack, needle));
        }
    }

    #[test]
    fn suffix() {
        let yes: [(&[u8], &[u8]); 5] = [
            (b"", b""),
            (b"a", b""),
            (b"ab", b""),
            (b"foo", b"foo"),
            (b"foobar", b"bar"),
        ];
        for (haystack, needle) in yes {
            assert!(is_suffix(haystack, needle));
        }

        let no: [(&[u8], &[u8]); 2] = [(b"foo", b"goo"), (b"foobar", b"gar")];
        for (haystack, needle) in no {
            assert!(!is_suffix(haystack, needle));
        }
    }
}
| 235 | |