| 1 | /*! |
| 2 | Wrapper routines for `memchr` and friends. |
| 3 | |
| 4 | These routines efficiently dispatch to the best implementation based on what |
| 5 | the CPU supports. |
| 6 | */ |
| 7 | |
| 8 | /// Provides a way to run a memchr-like function while amortizing the cost of |
| 9 | /// runtime CPU feature detection. |
| 10 | /// |
| 11 | /// This works by loading a function pointer from an atomic global. Initially, |
| 12 | /// this global is set to a function that does CPU feature detection. For |
| 13 | /// example, if AVX2 is enabled, then the AVX2 implementation is used. |
| 14 | /// Otherwise, at least on x86_64, the SSE2 implementation is used. (And |
| 15 | /// in some niche cases, if SSE2 isn't available, then the architecture |
| 16 | /// independent fallback implementation is used.) |
| 17 | /// |
| 18 | /// After the first call to this function, the atomic global is replaced with |
| 19 | /// the specific AVX2, SSE2 or fallback routine chosen. Subsequent calls then |
| 20 | /// will directly call the chosen routine instead of needing to go through the |
| 21 | /// CPU feature detection branching again. |
| 22 | /// |
| 23 | /// This particular macro is specifically written to provide the implementation |
| 24 | /// of functions with the following signature: |
| 25 | /// |
| 26 | /// ```ignore |
| 27 | /// fn memchr(needle1: u8, start: *const u8, end: *const u8) -> Option<usize>; |
| 28 | /// ``` |
| 29 | /// |
| 30 | /// Where you can also have `memchr2` and `memchr3`, but with `needle2` and |
| 31 | /// `needle3`, respectively. The `start` and `end` parameters correspond to the |
| 32 | /// start and end of the haystack, respectively. |
| 33 | /// |
| 34 | /// We use raw pointers here instead of the more obvious `haystack: &[u8]` so |
| 35 | /// that the function is compatible with our lower level iterator logic that |
| 36 | /// operates on raw pointers. We use this macro to implement "raw" memchr |
| 37 | /// routines with the signature above, and then define memchr routines using |
| 38 | /// regular slices on top of them. |
| 39 | /// |
| 40 | /// Note that we use `#[cfg(target_feature = "sse2")]` below even though |
| 41 | /// it shouldn't be strictly necessary because without it, it seems to |
| 42 | /// cause the compiler to blow up. I guess it can't handle a function |
| 43 | /// pointer being created with a sse target feature? Dunno. See the |
| 44 | /// `build-for-x86-64-but-non-sse-target` CI job if you want to experiment with |
| 45 | /// this. |
| 46 | /// |
| 47 | /// # Safety |
| 48 | /// |
| 49 | /// Primarily callers must that `$fnty` is a correct function pointer type and |
| 50 | /// not something else. |
| 51 | /// |
| 52 | /// Callers must also ensure that `$memchrty::$memchrfind` corresponds to a |
| 53 | /// routine that returns a valid function pointer when a match is found. That |
| 54 | /// is, a pointer that is `>= start` and `< end`. |
| 55 | /// |
| 56 | /// Callers must also ensure that the `$hay_start` and `$hay_end` identifiers |
| 57 | /// correspond to valid pointers. |
| 58 | macro_rules! unsafe_ifunc { |
| 59 | ( |
| 60 | $memchrty:ident, |
| 61 | $memchrfind:ident, |
| 62 | $fnty:ty, |
| 63 | $retty:ty, |
| 64 | $hay_start:ident, |
| 65 | $hay_end:ident, |
| 66 | $($needle:ident),+ |
| 67 | ) => {{ |
| 68 | #![allow(unused_unsafe)] |
| 69 | |
| 70 | use core::sync::atomic::{AtomicPtr, Ordering}; |
| 71 | |
| 72 | type Fn = *mut (); |
| 73 | type RealFn = $fnty; |
| 74 | static FN: AtomicPtr<()> = AtomicPtr::new(detect as Fn); |
| 75 | |
| 76 | #[cfg(target_feature = "sse2" )] |
| 77 | #[target_feature(enable = "sse2" , enable = "avx2" )] |
| 78 | unsafe fn find_avx2( |
| 79 | $($needle: u8),+, |
| 80 | $hay_start: *const u8, |
| 81 | $hay_end: *const u8, |
| 82 | ) -> $retty { |
| 83 | use crate::arch::x86_64::avx2::memchr::$memchrty; |
| 84 | $memchrty::new_unchecked($($needle),+) |
| 85 | .$memchrfind($hay_start, $hay_end) |
| 86 | } |
| 87 | |
| 88 | #[cfg(target_feature = "sse2" )] |
| 89 | #[target_feature(enable = "sse2" )] |
| 90 | unsafe fn find_sse2( |
| 91 | $($needle: u8),+, |
| 92 | $hay_start: *const u8, |
| 93 | $hay_end: *const u8, |
| 94 | ) -> $retty { |
| 95 | use crate::arch::x86_64::sse2::memchr::$memchrty; |
| 96 | $memchrty::new_unchecked($($needle),+) |
| 97 | .$memchrfind($hay_start, $hay_end) |
| 98 | } |
| 99 | |
| 100 | unsafe fn find_fallback( |
| 101 | $($needle: u8),+, |
| 102 | $hay_start: *const u8, |
| 103 | $hay_end: *const u8, |
| 104 | ) -> $retty { |
| 105 | use crate::arch::all::memchr::$memchrty; |
| 106 | $memchrty::new($($needle),+).$memchrfind($hay_start, $hay_end) |
| 107 | } |
| 108 | |
| 109 | unsafe fn detect( |
| 110 | $($needle: u8),+, |
| 111 | $hay_start: *const u8, |
| 112 | $hay_end: *const u8, |
| 113 | ) -> $retty { |
| 114 | let fun = { |
| 115 | #[cfg(not(target_feature = "sse2" ))] |
| 116 | { |
| 117 | debug!( |
| 118 | "no sse2 feature available, using fallback for {}" , |
| 119 | stringify!($memchrty), |
| 120 | ); |
| 121 | find_fallback as RealFn |
| 122 | } |
| 123 | #[cfg(target_feature = "sse2" )] |
| 124 | { |
| 125 | use crate::arch::x86_64::{sse2, avx2}; |
| 126 | if avx2::memchr::$memchrty::is_available() { |
| 127 | debug!("chose AVX2 for {}" , stringify!($memchrty)); |
| 128 | find_avx2 as RealFn |
| 129 | } else if sse2::memchr::$memchrty::is_available() { |
| 130 | debug!("chose SSE2 for {}" , stringify!($memchrty)); |
| 131 | find_sse2 as RealFn |
| 132 | } else { |
| 133 | debug!("chose fallback for {}" , stringify!($memchrty)); |
| 134 | find_fallback as RealFn |
| 135 | } |
| 136 | } |
| 137 | }; |
| 138 | FN.store(fun as Fn, Ordering::Relaxed); |
| 139 | // SAFETY: The only thing we need to uphold here is the |
| 140 | // `#[target_feature]` requirements. Since we check is_available |
| 141 | // above before using the corresponding implementation, we are |
| 142 | // guaranteed to only call code that is supported on the current |
| 143 | // CPU. |
| 144 | fun($($needle),+, $hay_start, $hay_end) |
| 145 | } |
| 146 | |
| 147 | // SAFETY: By virtue of the caller contract, RealFn is a function |
| 148 | // pointer, which is always safe to transmute with a *mut (). Also, |
| 149 | // since we use $memchrty::is_available, it is guaranteed to be safe |
| 150 | // to call $memchrty::$memchrfind. |
| 151 | unsafe { |
| 152 | let fun = FN.load(Ordering::Relaxed); |
| 153 | core::mem::transmute::<Fn, RealFn>(fun)( |
| 154 | $($needle),+, |
| 155 | $hay_start, |
| 156 | $hay_end, |
| 157 | ) |
| 158 | } |
| 159 | }}; |
| 160 | } |
| 161 | |
| 162 | // The routines below dispatch to AVX2, SSE2 or a fallback routine based on |
| 163 | // what's available in the current environment. The secret sauce here is that |
| 164 | // we only check for which one to use approximately once, and then "cache" that |
| 165 | // choice into a global function pointer. Subsequent invocations then just call |
| 166 | // the appropriate function directly. |
| 167 | |
| 168 | /// memchr, but using raw pointers to represent the haystack. |
| 169 | /// |
| 170 | /// # Safety |
| 171 | /// |
| 172 | /// Pointers must be valid. See `One::find_raw`. |
| 173 | #[inline (always)] |
| 174 | pub(crate) fn memchr_raw( |
| 175 | n1: u8, |
| 176 | start: *const u8, |
| 177 | end: *const u8, |
| 178 | ) -> Option<*const u8> { |
| 179 | // SAFETY: We provide a valid function pointer type. |
| 180 | unsafe_ifunc!( |
| 181 | One, |
| 182 | find_raw, |
| 183 | unsafe fn(u8, *const u8, *const u8) -> Option<*const u8>, |
| 184 | Option<*const u8>, |
| 185 | start, |
| 186 | end, |
| 187 | n1 |
| 188 | ) |
| 189 | } |
| 190 | |
| 191 | /// memrchr, but using raw pointers to represent the haystack. |
| 192 | /// |
| 193 | /// # Safety |
| 194 | /// |
| 195 | /// Pointers must be valid. See `One::rfind_raw`. |
| 196 | #[inline (always)] |
| 197 | pub(crate) fn memrchr_raw( |
| 198 | n1: u8, |
| 199 | start: *const u8, |
| 200 | end: *const u8, |
| 201 | ) -> Option<*const u8> { |
| 202 | // SAFETY: We provide a valid function pointer type. |
| 203 | unsafe_ifunc!( |
| 204 | One, |
| 205 | rfind_raw, |
| 206 | unsafe fn(u8, *const u8, *const u8) -> Option<*const u8>, |
| 207 | Option<*const u8>, |
| 208 | start, |
| 209 | end, |
| 210 | n1 |
| 211 | ) |
| 212 | } |
| 213 | |
| 214 | /// memchr2, but using raw pointers to represent the haystack. |
| 215 | /// |
| 216 | /// # Safety |
| 217 | /// |
| 218 | /// Pointers must be valid. See `Two::find_raw`. |
| 219 | #[inline (always)] |
| 220 | pub(crate) fn memchr2_raw( |
| 221 | n1: u8, |
| 222 | n2: u8, |
| 223 | start: *const u8, |
| 224 | end: *const u8, |
| 225 | ) -> Option<*const u8> { |
| 226 | // SAFETY: We provide a valid function pointer type. |
| 227 | unsafe_ifunc!( |
| 228 | Two, |
| 229 | find_raw, |
| 230 | unsafe fn(u8, u8, *const u8, *const u8) -> Option<*const u8>, |
| 231 | Option<*const u8>, |
| 232 | start, |
| 233 | end, |
| 234 | n1, |
| 235 | n2 |
| 236 | ) |
| 237 | } |
| 238 | |
| 239 | /// memrchr2, but using raw pointers to represent the haystack. |
| 240 | /// |
| 241 | /// # Safety |
| 242 | /// |
| 243 | /// Pointers must be valid. See `Two::rfind_raw`. |
| 244 | #[inline (always)] |
| 245 | pub(crate) fn memrchr2_raw( |
| 246 | n1: u8, |
| 247 | n2: u8, |
| 248 | start: *const u8, |
| 249 | end: *const u8, |
| 250 | ) -> Option<*const u8> { |
| 251 | // SAFETY: We provide a valid function pointer type. |
| 252 | unsafe_ifunc!( |
| 253 | Two, |
| 254 | rfind_raw, |
| 255 | unsafe fn(u8, u8, *const u8, *const u8) -> Option<*const u8>, |
| 256 | Option<*const u8>, |
| 257 | start, |
| 258 | end, |
| 259 | n1, |
| 260 | n2 |
| 261 | ) |
| 262 | } |
| 263 | |
| 264 | /// memchr3, but using raw pointers to represent the haystack. |
| 265 | /// |
| 266 | /// # Safety |
| 267 | /// |
| 268 | /// Pointers must be valid. See `Three::find_raw`. |
| 269 | #[inline (always)] |
| 270 | pub(crate) fn memchr3_raw( |
| 271 | n1: u8, |
| 272 | n2: u8, |
| 273 | n3: u8, |
| 274 | start: *const u8, |
| 275 | end: *const u8, |
| 276 | ) -> Option<*const u8> { |
| 277 | // SAFETY: We provide a valid function pointer type. |
| 278 | unsafe_ifunc!( |
| 279 | Three, |
| 280 | find_raw, |
| 281 | unsafe fn(u8, u8, u8, *const u8, *const u8) -> Option<*const u8>, |
| 282 | Option<*const u8>, |
| 283 | start, |
| 284 | end, |
| 285 | n1, |
| 286 | n2, |
| 287 | n3 |
| 288 | ) |
| 289 | } |
| 290 | |
| 291 | /// memrchr3, but using raw pointers to represent the haystack. |
| 292 | /// |
| 293 | /// # Safety |
| 294 | /// |
| 295 | /// Pointers must be valid. See `Three::rfind_raw`. |
| 296 | #[inline (always)] |
| 297 | pub(crate) fn memrchr3_raw( |
| 298 | n1: u8, |
| 299 | n2: u8, |
| 300 | n3: u8, |
| 301 | start: *const u8, |
| 302 | end: *const u8, |
| 303 | ) -> Option<*const u8> { |
| 304 | // SAFETY: We provide a valid function pointer type. |
| 305 | unsafe_ifunc!( |
| 306 | Three, |
| 307 | rfind_raw, |
| 308 | unsafe fn(u8, u8, u8, *const u8, *const u8) -> Option<*const u8>, |
| 309 | Option<*const u8>, |
| 310 | start, |
| 311 | end, |
| 312 | n1, |
| 313 | n2, |
| 314 | n3 |
| 315 | ) |
| 316 | } |
| 317 | |
| 318 | /// Count all matching bytes, but using raw pointers to represent the haystack. |
| 319 | /// |
| 320 | /// # Safety |
| 321 | /// |
| 322 | /// Pointers must be valid. See `One::count_raw`. |
| 323 | #[inline (always)] |
| 324 | pub(crate) fn count_raw(n1: u8, start: *const u8, end: *const u8) -> usize { |
| 325 | // SAFETY: We provide a valid function pointer type. |
| 326 | unsafe_ifunc!( |
| 327 | One, |
| 328 | count_raw, |
| 329 | unsafe fn(u8, *const u8, *const u8) -> usize, |
| 330 | usize, |
| 331 | start, |
| 332 | end, |
| 333 | n1 |
| 334 | ) |
| 335 | } |
| 336 | |