| 1 | /*! | 
| 2 | Wrapper routines for `memchr` and friends. | 
|---|
| 3 |  | 
|---|
| 4 | These routines efficiently dispatch to the best implementation based on what | 
|---|
| 5 | the CPU supports. | 
|---|
| 6 | */ | 
|---|
| 7 |  | 
|---|
| 8 | /// Provides a way to run a memchr-like function while amortizing the cost of | 
|---|
| 9 | /// runtime CPU feature detection. | 
|---|
| 10 | /// | 
|---|
| 11 | /// This works by loading a function pointer from an atomic global. Initially, | 
|---|
| 12 | /// this global is set to a function that does CPU feature detection. For | 
|---|
| 13 | /// example, if AVX2 is enabled, then the AVX2 implementation is used. | 
|---|
| 14 | /// Otherwise, at least on x86_64, the SSE2 implementation is used. (And | 
|---|
| 15 | /// in some niche cases, if SSE2 isn't available, then the architecture | 
|---|
| 16 | /// independent fallback implementation is used.) | 
|---|
| 17 | /// | 
|---|
| 18 | /// After the first call to this function, the atomic global is replaced with | 
|---|
| 19 | /// the specific AVX2, SSE2 or fallback routine chosen. Subsequent calls then | 
|---|
| 20 | /// will directly call the chosen routine instead of needing to go through the | 
|---|
| 21 | /// CPU feature detection branching again. | 
|---|
| 22 | /// | 
|---|
| 23 | /// This particular macro is specifically written to provide the implementation | 
|---|
| 24 | /// of functions with the following signature: | 
|---|
| 25 | /// | 
|---|
| 26 | /// ```ignore | 
|---|
| 27 | /// fn memchr(needle1: u8, start: *const u8, end: *const u8) -> Option<usize>; | 
|---|
| 28 | /// ``` | 
|---|
| 29 | /// | 
|---|
| 30 | /// Where you can also have `memchr2` and `memchr3`, but with `needle2` and | 
|---|
| 31 | /// `needle3`, respectively. The `start` and `end` parameters correspond to the | 
|---|
| 32 | /// start and end of the haystack, respectively. | 
|---|
| 33 | /// | 
|---|
| 34 | /// We use raw pointers here instead of the more obvious `haystack: &[u8]` so | 
|---|
| 35 | /// that the function is compatible with our lower level iterator logic that | 
|---|
| 36 | /// operates on raw pointers. We use this macro to implement "raw" memchr | 
|---|
| 37 | /// routines with the signature above, and then define memchr routines using | 
|---|
| 38 | /// regular slices on top of them. | 
|---|
| 39 | /// | 
|---|
| 40 | /// Note that we use `#[cfg(target_feature = "sse2")]` below even though | 
|---|
| 41 | /// it shouldn't be strictly necessary because without it, it seems to | 
|---|
| 42 | /// cause the compiler to blow up. I guess it can't handle a function | 
|---|
| 43 | /// pointer being created with a sse target feature? Dunno. See the | 
|---|
| 44 | /// `build-for-x86-64-but-non-sse-target` CI job if you want to experiment with | 
|---|
| 45 | /// this. | 
|---|
| 46 | /// | 
|---|
| 47 | /// # Safety | 
|---|
| 48 | /// | 
|---|
| 49 | /// Primarily callers must that `$fnty` is a correct function pointer type and | 
|---|
| 50 | /// not something else. | 
|---|
| 51 | /// | 
|---|
| 52 | /// Callers must also ensure that `$memchrty::$memchrfind` corresponds to a | 
|---|
| 53 | /// routine that returns a valid function pointer when a match is found. That | 
|---|
| 54 | /// is, a pointer that is `>= start` and `< end`. | 
|---|
| 55 | /// | 
|---|
| 56 | /// Callers must also ensure that the `$hay_start` and `$hay_end` identifiers | 
|---|
| 57 | /// correspond to valid pointers. | 
|---|
| 58 | macro_rules! unsafe_ifunc { | 
|---|
| 59 | ( | 
|---|
| 60 | $memchrty:ident, | 
|---|
| 61 | $memchrfind:ident, | 
|---|
| 62 | $fnty:ty, | 
|---|
| 63 | $retty:ty, | 
|---|
| 64 | $hay_start:ident, | 
|---|
| 65 | $hay_end:ident, | 
|---|
| 66 | $($needle:ident),+ | 
|---|
| 67 | ) => {{ | 
|---|
| 68 | #![allow(unused_unsafe)] | 
|---|
| 69 |  | 
|---|
| 70 | use core::sync::atomic::{AtomicPtr, Ordering}; | 
|---|
| 71 |  | 
|---|
| 72 | type Fn = *mut (); | 
|---|
| 73 | type RealFn = $fnty; | 
|---|
| 74 | static FN: AtomicPtr<()> = AtomicPtr::new(detect as Fn); | 
|---|
| 75 |  | 
|---|
| 76 | #[cfg(target_feature = "sse2")] | 
|---|
| 77 | #[target_feature(enable = "sse2", enable = "avx2")] | 
|---|
| 78 | unsafe fn find_avx2( | 
|---|
| 79 | $($needle: u8),+, | 
|---|
| 80 | $hay_start: *const u8, | 
|---|
| 81 | $hay_end: *const u8, | 
|---|
| 82 | ) -> $retty { | 
|---|
| 83 | use crate::arch::x86_64::avx2::memchr::$memchrty; | 
|---|
| 84 | $memchrty::new_unchecked($($needle),+) | 
|---|
| 85 | .$memchrfind($hay_start, $hay_end) | 
|---|
| 86 | } | 
|---|
| 87 |  | 
|---|
| 88 | #[cfg(target_feature = "sse2")] | 
|---|
| 89 | #[target_feature(enable = "sse2")] | 
|---|
| 90 | unsafe fn find_sse2( | 
|---|
| 91 | $($needle: u8),+, | 
|---|
| 92 | $hay_start: *const u8, | 
|---|
| 93 | $hay_end: *const u8, | 
|---|
| 94 | ) -> $retty { | 
|---|
| 95 | use crate::arch::x86_64::sse2::memchr::$memchrty; | 
|---|
| 96 | $memchrty::new_unchecked($($needle),+) | 
|---|
| 97 | .$memchrfind($hay_start, $hay_end) | 
|---|
| 98 | } | 
|---|
| 99 |  | 
|---|
| 100 | unsafe fn find_fallback( | 
|---|
| 101 | $($needle: u8),+, | 
|---|
| 102 | $hay_start: *const u8, | 
|---|
| 103 | $hay_end: *const u8, | 
|---|
| 104 | ) -> $retty { | 
|---|
| 105 | use crate::arch::all::memchr::$memchrty; | 
|---|
| 106 | $memchrty::new($($needle),+).$memchrfind($hay_start, $hay_end) | 
|---|
| 107 | } | 
|---|
| 108 |  | 
|---|
| 109 | unsafe fn detect( | 
|---|
| 110 | $($needle: u8),+, | 
|---|
| 111 | $hay_start: *const u8, | 
|---|
| 112 | $hay_end: *const u8, | 
|---|
| 113 | ) -> $retty { | 
|---|
| 114 | let fun = { | 
|---|
| 115 | #[cfg(not(target_feature = "sse2"))] | 
|---|
| 116 | { | 
|---|
| 117 | debug!( | 
|---|
| 118 | "no sse2 feature available, using fallback for {}", | 
|---|
| 119 | stringify!($memchrty), | 
|---|
| 120 | ); | 
|---|
| 121 | find_fallback as RealFn | 
|---|
| 122 | } | 
|---|
| 123 | #[cfg(target_feature = "sse2")] | 
|---|
| 124 | { | 
|---|
| 125 | use crate::arch::x86_64::{sse2, avx2}; | 
|---|
| 126 | if avx2::memchr::$memchrty::is_available() { | 
|---|
| 127 | debug!( "chose AVX2 for {}", stringify!($memchrty)); | 
|---|
| 128 | find_avx2 as RealFn | 
|---|
| 129 | } else if sse2::memchr::$memchrty::is_available() { | 
|---|
| 130 | debug!( "chose SSE2 for {}", stringify!($memchrty)); | 
|---|
| 131 | find_sse2 as RealFn | 
|---|
| 132 | } else { | 
|---|
| 133 | debug!( "chose fallback for {}", stringify!($memchrty)); | 
|---|
| 134 | find_fallback as RealFn | 
|---|
| 135 | } | 
|---|
| 136 | } | 
|---|
| 137 | }; | 
|---|
| 138 | FN.store(fun as Fn, Ordering::Relaxed); | 
|---|
| 139 | // SAFETY: The only thing we need to uphold here is the | 
|---|
| 140 | // `#[target_feature]` requirements. Since we check is_available | 
|---|
| 141 | // above before using the corresponding implementation, we are | 
|---|
| 142 | // guaranteed to only call code that is supported on the current | 
|---|
| 143 | // CPU. | 
|---|
| 144 | fun($($needle),+, $hay_start, $hay_end) | 
|---|
| 145 | } | 
|---|
| 146 |  | 
|---|
| 147 | // SAFETY: By virtue of the caller contract, RealFn is a function | 
|---|
| 148 | // pointer, which is always safe to transmute with a *mut (). Also, | 
|---|
| 149 | // since we use $memchrty::is_available, it is guaranteed to be safe | 
|---|
| 150 | // to call $memchrty::$memchrfind. | 
|---|
| 151 | unsafe { | 
|---|
| 152 | let fun = FN.load(Ordering::Relaxed); | 
|---|
| 153 | core::mem::transmute::<Fn, RealFn>(fun)( | 
|---|
| 154 | $($needle),+, | 
|---|
| 155 | $hay_start, | 
|---|
| 156 | $hay_end, | 
|---|
| 157 | ) | 
|---|
| 158 | } | 
|---|
| 159 | }}; | 
|---|
| 160 | } | 
|---|
| 161 |  | 
|---|
| 162 | // The routines below dispatch to AVX2, SSE2 or a fallback routine based on | 
|---|
| 163 | // what's available in the current environment. The secret sauce here is that | 
|---|
| 164 | // we only check for which one to use approximately once, and then "cache" that | 
|---|
| 165 | // choice into a global function pointer. Subsequent invocations then just call | 
|---|
| 166 | // the appropriate function directly. | 
|---|
| 167 |  | 
|---|
| 168 | /// memchr, but using raw pointers to represent the haystack. | 
|---|
| 169 | /// | 
|---|
| 170 | /// # Safety | 
|---|
| 171 | /// | 
|---|
| 172 | /// Pointers must be valid. See `One::find_raw`. | 
|---|
| 173 | #[ inline(always)] | 
|---|
| 174 | pub(crate) fn memchr_raw( | 
|---|
| 175 | n1: u8, | 
|---|
| 176 | start: *const u8, | 
|---|
| 177 | end: *const u8, | 
|---|
| 178 | ) -> Option<*const u8> { | 
|---|
| 179 | // SAFETY: We provide a valid function pointer type. | 
|---|
| 180 | unsafe_ifunc!( | 
|---|
| 181 | One, | 
|---|
| 182 | find_raw, | 
|---|
| 183 | unsafe fn(u8, *const u8, *const u8) -> Option<*const u8>, | 
|---|
| 184 | Option<*const u8>, | 
|---|
| 185 | start, | 
|---|
| 186 | end, | 
|---|
| 187 | n1 | 
|---|
| 188 | ) | 
|---|
| 189 | } | 
|---|
| 190 |  | 
|---|
| 191 | /// memrchr, but using raw pointers to represent the haystack. | 
|---|
| 192 | /// | 
|---|
| 193 | /// # Safety | 
|---|
| 194 | /// | 
|---|
| 195 | /// Pointers must be valid. See `One::rfind_raw`. | 
|---|
| 196 | #[ inline(always)] | 
|---|
| 197 | pub(crate) fn memrchr_raw( | 
|---|
| 198 | n1: u8, | 
|---|
| 199 | start: *const u8, | 
|---|
| 200 | end: *const u8, | 
|---|
| 201 | ) -> Option<*const u8> { | 
|---|
| 202 | // SAFETY: We provide a valid function pointer type. | 
|---|
| 203 | unsafe_ifunc!( | 
|---|
| 204 | One, | 
|---|
| 205 | rfind_raw, | 
|---|
| 206 | unsafe fn(u8, *const u8, *const u8) -> Option<*const u8>, | 
|---|
| 207 | Option<*const u8>, | 
|---|
| 208 | start, | 
|---|
| 209 | end, | 
|---|
| 210 | n1 | 
|---|
| 211 | ) | 
|---|
| 212 | } | 
|---|
| 213 |  | 
|---|
| 214 | /// memchr2, but using raw pointers to represent the haystack. | 
|---|
| 215 | /// | 
|---|
| 216 | /// # Safety | 
|---|
| 217 | /// | 
|---|
| 218 | /// Pointers must be valid. See `Two::find_raw`. | 
|---|
| 219 | #[ inline(always)] | 
|---|
| 220 | pub(crate) fn memchr2_raw( | 
|---|
| 221 | n1: u8, | 
|---|
| 222 | n2: u8, | 
|---|
| 223 | start: *const u8, | 
|---|
| 224 | end: *const u8, | 
|---|
| 225 | ) -> Option<*const u8> { | 
|---|
| 226 | // SAFETY: We provide a valid function pointer type. | 
|---|
| 227 | unsafe_ifunc!( | 
|---|
| 228 | Two, | 
|---|
| 229 | find_raw, | 
|---|
| 230 | unsafe fn(u8, u8, *const u8, *const u8) -> Option<*const u8>, | 
|---|
| 231 | Option<*const u8>, | 
|---|
| 232 | start, | 
|---|
| 233 | end, | 
|---|
| 234 | n1, | 
|---|
| 235 | n2 | 
|---|
| 236 | ) | 
|---|
| 237 | } | 
|---|
| 238 |  | 
|---|
| 239 | /// memrchr2, but using raw pointers to represent the haystack. | 
|---|
| 240 | /// | 
|---|
| 241 | /// # Safety | 
|---|
| 242 | /// | 
|---|
| 243 | /// Pointers must be valid. See `Two::rfind_raw`. | 
|---|
| 244 | #[ inline(always)] | 
|---|
| 245 | pub(crate) fn memrchr2_raw( | 
|---|
| 246 | n1: u8, | 
|---|
| 247 | n2: u8, | 
|---|
| 248 | start: *const u8, | 
|---|
| 249 | end: *const u8, | 
|---|
| 250 | ) -> Option<*const u8> { | 
|---|
| 251 | // SAFETY: We provide a valid function pointer type. | 
|---|
| 252 | unsafe_ifunc!( | 
|---|
| 253 | Two, | 
|---|
| 254 | rfind_raw, | 
|---|
| 255 | unsafe fn(u8, u8, *const u8, *const u8) -> Option<*const u8>, | 
|---|
| 256 | Option<*const u8>, | 
|---|
| 257 | start, | 
|---|
| 258 | end, | 
|---|
| 259 | n1, | 
|---|
| 260 | n2 | 
|---|
| 261 | ) | 
|---|
| 262 | } | 
|---|
| 263 |  | 
|---|
| 264 | /// memchr3, but using raw pointers to represent the haystack. | 
|---|
| 265 | /// | 
|---|
| 266 | /// # Safety | 
|---|
| 267 | /// | 
|---|
| 268 | /// Pointers must be valid. See `Three::find_raw`. | 
|---|
| 269 | #[ inline(always)] | 
|---|
| 270 | pub(crate) fn memchr3_raw( | 
|---|
| 271 | n1: u8, | 
|---|
| 272 | n2: u8, | 
|---|
| 273 | n3: u8, | 
|---|
| 274 | start: *const u8, | 
|---|
| 275 | end: *const u8, | 
|---|
| 276 | ) -> Option<*const u8> { | 
|---|
| 277 | // SAFETY: We provide a valid function pointer type. | 
|---|
| 278 | unsafe_ifunc!( | 
|---|
| 279 | Three, | 
|---|
| 280 | find_raw, | 
|---|
| 281 | unsafe fn(u8, u8, u8, *const u8, *const u8) -> Option<*const u8>, | 
|---|
| 282 | Option<*const u8>, | 
|---|
| 283 | start, | 
|---|
| 284 | end, | 
|---|
| 285 | n1, | 
|---|
| 286 | n2, | 
|---|
| 287 | n3 | 
|---|
| 288 | ) | 
|---|
| 289 | } | 
|---|
| 290 |  | 
|---|
| 291 | /// memrchr3, but using raw pointers to represent the haystack. | 
|---|
| 292 | /// | 
|---|
| 293 | /// # Safety | 
|---|
| 294 | /// | 
|---|
| 295 | /// Pointers must be valid. See `Three::rfind_raw`. | 
|---|
| 296 | #[ inline(always)] | 
|---|
| 297 | pub(crate) fn memrchr3_raw( | 
|---|
| 298 | n1: u8, | 
|---|
| 299 | n2: u8, | 
|---|
| 300 | n3: u8, | 
|---|
| 301 | start: *const u8, | 
|---|
| 302 | end: *const u8, | 
|---|
| 303 | ) -> Option<*const u8> { | 
|---|
| 304 | // SAFETY: We provide a valid function pointer type. | 
|---|
| 305 | unsafe_ifunc!( | 
|---|
| 306 | Three, | 
|---|
| 307 | rfind_raw, | 
|---|
| 308 | unsafe fn(u8, u8, u8, *const u8, *const u8) -> Option<*const u8>, | 
|---|
| 309 | Option<*const u8>, | 
|---|
| 310 | start, | 
|---|
| 311 | end, | 
|---|
| 312 | n1, | 
|---|
| 313 | n2, | 
|---|
| 314 | n3 | 
|---|
| 315 | ) | 
|---|
| 316 | } | 
|---|
| 317 |  | 
|---|
| 318 | /// Count all matching bytes, but using raw pointers to represent the haystack. | 
|---|
| 319 | /// | 
|---|
| 320 | /// # Safety | 
|---|
| 321 | /// | 
|---|
| 322 | /// Pointers must be valid. See `One::count_raw`. | 
|---|
| 323 | #[ inline(always)] | 
|---|
| 324 | pub(crate) fn count_raw(n1: u8, start: *const u8, end: *const u8) -> usize { | 
|---|
| 325 | // SAFETY: We provide a valid function pointer type. | 
|---|
| 326 | unsafe_ifunc!( | 
|---|
| 327 | One, | 
|---|
| 328 | count_raw, | 
|---|
| 329 | unsafe fn(u8, *const u8, *const u8) -> usize, | 
|---|
| 330 | usize, | 
|---|
| 331 | start, | 
|---|
| 332 | end, | 
|---|
| 333 | n1 | 
|---|
| 334 | ) | 
|---|
| 335 | } | 
|---|
| 336 |  | 
|---|