| 1 | cfg_if! { |
| 2 | // Select the `Group` implementation (`imp`); the first matching branch wins. |
| 3 | // Prefer SSE2 if possible: it allows us to scan 16 buckets at once instead |
| 4 | // of 8. We don't bother with AVX since it would require runtime dispatch and |
| 5 | // wouldn't gain us much anyways: the probability of finding a match drops |
| 6 | // off drastically after the first few buckets. NEON (little-endian AArch64) |
| 7 | // and LSX (LoongArch64, `nightly` feature only) come next; everything else, |
| 8 | // including any build under Miri, uses the generic implementation. |
| 9 | // NOTE(review): an older note said NEON latency outweighed its gains; the `neon` branch below disagrees. |
| 10 | if #[cfg(all( |
| 11 | target_feature = "sse2" , |
| 12 | any(target_arch = "x86" , target_arch = "x86_64" ), |
| 13 | not(miri), |
| 14 | ))] { |
| 15 | mod sse2; |
| 16 | use sse2 as imp; |
| 17 | } else if #[cfg(all( |
| 18 | target_arch = "aarch64" , |
| 19 | target_feature = "neon" , |
| 20 | // NEON intrinsics are currently broken on big-endian targets. |
| 21 | // See https://github.com/rust-lang/stdarch/issues/1484. |
| 22 | target_endian = "little" , |
| 23 | not(miri), |
| 24 | ))] { |
| 25 | mod neon; |
| 26 | use neon as imp; |
| 27 | } else if #[cfg(all( |
| 28 | feature = "nightly" , |
| 29 | target_arch = "loongarch64" , |
| 30 | target_feature = "lsx" , |
| 31 | not(miri), |
| 32 | ))] { |
| 33 | mod lsx; |
| 34 | use lsx as imp; |
| 35 | } else { |
| 36 | mod generic; |
| 37 | use generic as imp; |
| 38 | } |
| 39 | } |
| 40 | pub(crate) use self::imp::Group; |
| 41 | pub(super) use self::imp::{ |
| 42 | BitMaskWord, NonZeroBitMaskWord, BITMASK_ITER_MASK, BITMASK_MASK, BITMASK_STRIDE, |
| 43 | }; |
| 44 | |