// Select the group-scanning backend at compile time. Exactly one of the
// `sse2` / `neon` / `generic` sibling modules is compiled in and aliased
// as `imp`; the re-exports at the bottom expose its items to the crate.
cfg_if! {
    // Use the SSE2 implementation if possible: it allows us to scan 16 buckets
    // at once instead of 8. We don't bother with AVX since it would require
    // runtime dispatch and wouldn't gain us much anyways: the probability of
    // finding a match drops off drastically after the first few buckets.
    //
    // I attempted an implementation on ARM using NEON instructions, but it
    // turns out that most NEON instructions have multi-cycle latency, which in
    // the end outweighs any gains over the generic implementation.
    if #[cfg(all(
        target_feature = "sse2",
        any(target_arch = "x86", target_arch = "x86_64"),
        // SIMD backends are excluded when running under Miri; the `else`
        // branch below makes Miri use the `generic` implementation.
        not(miri),
    ))] {
        mod sse2;
        use sse2 as imp;
    } else if #[cfg(all(
        target_arch = "aarch64",
        target_feature = "neon",
        // NEON intrinsics are currently broken on big-endian targets.
        // See https://github.com/rust-lang/stdarch/issues/1484.
        target_endian = "little",
        not(miri),
    ))] {
        mod neon;
        use neon as imp;
    } else {
        // Portable fallback used on all other targets (and under Miri).
        mod generic;
        use generic as imp;
    }
}

// Group type of the selected backend, re-exported for crate-wide use.
pub(crate) use self::imp::Group;
// Bitmask word type and constants of the selected backend, visible only to
// the parent module. Their concrete definitions differ per backend.
pub(super) use self::imp::{
    BitMaskWord, NonZeroBitMaskWord, BITMASK_ITER_MASK, BITMASK_MASK, BITMASK_STRIDE,
};