/// A trait for describing vector operations used by vectorized searchers.
///
/// The trait is highly constrained to low level vector operations needed. In
/// general, it was invented mostly to be generic over x86's __m128i and
/// __m256i types. It's likely that once std::simd becomes a thing, we can
/// migrate to that since the operations required are quite simple.
///
/// TODO: Consider moving this trait up a level and using it to implement
/// memchr as well. The trait might need to grow one or two methods, but
/// otherwise should be close to sufficient already.
///
/// # Safety
///
/// All methods are not safe since they are intended to be implemented using
/// vendor intrinsics, which are also not safe. Callers must ensure that the
/// appropriate target features are enabled in the calling function, and that
/// the current CPU supports them. All implementations should avoid marking the
/// routines with #[target_feature] and instead mark them as #[inline(always)]
/// to ensure they get appropriately inlined. (inline(always) cannot be used
/// with target_feature.)
pub(crate) trait Vector: Copy + core::fmt::Debug {
    /// Create a vector with every byte lane set to `byte`.
    ///
    /// Corresponds to _mm_set1_epi8 or _mm256_set1_epi8.
    unsafe fn splat(byte: u8) -> Self;
    /// Load a vector from `data`, which does not need to be aligned.
    ///
    /// Corresponds to _mm_loadu_si128 or _mm256_loadu_si256.
    unsafe fn load_unaligned(data: *const u8) -> Self;
    /// Extract the most significant bit of each byte lane into the low bits
    /// of a `u32` (one bit per lane).
    ///
    /// Corresponds to _mm_movemask_epi8 or _mm256_movemask_epi8.
    unsafe fn movemask(self) -> u32;
    /// Lane-wise byte equality comparison: equal lanes become all ones,
    /// unequal lanes become all zeros.
    ///
    /// Corresponds to _mm_cmpeq_epi8 or _mm256_cmpeq_epi8.
    unsafe fn cmpeq(self, vector2: Self) -> Self;
    /// Bitwise AND of the two vectors.
    ///
    /// Corresponds to _mm_and_si128 or _mm256_and_si256.
    unsafe fn and(self, vector2: Self) -> Self;
}
33 | |
34 | #[cfg (target_arch = "x86_64" )] |
35 | mod x86sse { |
36 | use super::Vector; |
37 | use core::arch::x86_64::*; |
38 | |
39 | impl Vector for __m128i { |
40 | #[inline (always)] |
41 | unsafe fn splat(byte: u8) -> __m128i { |
42 | _mm_set1_epi8(byte as i8) |
43 | } |
44 | |
45 | #[inline (always)] |
46 | unsafe fn load_unaligned(data: *const u8) -> __m128i { |
47 | _mm_loadu_si128(data as *const __m128i) |
48 | } |
49 | |
50 | #[inline (always)] |
51 | unsafe fn movemask(self) -> u32 { |
52 | _mm_movemask_epi8(self) as u32 |
53 | } |
54 | |
55 | #[inline (always)] |
56 | unsafe fn cmpeq(self, vector2: Self) -> __m128i { |
57 | _mm_cmpeq_epi8(self, vector2) |
58 | } |
59 | |
60 | #[inline (always)] |
61 | unsafe fn and(self, vector2: Self) -> __m128i { |
62 | _mm_and_si128(self, vector2) |
63 | } |
64 | } |
65 | } |
66 | |
#[cfg(all(feature = "std", target_arch = "x86_64"))]
mod x86avx {
    use super::Vector;
    use core::arch::x86_64::*;

    /// 256-bit implementation of `Vector` on top of AVX2 intrinsics.
    impl Vector for __m256i {
        /// Broadcast `byte` into all 32 lanes.
        #[inline(always)]
        unsafe fn splat(byte: u8) -> Self {
            _mm256_set1_epi8(byte as i8)
        }

        /// Read 32 bytes starting at `data`; no alignment is required.
        #[inline(always)]
        unsafe fn load_unaligned(data: *const u8) -> Self {
            _mm256_loadu_si256(data.cast::<__m256i>())
        }

        /// Gather the high bit of each byte lane into a 32-bit mask.
        #[inline(always)]
        unsafe fn movemask(self) -> u32 {
            _mm256_movemask_epi8(self) as u32
        }

        /// Per-lane byte equality; matching lanes become 0xFF.
        #[inline(always)]
        unsafe fn cmpeq(self, other: Self) -> Self {
            _mm256_cmpeq_epi8(self, other)
        }

        /// Bitwise AND of both vectors.
        #[inline(always)]
        unsafe fn and(self, other: Self) -> Self {
            _mm256_and_si256(self, other)
        }
    }
}
99 | |
#[cfg(target_arch = "wasm32")]
mod wasm_simd128 {
    use super::Vector;
    use core::arch::wasm32::*;

    /// 128-bit implementation of `Vector` on top of wasm simd128 intrinsics.
    impl Vector for v128 {
        /// Broadcast `byte` into all 16 lanes.
        #[inline(always)]
        unsafe fn splat(byte: u8) -> Self {
            u8x16_splat(byte)
        }

        /// Read 16 bytes starting at `data`; no alignment is required.
        #[inline(always)]
        unsafe fn load_unaligned(data: *const u8) -> Self {
            v128_load(data.cast::<v128>())
        }

        /// Gather the high bit of each byte lane into the low 16 bits.
        #[inline(always)]
        unsafe fn movemask(self) -> u32 {
            u32::from(u8x16_bitmask(self))
        }

        /// Per-lane byte equality; matching lanes become 0xFF.
        #[inline(always)]
        unsafe fn cmpeq(self, other: Self) -> Self {
            u8x16_eq(self, other)
        }

        /// Bitwise AND of both vectors.
        #[inline(always)]
        unsafe fn and(self, other: Self) -> Self {
            v128_and(self, other)
        }
    }
}
132 | |