1 | //! Bit-oriented Algorithms (BITALG) |
2 | //! |
3 | //! The intrinsics here correspond to those in the `immintrin.h` C header. |
4 | //! |
5 | //! The reference is [Intel 64 and IA-32 Architectures Software Developer's |
6 | //! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref]. |
7 | //! |
8 | //! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf |
9 | |
10 | use crate::core_arch::simd::i16x16; |
11 | use crate::core_arch::simd::i16x32; |
12 | use crate::core_arch::simd::i16x8; |
13 | use crate::core_arch::simd::i8x16; |
14 | use crate::core_arch::simd::i8x32; |
15 | use crate::core_arch::simd::i8x64; |
16 | use crate::core_arch::x86::__m128i; |
17 | use crate::core_arch::x86::__m256i; |
18 | use crate::core_arch::x86::__m512i; |
19 | use crate::core_arch::x86::__mmask16; |
20 | use crate::core_arch::x86::__mmask32; |
21 | use crate::core_arch::x86::__mmask64; |
22 | use crate::core_arch::x86::__mmask8; |
23 | use crate::core_arch::x86::_mm256_setzero_si256; |
24 | use crate::core_arch::x86::_mm512_setzero_si512; |
25 | use crate::core_arch::x86::_mm_setzero_si128; |
26 | use crate::core_arch::x86::m128iExt; |
27 | use crate::core_arch::x86::m256iExt; |
28 | use crate::core_arch::x86::m512iExt; |
29 | use crate::intrinsics::simd::simd_select_bitmask; |
30 | use crate::mem::transmute; |
31 | |
32 | #[cfg (test)] |
33 | use stdarch_test::assert_instr; |
34 | |
// Raw LLVM intrinsic declarations used by the public wrappers in this file.
// Each `link_name` names the LLVM intrinsic that the declaration binds to.
// NOTE(review): `improper_ctypes` is presumably silenced because the SIMD vector
// types in these signatures are not considered FFI-safe by the lint — confirm.
#[allow (improper_ctypes)]
extern "C" {
    // `llvm.ctpop` over 16-bit lanes, for 32-, 16- and 8-lane vectors.
    #[link_name = "llvm.ctpop.v32i16" ]
    fn popcnt_v32i16(x: i16x32) -> i16x32;
    #[link_name = "llvm.ctpop.v16i16" ]
    fn popcnt_v16i16(x: i16x16) -> i16x16;
    #[link_name = "llvm.ctpop.v8i16" ]
    fn popcnt_v8i16(x: i16x8) -> i16x8;

    // `llvm.ctpop` over 8-bit lanes, for 64-, 32- and 16-lane vectors.
    #[link_name = "llvm.ctpop.v64i8" ]
    fn popcnt_v64i8(x: i8x64) -> i8x64;
    #[link_name = "llvm.ctpop.v32i8" ]
    fn popcnt_v32i8(x: i8x32) -> i8x32;
    #[link_name = "llvm.ctpop.v16i8" ]
    fn popcnt_v16i8(x: i8x16) -> i8x16;

    // Masked `vpshufbitqmb` for 512-, 256- and 128-bit vectors. The mask type
    // has one bit per byte lane of the inputs (64, 32 and 16 bits respectively).
    #[link_name = "llvm.x86.avx512.mask.vpshufbitqmb.512" ]
    fn bitshuffle_512(data: i8x64, indices: i8x64, mask: __mmask64) -> __mmask64;
    #[link_name = "llvm.x86.avx512.mask.vpshufbitqmb.256" ]
    fn bitshuffle_256(data: i8x32, indices: i8x32, mask: __mmask32) -> __mmask32;
    #[link_name = "llvm.x86.avx512.mask.vpshufbitqmb.128" ]
    fn bitshuffle_128(data: i8x16, indices: i8x16, mask: __mmask16) -> __mmask16;
}
58 | |
59 | /// For each packed 16-bit integer maps the value to the number of logical 1 bits. |
60 | /// |
61 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_popcnt_epi16) |
62 | #[inline ] |
63 | #[target_feature (enable = "avx512bitalg" )] |
64 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
65 | #[cfg_attr (test, assert_instr(vpopcntw))] |
66 | pub unsafe fn _mm512_popcnt_epi16(a: __m512i) -> __m512i { |
67 | transmute(src:popcnt_v32i16(a.as_i16x32())) |
68 | } |
69 | |
70 | /// For each packed 16-bit integer maps the value to the number of logical 1 bits. |
71 | /// |
72 | /// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set. |
73 | /// Otherwise the computation result is written into the result. |
74 | /// |
75 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_popcnt_epi16) |
76 | #[inline ] |
77 | #[target_feature (enable = "avx512bitalg" )] |
78 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
79 | #[cfg_attr (test, assert_instr(vpopcntw))] |
80 | pub unsafe fn _mm512_maskz_popcnt_epi16(k: __mmask32, a: __m512i) -> __m512i { |
81 | let zero: i16x32 = _mm512_setzero_si512().as_i16x32(); |
82 | transmute(src:simd_select_bitmask(m:k, yes:popcnt_v32i16(a.as_i16x32()), no:zero)) |
83 | } |
84 | |
85 | /// For each packed 16-bit integer maps the value to the number of logical 1 bits. |
86 | /// |
87 | /// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set. |
88 | /// Otherwise the computation result is written into the result. |
89 | /// |
90 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_popcnt_epi16) |
91 | #[inline ] |
92 | #[target_feature (enable = "avx512bitalg" )] |
93 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
94 | #[cfg_attr (test, assert_instr(vpopcntw))] |
95 | pub unsafe fn _mm512_mask_popcnt_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i { |
96 | transmute(src:simd_select_bitmask( |
97 | m:k, |
98 | yes:popcnt_v32i16(a.as_i16x32()), |
99 | no:src.as_i16x32(), |
100 | )) |
101 | } |
102 | |
103 | /// For each packed 16-bit integer maps the value to the number of logical 1 bits. |
104 | /// |
105 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_popcnt_epi16) |
106 | #[inline ] |
107 | #[target_feature (enable = "avx512bitalg,avx512vl" )] |
108 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
109 | #[cfg_attr (test, assert_instr(vpopcntw))] |
110 | pub unsafe fn _mm256_popcnt_epi16(a: __m256i) -> __m256i { |
111 | transmute(src:popcnt_v16i16(a.as_i16x16())) |
112 | } |
113 | |
114 | /// For each packed 16-bit integer maps the value to the number of logical 1 bits. |
115 | /// |
116 | /// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set. |
117 | /// Otherwise the computation result is written into the result. |
118 | /// |
119 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_popcnt_epi16) |
120 | #[inline ] |
121 | #[target_feature (enable = "avx512bitalg,avx512vl" )] |
122 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
123 | #[cfg_attr (test, assert_instr(vpopcntw))] |
124 | pub unsafe fn _mm256_maskz_popcnt_epi16(k: __mmask16, a: __m256i) -> __m256i { |
125 | let zero: i16x16 = _mm256_setzero_si256().as_i16x16(); |
126 | transmute(src:simd_select_bitmask(m:k, yes:popcnt_v16i16(a.as_i16x16()), no:zero)) |
127 | } |
128 | |
129 | /// For each packed 16-bit integer maps the value to the number of logical 1 bits. |
130 | /// |
131 | /// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set. |
132 | /// Otherwise the computation result is written into the result. |
133 | /// |
134 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_popcnt_epi16) |
135 | #[inline ] |
136 | #[target_feature (enable = "avx512bitalg,avx512vl" )] |
137 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
138 | #[cfg_attr (test, assert_instr(vpopcntw))] |
139 | pub unsafe fn _mm256_mask_popcnt_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i { |
140 | transmute(src:simd_select_bitmask( |
141 | m:k, |
142 | yes:popcnt_v16i16(a.as_i16x16()), |
143 | no:src.as_i16x16(), |
144 | )) |
145 | } |
146 | |
147 | /// For each packed 16-bit integer maps the value to the number of logical 1 bits. |
148 | /// |
149 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_popcnt_epi16) |
150 | #[inline ] |
151 | #[target_feature (enable = "avx512bitalg,avx512vl" )] |
152 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
153 | #[cfg_attr (test, assert_instr(vpopcntw))] |
154 | pub unsafe fn _mm_popcnt_epi16(a: __m128i) -> __m128i { |
155 | transmute(src:popcnt_v8i16(a.as_i16x8())) |
156 | } |
157 | |
158 | /// For each packed 16-bit integer maps the value to the number of logical 1 bits. |
159 | /// |
160 | /// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set. |
161 | /// Otherwise the computation result is written into the result. |
162 | /// |
163 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_popcnt_epi16) |
164 | #[inline ] |
165 | #[target_feature (enable = "avx512bitalg,avx512vl" )] |
166 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
167 | #[cfg_attr (test, assert_instr(vpopcntw))] |
168 | pub unsafe fn _mm_maskz_popcnt_epi16(k: __mmask8, a: __m128i) -> __m128i { |
169 | let zero: i16x8 = _mm_setzero_si128().as_i16x8(); |
170 | transmute(src:simd_select_bitmask(m:k, yes:popcnt_v8i16(a.as_i16x8()), no:zero)) |
171 | } |
172 | |
173 | /// For each packed 16-bit integer maps the value to the number of logical 1 bits. |
174 | /// |
175 | /// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set. |
176 | /// Otherwise the computation result is written into the result. |
177 | /// |
178 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_popcnt_epi16) |
179 | #[inline ] |
180 | #[target_feature (enable = "avx512bitalg,avx512vl" )] |
181 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
182 | #[cfg_attr (test, assert_instr(vpopcntw))] |
183 | pub unsafe fn _mm_mask_popcnt_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { |
184 | transmute(src:simd_select_bitmask( |
185 | m:k, |
186 | yes:popcnt_v8i16(a.as_i16x8()), |
187 | no:src.as_i16x8(), |
188 | )) |
189 | } |
190 | |
191 | /// For each packed 8-bit integer maps the value to the number of logical 1 bits. |
192 | /// |
193 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_popcnt_epi8) |
194 | #[inline ] |
195 | #[target_feature (enable = "avx512bitalg" )] |
196 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
197 | #[cfg_attr (test, assert_instr(vpopcntb))] |
198 | pub unsafe fn _mm512_popcnt_epi8(a: __m512i) -> __m512i { |
199 | transmute(src:popcnt_v64i8(a.as_i8x64())) |
200 | } |
201 | |
202 | /// For each packed 8-bit integer maps the value to the number of logical 1 bits. |
203 | /// |
204 | /// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set. |
205 | /// Otherwise the computation result is written into the result. |
206 | /// |
207 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_popcnt_epi8) |
208 | #[inline ] |
209 | #[target_feature (enable = "avx512bitalg" )] |
210 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
211 | #[cfg_attr (test, assert_instr(vpopcntb))] |
212 | pub unsafe fn _mm512_maskz_popcnt_epi8(k: __mmask64, a: __m512i) -> __m512i { |
213 | let zero: i8x64 = _mm512_setzero_si512().as_i8x64(); |
214 | transmute(src:simd_select_bitmask(m:k, yes:popcnt_v64i8(a.as_i8x64()), no:zero)) |
215 | } |
216 | |
217 | /// For each packed 8-bit integer maps the value to the number of logical 1 bits. |
218 | /// |
219 | /// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set. |
220 | /// Otherwise the computation result is written into the result. |
221 | /// |
222 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_popcnt_epi8) |
223 | #[inline ] |
224 | #[target_feature (enable = "avx512bitalg" )] |
225 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
226 | #[cfg_attr (test, assert_instr(vpopcntb))] |
227 | pub unsafe fn _mm512_mask_popcnt_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i { |
228 | transmute(src:simd_select_bitmask( |
229 | m:k, |
230 | yes:popcnt_v64i8(a.as_i8x64()), |
231 | no:src.as_i8x64(), |
232 | )) |
233 | } |
234 | |
235 | /// For each packed 8-bit integer maps the value to the number of logical 1 bits. |
236 | /// |
237 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_popcnt_epi8) |
238 | #[inline ] |
239 | #[target_feature (enable = "avx512bitalg,avx512vl" )] |
240 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
241 | #[cfg_attr (test, assert_instr(vpopcntb))] |
242 | pub unsafe fn _mm256_popcnt_epi8(a: __m256i) -> __m256i { |
243 | transmute(src:popcnt_v32i8(a.as_i8x32())) |
244 | } |
245 | |
246 | /// For each packed 8-bit integer maps the value to the number of logical 1 bits. |
247 | /// |
248 | /// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set. |
249 | /// Otherwise the computation result is written into the result. |
250 | /// |
251 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_popcnt_epi8) |
252 | #[inline ] |
253 | #[target_feature (enable = "avx512bitalg,avx512vl" )] |
254 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
255 | #[cfg_attr (test, assert_instr(vpopcntb))] |
256 | pub unsafe fn _mm256_maskz_popcnt_epi8(k: __mmask32, a: __m256i) -> __m256i { |
257 | let zero: i8x32 = _mm256_setzero_si256().as_i8x32(); |
258 | transmute(src:simd_select_bitmask(m:k, yes:popcnt_v32i8(a.as_i8x32()), no:zero)) |
259 | } |
260 | |
261 | /// For each packed 8-bit integer maps the value to the number of logical 1 bits. |
262 | /// |
263 | /// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set. |
264 | /// Otherwise the computation result is written into the result. |
265 | /// |
266 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_popcnt_epi8) |
267 | #[inline ] |
268 | #[target_feature (enable = "avx512bitalg,avx512vl" )] |
269 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
270 | #[cfg_attr (test, assert_instr(vpopcntb))] |
271 | pub unsafe fn _mm256_mask_popcnt_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i { |
272 | transmute(src:simd_select_bitmask( |
273 | m:k, |
274 | yes:popcnt_v32i8(a.as_i8x32()), |
275 | no:src.as_i8x32(), |
276 | )) |
277 | } |
278 | |
279 | /// For each packed 8-bit integer maps the value to the number of logical 1 bits. |
280 | /// |
281 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_popcnt_epi8) |
282 | #[inline ] |
283 | #[target_feature (enable = "avx512bitalg,avx512vl" )] |
284 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
285 | #[cfg_attr (test, assert_instr(vpopcntb))] |
286 | pub unsafe fn _mm_popcnt_epi8(a: __m128i) -> __m128i { |
287 | transmute(src:popcnt_v16i8(a.as_i8x16())) |
288 | } |
289 | |
290 | /// For each packed 8-bit integer maps the value to the number of logical 1 bits. |
291 | /// |
292 | /// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set. |
293 | /// Otherwise the computation result is written into the result. |
294 | /// |
295 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_popcnt_epi8) |
296 | #[inline ] |
297 | #[target_feature (enable = "avx512bitalg,avx512vl" )] |
298 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
299 | #[cfg_attr (test, assert_instr(vpopcntb))] |
300 | pub unsafe fn _mm_maskz_popcnt_epi8(k: __mmask16, a: __m128i) -> __m128i { |
301 | let zero: i8x16 = _mm_setzero_si128().as_i8x16(); |
302 | transmute(src:simd_select_bitmask(m:k, yes:popcnt_v16i8(a.as_i8x16()), no:zero)) |
303 | } |
304 | |
305 | /// For each packed 8-bit integer maps the value to the number of logical 1 bits. |
306 | /// |
307 | /// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set. |
308 | /// Otherwise the computation result is written into the result. |
309 | /// |
310 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_popcnt_epi8) |
311 | #[inline ] |
312 | #[target_feature (enable = "avx512bitalg,avx512vl" )] |
313 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
314 | #[cfg_attr (test, assert_instr(vpopcntb))] |
315 | pub unsafe fn _mm_mask_popcnt_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i { |
316 | transmute(src:simd_select_bitmask( |
317 | m:k, |
318 | yes:popcnt_v16i8(a.as_i8x16()), |
319 | no:src.as_i8x16(), |
320 | )) |
321 | } |
322 | |
323 | /// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers. |
324 | /// Then groups 8 8-bit values from `c`as indices into the bits of the corresponding 64-bit integer. |
325 | /// It then selects these bits and packs them into the output. |
326 | /// |
327 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_bitshuffle_epi64_mask) |
328 | #[inline ] |
329 | #[target_feature (enable = "avx512bitalg" )] |
330 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
331 | #[cfg_attr (test, assert_instr(vpshufbitqmb))] |
332 | pub unsafe fn _mm512_bitshuffle_epi64_mask(b: __m512i, c: __m512i) -> __mmask64 { |
333 | bitshuffle_512(data:b.as_i8x64(), indices:c.as_i8x64(), !0) |
334 | } |
335 | |
336 | /// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers. |
337 | /// Then groups 8 8-bit values from `c`as indices into the bits of the corresponding 64-bit integer. |
338 | /// It then selects these bits and packs them into the output. |
339 | /// |
340 | /// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set. |
341 | /// Otherwise the computation result is written into the result. |
342 | /// |
343 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_bitshuffle_epi64_mask) |
344 | #[inline ] |
345 | #[target_feature (enable = "avx512bitalg" )] |
346 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
347 | #[cfg_attr (test, assert_instr(vpshufbitqmb))] |
348 | pub unsafe fn _mm512_mask_bitshuffle_epi64_mask(k: __mmask64, b: __m512i, c: __m512i) -> __mmask64 { |
349 | bitshuffle_512(data:b.as_i8x64(), indices:c.as_i8x64(), mask:k) |
350 | } |
351 | |
352 | /// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers. |
353 | /// Then groups 8 8-bit values from `c`as indices into the bits of the corresponding 64-bit integer. |
354 | /// It then selects these bits and packs them into the output. |
355 | /// |
356 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_bitshuffle_epi64_mask) |
357 | #[inline ] |
358 | #[target_feature (enable = "avx512bitalg,avx512vl" )] |
359 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
360 | #[cfg_attr (test, assert_instr(vpshufbitqmb))] |
361 | pub unsafe fn _mm256_bitshuffle_epi64_mask(b: __m256i, c: __m256i) -> __mmask32 { |
362 | bitshuffle_256(data:b.as_i8x32(), indices:c.as_i8x32(), !0) |
363 | } |
364 | |
365 | /// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers. |
366 | /// Then groups 8 8-bit values from `c`as indices into the bits of the corresponding 64-bit integer. |
367 | /// It then selects these bits and packs them into the output. |
368 | /// |
369 | /// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set. |
370 | /// Otherwise the computation result is written into the result. |
371 | /// |
372 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_bitshuffle_epi64_mask) |
373 | #[inline ] |
374 | #[target_feature (enable = "avx512bitalg,avx512vl" )] |
375 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
376 | #[cfg_attr (test, assert_instr(vpshufbitqmb))] |
377 | pub unsafe fn _mm256_mask_bitshuffle_epi64_mask(k: __mmask32, b: __m256i, c: __m256i) -> __mmask32 { |
378 | bitshuffle_256(data:b.as_i8x32(), indices:c.as_i8x32(), mask:k) |
379 | } |
380 | |
381 | /// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers. |
382 | /// Then groups 8 8-bit values from `c`as indices into the bits of the corresponding 64-bit integer. |
383 | /// It then selects these bits and packs them into the output. |
384 | /// |
385 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_bitshuffle_epi64_mask) |
386 | #[inline ] |
387 | #[target_feature (enable = "avx512bitalg,avx512vl" )] |
388 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
389 | #[cfg_attr (test, assert_instr(vpshufbitqmb))] |
390 | pub unsafe fn _mm_bitshuffle_epi64_mask(b: __m128i, c: __m128i) -> __mmask16 { |
391 | bitshuffle_128(data:b.as_i8x16(), indices:c.as_i8x16(), !0) |
392 | } |
393 | |
394 | /// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers. |
395 | /// Then groups 8 8-bit values from `c`as indices into the bits of the corresponding 64-bit integer. |
396 | /// It then selects these bits and packs them into the output. |
397 | /// |
398 | /// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set. |
399 | /// Otherwise the computation result is written into the result. |
400 | /// |
401 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_bitshuffle_epi64_mask) |
402 | #[inline ] |
403 | #[target_feature (enable = "avx512bitalg,avx512vl" )] |
404 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
405 | #[cfg_attr (test, assert_instr(vpshufbitqmb))] |
406 | pub unsafe fn _mm_mask_bitshuffle_epi64_mask(k: __mmask16, b: __m128i, c: __m128i) -> __mmask16 { |
407 | bitshuffle_128(data:b.as_i8x16(), indices:c.as_i8x16(), mask:k) |
408 | } |
409 | |
410 | #[cfg (test)] |
411 | mod tests { |
412 | // Some of the constants in the tests below are just bit patterns. They should not |
413 | // be interpreted as integers; signedness does not make sense for them, but |
414 | // __mXXXi happens to be defined in terms of signed integers. |
415 | #![allow (overflowing_literals)] |
416 | |
417 | use stdarch_test::simd_test; |
418 | |
419 | use crate::core_arch::x86::*; |
420 | |
421 | #[simd_test(enable = "avx512bitalg,avx512f" )] |
422 | unsafe fn test_mm512_popcnt_epi16() { |
423 | let test_data = _mm512_set_epi16( |
424 | 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF, |
425 | 0x3F_FF, 0x7F_FF, 0xFF_FF, -1, -100, 255, 256, 2, 4, 8, 16, 32, 64, 128, 256, 512, |
426 | 1024, 2048, |
427 | ); |
428 | let actual_result = _mm512_popcnt_epi16(test_data); |
429 | let reference_result = _mm512_set_epi16( |
430 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 16, 12, 8, 1, 1, 1, 1, 1, 1, |
431 | 1, 1, 1, 1, 1, 1, |
432 | ); |
433 | assert_eq_m512i(actual_result, reference_result); |
434 | } |
435 | |
436 | #[simd_test(enable = "avx512bitalg,avx512f" )] |
437 | unsafe fn test_mm512_maskz_popcnt_epi16() { |
438 | let test_data = _mm512_set_epi16( |
439 | 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF, |
440 | 0x3F_FF, 0x7F_FF, 0xFF_FF, -1, -100, 255, 256, 2, 4, 8, 16, 32, 64, 128, 256, 512, |
441 | 1024, 2048, |
442 | ); |
443 | let mask = 0xFF_FF_00_00; |
444 | let actual_result = _mm512_maskz_popcnt_epi16(mask, test_data); |
445 | let reference_result = _mm512_set_epi16( |
446 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
447 | 0, 0, 0, 0, 0, |
448 | ); |
449 | assert_eq_m512i(actual_result, reference_result); |
450 | } |
451 | |
452 | #[simd_test(enable = "avx512bitalg,avx512f" )] |
453 | unsafe fn test_mm512_mask_popcnt_epi16() { |
454 | let test_data = _mm512_set_epi16( |
455 | 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF, |
456 | 0x3F_FF, 0x7F_FF, 0xFF_FF, -1, -100, 255, 256, 2, 4, 8, 16, 32, 64, 128, 256, 512, |
457 | 1024, 2048, |
458 | ); |
459 | let mask = 0xFF_FF_00_00; |
460 | let actual_result = _mm512_mask_popcnt_epi16(test_data, mask, test_data); |
461 | let reference_result = _mm512_set_epi16( |
462 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0xFF_FF, -1, -100, 255, 256, 2, |
463 | 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, |
464 | ); |
465 | assert_eq_m512i(actual_result, reference_result); |
466 | } |
467 | |
468 | #[simd_test(enable = "avx512bitalg,avx512f,avx512vl" )] |
469 | unsafe fn test_mm256_popcnt_epi16() { |
470 | let test_data = _mm256_set_epi16( |
471 | 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF, |
472 | 0x3F_FF, 0x7F_FF, |
473 | ); |
474 | let actual_result = _mm256_popcnt_epi16(test_data); |
475 | let reference_result = |
476 | _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
477 | assert_eq_m256i(actual_result, reference_result); |
478 | } |
479 | |
480 | #[simd_test(enable = "avx512bitalg,avx512f,avx512vl" )] |
481 | unsafe fn test_mm256_maskz_popcnt_epi16() { |
482 | let test_data = _mm256_set_epi16( |
483 | 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF, |
484 | 0x3F_FF, 0x7F_FF, |
485 | ); |
486 | let mask = 0xFF_00; |
487 | let actual_result = _mm256_maskz_popcnt_epi16(mask, test_data); |
488 | let reference_result = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0); |
489 | assert_eq_m256i(actual_result, reference_result); |
490 | } |
491 | |
492 | #[simd_test(enable = "avx512bitalg,avx512f,avx512vl" )] |
493 | unsafe fn test_mm256_mask_popcnt_epi16() { |
494 | let test_data = _mm256_set_epi16( |
495 | 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF, |
496 | 0x3F_FF, 0x7F_FF, |
497 | ); |
498 | let mask = 0xFF_00; |
499 | let actual_result = _mm256_mask_popcnt_epi16(test_data, mask, test_data); |
500 | let reference_result = _mm256_set_epi16( |
501 | 0, 1, 2, 3, 4, 5, 6, 7, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF, 0x3F_FF, 0x7F_FF, |
502 | ); |
503 | assert_eq_m256i(actual_result, reference_result); |
504 | } |
505 | |
506 | #[simd_test(enable = "avx512bitalg,avx512f,avx512vl" )] |
507 | unsafe fn test_mm_popcnt_epi16() { |
508 | let test_data = _mm_set_epi16(0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F); |
509 | let actual_result = _mm_popcnt_epi16(test_data); |
510 | let reference_result = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
511 | assert_eq_m128i(actual_result, reference_result); |
512 | } |
513 | |
514 | #[simd_test(enable = "avx512bitalg,avx512f,avx512vl" )] |
515 | unsafe fn test_mm_maskz_popcnt_epi16() { |
516 | let test_data = _mm_set_epi16(0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F); |
517 | let mask = 0xF0; |
518 | let actual_result = _mm_maskz_popcnt_epi16(mask, test_data); |
519 | let reference_result = _mm_set_epi16(0, 1, 2, 3, 0, 0, 0, 0); |
520 | assert_eq_m128i(actual_result, reference_result); |
521 | } |
522 | |
523 | #[simd_test(enable = "avx512bitalg,avx512f,avx512vl" )] |
524 | unsafe fn test_mm_mask_popcnt_epi16() { |
525 | let test_data = _mm_set_epi16(0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F); |
526 | let mask = 0xF0; |
527 | let actual_result = _mm_mask_popcnt_epi16(test_data, mask, test_data); |
528 | let reference_result = _mm_set_epi16(0, 1, 2, 3, 0xF, 0x1F, 0x3F, 0x7F); |
529 | assert_eq_m128i(actual_result, reference_result); |
530 | } |
531 | |
532 | #[simd_test(enable = "avx512bitalg,avx512f" )] |
533 | unsafe fn test_mm512_popcnt_epi8() { |
534 | let test_data = _mm512_set_epi8( |
535 | 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 128, 171, 206, 100, |
536 | 217, 109, 253, 190, 177, 254, 179, 215, 230, 68, 201, 172, 183, 154, 84, 56, 227, 189, |
537 | 140, 35, 117, 219, 169, 226, 170, 13, 22, 159, 251, 73, 121, 143, 145, 85, 91, 137, 90, |
538 | 225, 21, 249, 211, 155, 228, 70, |
539 | ); |
540 | let actual_result = _mm512_popcnt_epi8(test_data); |
541 | let reference_result = _mm512_set_epi8( |
542 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 1, 5, 5, 3, 5, 5, 7, 6, 4, 7, 5, 6, 5, |
543 | 2, 4, 4, 6, 4, 3, 3, 5, 6, 3, 3, 5, 6, 4, 4, 4, 3, 3, 6, 7, 3, 5, 5, 3, 4, 5, 3, 4, 4, |
544 | 3, 6, 5, 5, 4, 3, |
545 | ); |
546 | assert_eq_m512i(actual_result, reference_result); |
547 | } |
548 | |
549 | #[simd_test(enable = "avx512bitalg,avx512f" )] |
550 | unsafe fn test_mm512_maskz_popcnt_epi8() { |
551 | let test_data = _mm512_set_epi8( |
552 | 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 128, 171, 206, 100, |
553 | 217, 109, 253, 190, 177, 254, 179, 215, 230, 68, 201, 172, 183, 154, 84, 56, 227, 189, |
554 | 140, 35, 117, 219, 169, 226, 170, 13, 22, 159, 251, 73, 121, 143, 145, 85, 91, 137, 90, |
555 | 225, 21, 249, 211, 155, 228, 70, |
556 | ); |
557 | let mask = 0xFF_FF_FF_FF_00_00_00_00; |
558 | let actual_result = _mm512_maskz_popcnt_epi8(mask, test_data); |
559 | let reference_result = _mm512_set_epi8( |
560 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 1, 5, 5, 3, 5, 5, 7, 6, 4, 7, 5, 6, 5, |
561 | 2, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
562 | 0, 0, 0, 0, 0, 0, |
563 | ); |
564 | assert_eq_m512i(actual_result, reference_result); |
565 | } |
566 | |
567 | #[simd_test(enable = "avx512bitalg,avx512f" )] |
568 | unsafe fn test_mm512_mask_popcnt_epi8() { |
569 | let test_data = _mm512_set_epi8( |
570 | 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 128, 171, 206, 100, |
571 | 217, 109, 253, 190, 177, 254, 179, 215, 230, 68, 201, 172, 183, 154, 84, 56, 227, 189, |
572 | 140, 35, 117, 219, 169, 226, 170, 13, 22, 159, 251, 73, 121, 143, 145, 85, 91, 137, 90, |
573 | 225, 21, 249, 211, 155, 228, 70, |
574 | ); |
575 | let mask = 0xFF_FF_FF_FF_00_00_00_00; |
576 | let actual_result = _mm512_mask_popcnt_epi8(test_data, mask, test_data); |
577 | let reference_result = _mm512_set_epi8( |
578 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 1, 5, 5, 3, 5, 5, 7, 6, 4, 7, 5, 6, 5, |
579 | 2, 4, 4, 183, 154, 84, 56, 227, 189, 140, 35, 117, 219, 169, 226, 170, 13, 22, 159, |
580 | 251, 73, 121, 143, 145, 85, 91, 137, 90, 225, 21, 249, 211, 155, 228, 70, |
581 | ); |
582 | assert_eq_m512i(actual_result, reference_result); |
583 | } |
584 | |
585 | #[simd_test(enable = "avx512bitalg,avx512f,avx512vl" )] |
586 | unsafe fn test_mm256_popcnt_epi8() { |
587 | let test_data = _mm256_set_epi8( |
588 | 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 128, 171, 206, 100, |
589 | 217, 109, 253, 190, 177, 254, 179, 215, 230, 68, 201, 172, |
590 | ); |
591 | let actual_result = _mm256_popcnt_epi8(test_data); |
592 | let reference_result = _mm256_set_epi8( |
593 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 1, 5, 5, 3, 5, 5, 7, 6, 4, 7, 5, 6, 5, |
594 | 2, 4, 4, |
595 | ); |
596 | assert_eq_m256i(actual_result, reference_result); |
597 | } |
598 | |
599 | #[simd_test(enable = "avx512bitalg,avx512f,avx512vl" )] |
600 | unsafe fn test_mm256_maskz_popcnt_epi8() { |
601 | let test_data = _mm256_set_epi8( |
602 | 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 251, 73, 121, 143, |
603 | 145, 85, 91, 137, 90, 225, 21, 249, 211, 155, 228, 70, |
604 | ); |
605 | let mask = 0xFF_FF_00_00; |
606 | let actual_result = _mm256_maskz_popcnt_epi8(mask, test_data); |
607 | let reference_result = _mm256_set_epi8( |
608 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
609 | 0, 0, 0, |
610 | ); |
611 | assert_eq_m256i(actual_result, reference_result); |
612 | } |
613 | |
614 | #[simd_test(enable = "avx512bitalg,avx512f,avx512vl" )] |
615 | unsafe fn test_mm256_mask_popcnt_epi8() { |
616 | let test_data = _mm256_set_epi8( |
617 | 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 251, 73, 121, 143, |
618 | 145, 85, 91, 137, 90, 225, 21, 249, 211, 155, 228, 70, |
619 | ); |
620 | let mask = 0xFF_FF_00_00; |
621 | let actual_result = _mm256_mask_popcnt_epi8(test_data, mask, test_data); |
622 | let reference_result = _mm256_set_epi8( |
623 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 251, 73, 121, 143, 145, 85, 91, 137, |
624 | 90, 225, 21, 249, 211, 155, 228, 70, |
625 | ); |
626 | assert_eq_m256i(actual_result, reference_result); |
627 | } |
628 | |
629 | #[simd_test(enable = "avx512bitalg,avx512f,avx512vl" )] |
630 | unsafe fn test_mm_popcnt_epi8() { |
631 | let test_data = _mm_set_epi8( |
632 | 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, |
633 | ); |
634 | let actual_result = _mm_popcnt_epi8(test_data); |
635 | let reference_result = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1); |
636 | assert_eq_m128i(actual_result, reference_result); |
637 | } |
638 | |
639 | #[simd_test(enable = "avx512bitalg,avx512f,avx512vl" )] |
640 | unsafe fn test_mm_maskz_popcnt_epi8() { |
641 | let test_data = _mm_set_epi8( |
642 | 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 90, 225, 21, 249, 211, 155, 228, 70, |
643 | ); |
644 | let mask = 0xFF_00; |
645 | let actual_result = _mm_maskz_popcnt_epi8(mask, test_data); |
646 | let reference_result = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0); |
647 | assert_eq_m128i(actual_result, reference_result); |
648 | } |
649 | |
650 | #[simd_test(enable = "avx512bitalg,avx512f,avx512vl" )] |
651 | unsafe fn test_mm_mask_popcnt_epi8() { |
652 | let test_data = _mm_set_epi8( |
653 | 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 90, 225, 21, 249, 211, 155, 228, 70, |
654 | ); |
655 | let mask = 0xFF_00; |
656 | let actual_result = _mm_mask_popcnt_epi8(test_data, mask, test_data); |
657 | let reference_result = |
658 | _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 90, 225, 21, 249, 211, 155, 228, 70); |
659 | assert_eq_m128i(actual_result, reference_result); |
660 | } |
661 | |
662 | #[simd_test(enable = "avx512bitalg,avx512f" )] |
663 | unsafe fn test_mm512_bitshuffle_epi64_mask() { |
664 | let test_indices = _mm512_set_epi8( |
665 | 63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56, 32, 32, 16, 16, 0, 0, |
666 | 8, 8, 56, 48, 40, 32, 24, 16, 8, 0, 63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, |
667 | 58, 57, 56, 32, 32, 16, 16, 0, 0, 8, 8, 56, 48, 40, 32, 24, 16, 8, 0, |
668 | ); |
669 | let test_data = _mm512_setr_epi64( |
670 | 0xFF_FF_FF_FF_00_00_00_00, |
671 | 0xFF_00_FF_00_FF_00_FF_00, |
672 | 0xFF_00_00_00_00_00_00_00, |
673 | 0xAC_00_00_00_00_00_00_00, |
674 | 0xFF_FF_FF_FF_00_00_00_00, |
675 | 0xFF_00_FF_00_FF_00_FF_00, |
676 | 0xFF_00_00_00_00_00_00_00, |
677 | 0xAC_00_00_00_00_00_00_00, |
678 | ); |
679 | let actual_result = _mm512_bitshuffle_epi64_mask(test_data, test_indices); |
680 | let reference_result = 0xF0 << 0 |
681 | | 0x03 << 8 |
682 | | 0xFF << 16 |
683 | | 0xAC << 24 |
684 | | 0xF0 << 32 |
685 | | 0x03 << 40 |
686 | | 0xFF << 48 |
687 | | 0xAC << 56; |
688 | |
689 | assert_eq!(actual_result, reference_result); |
690 | } |
691 | |
692 | #[simd_test(enable = "avx512bitalg,avx512f" )] |
693 | unsafe fn test_mm512_mask_bitshuffle_epi64_mask() { |
694 | let test_indices = _mm512_set_epi8( |
695 | 63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56, 32, 32, 16, 16, 0, 0, |
696 | 8, 8, 56, 48, 40, 32, 24, 16, 8, 0, 63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, |
697 | 58, 57, 56, 32, 32, 16, 16, 0, 0, 8, 8, 56, 48, 40, 32, 24, 16, 8, 0, |
698 | ); |
699 | let test_data = _mm512_setr_epi64( |
700 | 0xFF_FF_FF_FF_00_00_00_00, |
701 | 0xFF_00_FF_00_FF_00_FF_00, |
702 | 0xFF_00_00_00_00_00_00_00, |
703 | 0xAC_00_00_00_00_00_00_00, |
704 | 0xFF_FF_FF_FF_00_00_00_00, |
705 | 0xFF_00_FF_00_FF_00_FF_00, |
706 | 0xFF_00_00_00_00_00_00_00, |
707 | 0xAC_00_00_00_00_00_00_00, |
708 | ); |
709 | let mask = 0xFF_FF_FF_FF_00_00_00_00; |
710 | let actual_result = _mm512_mask_bitshuffle_epi64_mask(mask, test_data, test_indices); |
711 | let reference_result = 0x00 << 0 |
712 | | 0x00 << 8 |
713 | | 0x00 << 16 |
714 | | 0x00 << 24 |
715 | | 0xF0 << 32 |
716 | | 0x03 << 40 |
717 | | 0xFF << 48 |
718 | | 0xAC << 56; |
719 | |
720 | assert_eq!(actual_result, reference_result); |
721 | } |
722 | |
723 | #[simd_test(enable = "avx512bitalg,avx512f,avx512vl" )] |
724 | unsafe fn test_mm256_bitshuffle_epi64_mask() { |
725 | let test_indices = _mm256_set_epi8( |
726 | 63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56, 32, 32, 16, 16, 0, 0, |
727 | 8, 8, 56, 48, 40, 32, 24, 16, 8, 0, |
728 | ); |
729 | let test_data = _mm256_setr_epi64x( |
730 | 0xFF_FF_FF_FF_00_00_00_00, |
731 | 0xFF_00_FF_00_FF_00_FF_00, |
732 | 0xFF_00_00_00_00_00_00_00, |
733 | 0xAC_00_00_00_00_00_00_00, |
734 | ); |
735 | let actual_result = _mm256_bitshuffle_epi64_mask(test_data, test_indices); |
736 | let reference_result = 0xF0 << 0 | 0x03 << 8 | 0xFF << 16 | 0xAC << 24; |
737 | |
738 | assert_eq!(actual_result, reference_result); |
739 | } |
740 | |
741 | #[simd_test(enable = "avx512bitalg,avx512f,avx512vl" )] |
742 | unsafe fn test_mm256_mask_bitshuffle_epi64_mask() { |
743 | let test_indices = _mm256_set_epi8( |
744 | 63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56, 32, 32, 16, 16, 0, 0, |
745 | 8, 8, 56, 48, 40, 32, 24, 16, 8, 0, |
746 | ); |
747 | let test_data = _mm256_setr_epi64x( |
748 | 0xFF_FF_FF_FF_00_00_00_00, |
749 | 0xFF_00_FF_00_FF_00_FF_00, |
750 | 0xFF_00_00_00_00_00_00_00, |
751 | 0xAC_00_00_00_00_00_00_00, |
752 | ); |
753 | let mask = 0xFF_FF_00_00; |
754 | let actual_result = _mm256_mask_bitshuffle_epi64_mask(mask, test_data, test_indices); |
755 | let reference_result = 0x00 << 0 | 0x00 << 8 | 0xFF << 16 | 0xAC << 24; |
756 | |
757 | assert_eq!(actual_result, reference_result); |
758 | } |
759 | |
760 | #[simd_test(enable = "avx512bitalg,avx512f,avx512vl" )] |
761 | unsafe fn test_mm_bitshuffle_epi64_mask() { |
762 | let test_indices = _mm_set_epi8( |
763 | 63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56, |
764 | ); |
765 | let test_data = _mm_setr_epi64x(0xFF_00_00_00_00_00_00_00, 0xAC_00_00_00_00_00_00_00); |
766 | let actual_result = _mm_bitshuffle_epi64_mask(test_data, test_indices); |
767 | let reference_result = 0xFF << 0 | 0xAC << 8; |
768 | |
769 | assert_eq!(actual_result, reference_result); |
770 | } |
771 | |
772 | #[simd_test(enable = "avx512bitalg,avx512f,avx512vl" )] |
773 | unsafe fn test_mm_mask_bitshuffle_epi64_mask() { |
774 | let test_indices = _mm_set_epi8( |
775 | 63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56, |
776 | ); |
777 | let test_data = _mm_setr_epi64x(0xFF_00_00_00_00_00_00_00, 0xAC_00_00_00_00_00_00_00); |
778 | let mask = 0xFF_00; |
779 | let actual_result = _mm_mask_bitshuffle_epi64_mask(mask, test_data, test_indices); |
780 | let reference_result = 0x00 << 0 | 0xAC << 8; |
781 | |
782 | assert_eq!(actual_result, reference_result); |
783 | } |
784 | } |
785 | |