1//! Bit-oriented Algorithms (BITALG)
2//!
3//! The intrinsics here correspond to those in the `immintrin.h` C header.
4//!
5//! The reference is [Intel 64 and IA-32 Architectures Software Developer's
6//! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref].
7//!
8//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
9
10use crate::core_arch::simd::i8x16;
11use crate::core_arch::simd::i8x32;
12use crate::core_arch::simd::i8x64;
13use crate::core_arch::simd::i16x8;
14use crate::core_arch::simd::i16x16;
15use crate::core_arch::simd::i16x32;
16use crate::core_arch::x86::__m128i;
17use crate::core_arch::x86::__m256i;
18use crate::core_arch::x86::__m512i;
19use crate::core_arch::x86::__mmask8;
20use crate::core_arch::x86::__mmask16;
21use crate::core_arch::x86::__mmask32;
22use crate::core_arch::x86::__mmask64;
23use crate::intrinsics::simd::{simd_ctpop, simd_select_bitmask};
24use crate::mem::transmute;
25
26#[cfg(test)]
27use stdarch_test::assert_instr;
28
29#[allow(improper_ctypes)]
30unsafe extern "C" {
31 #[link_name = "llvm.x86.avx512.mask.vpshufbitqmb.512"]
32 unsafefn bitshuffle_512(data: i8x64, indices: i8x64, mask: __mmask64) -> __mmask64;
33 #[link_name = "llvm.x86.avx512.mask.vpshufbitqmb.256"]
34 unsafefn bitshuffle_256(data: i8x32, indices: i8x32, mask: __mmask32) -> __mmask32;
35 #[link_name = "llvm.x86.avx512.mask.vpshufbitqmb.128"]
36 unsafefn bitshuffle_128(data: i8x16, indices: i8x16, mask: __mmask16) -> __mmask16;
37}
38
39/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
40///
41/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_popcnt_epi16)
42#[inline]
43#[target_feature(enable = "avx512bitalg")]
44#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
45#[cfg_attr(test, assert_instr(vpopcntw))]
46pub fn _mm512_popcnt_epi16(a: __m512i) -> __m512i {
47 unsafe { transmute(src:simd_ctpop(a.as_i16x32())) }
48}
49
50/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
51///
52/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
53/// Otherwise the computation result is written into the result.
54///
55/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_popcnt_epi16)
56#[inline]
57#[target_feature(enable = "avx512bitalg")]
58#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
59#[cfg_attr(test, assert_instr(vpopcntw))]
60pub fn _mm512_maskz_popcnt_epi16(k: __mmask32, a: __m512i) -> __m512i {
61 unsafe {
62 transmute(src:simd_select_bitmask(
63 m:k,
64 yes:simd_ctpop(a.as_i16x32()),
65 no:i16x32::ZERO,
66 ))
67 }
68}
69
70/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
71///
72/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
73/// Otherwise the computation result is written into the result.
74///
75/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_popcnt_epi16)
76#[inline]
77#[target_feature(enable = "avx512bitalg")]
78#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
79#[cfg_attr(test, assert_instr(vpopcntw))]
80pub fn _mm512_mask_popcnt_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
81 unsafe {
82 transmute(src:simd_select_bitmask(
83 m:k,
84 yes:simd_ctpop(a.as_i16x32()),
85 no:src.as_i16x32(),
86 ))
87 }
88}
89
90/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
91///
92/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_popcnt_epi16)
93#[inline]
94#[target_feature(enable = "avx512bitalg,avx512vl")]
95#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
96#[cfg_attr(test, assert_instr(vpopcntw))]
97pub fn _mm256_popcnt_epi16(a: __m256i) -> __m256i {
98 unsafe { transmute(src:simd_ctpop(a.as_i16x16())) }
99}
100
101/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
102///
103/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
104/// Otherwise the computation result is written into the result.
105///
106/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_popcnt_epi16)
107#[inline]
108#[target_feature(enable = "avx512bitalg,avx512vl")]
109#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
110#[cfg_attr(test, assert_instr(vpopcntw))]
111pub fn _mm256_maskz_popcnt_epi16(k: __mmask16, a: __m256i) -> __m256i {
112 unsafe {
113 transmute(src:simd_select_bitmask(
114 m:k,
115 yes:simd_ctpop(a.as_i16x16()),
116 no:i16x16::ZERO,
117 ))
118 }
119}
120
121/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
122///
123/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
124/// Otherwise the computation result is written into the result.
125///
126/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_popcnt_epi16)
127#[inline]
128#[target_feature(enable = "avx512bitalg,avx512vl")]
129#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
130#[cfg_attr(test, assert_instr(vpopcntw))]
131pub fn _mm256_mask_popcnt_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
132 unsafe {
133 transmute(src:simd_select_bitmask(
134 m:k,
135 yes:simd_ctpop(a.as_i16x16()),
136 no:src.as_i16x16(),
137 ))
138 }
139}
140
141/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
142///
143/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_popcnt_epi16)
144#[inline]
145#[target_feature(enable = "avx512bitalg,avx512vl")]
146#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
147#[cfg_attr(test, assert_instr(vpopcntw))]
148pub fn _mm_popcnt_epi16(a: __m128i) -> __m128i {
149 unsafe { transmute(src:simd_ctpop(a.as_i16x8())) }
150}
151
152/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
153///
154/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
155/// Otherwise the computation result is written into the result.
156///
157/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_popcnt_epi16)
158#[inline]
159#[target_feature(enable = "avx512bitalg,avx512vl")]
160#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
161#[cfg_attr(test, assert_instr(vpopcntw))]
162pub fn _mm_maskz_popcnt_epi16(k: __mmask8, a: __m128i) -> __m128i {
163 unsafe {
164 transmute(src:simd_select_bitmask(
165 m:k,
166 yes:simd_ctpop(a.as_i16x8()),
167 no:i16x8::ZERO,
168 ))
169 }
170}
171
172/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
173///
174/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
175/// Otherwise the computation result is written into the result.
176///
177/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_popcnt_epi16)
178#[inline]
179#[target_feature(enable = "avx512bitalg,avx512vl")]
180#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
181#[cfg_attr(test, assert_instr(vpopcntw))]
182pub fn _mm_mask_popcnt_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
183 unsafe {
184 transmute(src:simd_select_bitmask(
185 m:k,
186 yes:simd_ctpop(a.as_i16x8()),
187 no:src.as_i16x8(),
188 ))
189 }
190}
191
192/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
193///
194/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_popcnt_epi8)
195#[inline]
196#[target_feature(enable = "avx512bitalg")]
197#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
198#[cfg_attr(test, assert_instr(vpopcntb))]
199pub fn _mm512_popcnt_epi8(a: __m512i) -> __m512i {
200 unsafe { transmute(src:simd_ctpop(a.as_i8x64())) }
201}
202
203/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
204///
205/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
206/// Otherwise the computation result is written into the result.
207///
208/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_popcnt_epi8)
209#[inline]
210#[target_feature(enable = "avx512bitalg")]
211#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
212#[cfg_attr(test, assert_instr(vpopcntb))]
213pub fn _mm512_maskz_popcnt_epi8(k: __mmask64, a: __m512i) -> __m512i {
214 unsafe {
215 transmute(src:simd_select_bitmask(
216 m:k,
217 yes:simd_ctpop(a.as_i8x64()),
218 no:i8x64::ZERO,
219 ))
220 }
221}
222
223/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
224///
225/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
226/// Otherwise the computation result is written into the result.
227///
228/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_popcnt_epi8)
229#[inline]
230#[target_feature(enable = "avx512bitalg")]
231#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
232#[cfg_attr(test, assert_instr(vpopcntb))]
233pub fn _mm512_mask_popcnt_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
234 unsafe {
235 transmute(src:simd_select_bitmask(
236 m:k,
237 yes:simd_ctpop(a.as_i8x64()),
238 no:src.as_i8x64(),
239 ))
240 }
241}
242
243/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
244///
245/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_popcnt_epi8)
246#[inline]
247#[target_feature(enable = "avx512bitalg,avx512vl")]
248#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
249#[cfg_attr(test, assert_instr(vpopcntb))]
250pub fn _mm256_popcnt_epi8(a: __m256i) -> __m256i {
251 unsafe { transmute(src:simd_ctpop(a.as_i8x32())) }
252}
253
254/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
255///
256/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
257/// Otherwise the computation result is written into the result.
258///
259/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_popcnt_epi8)
260#[inline]
261#[target_feature(enable = "avx512bitalg,avx512vl")]
262#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
263#[cfg_attr(test, assert_instr(vpopcntb))]
264pub fn _mm256_maskz_popcnt_epi8(k: __mmask32, a: __m256i) -> __m256i {
265 unsafe {
266 transmute(src:simd_select_bitmask(
267 m:k,
268 yes:simd_ctpop(a.as_i8x32()),
269 no:i8x32::ZERO,
270 ))
271 }
272}
273
274/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
275///
276/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
277/// Otherwise the computation result is written into the result.
278///
279/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_popcnt_epi8)
280#[inline]
281#[target_feature(enable = "avx512bitalg,avx512vl")]
282#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
283#[cfg_attr(test, assert_instr(vpopcntb))]
284pub fn _mm256_mask_popcnt_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
285 unsafe {
286 transmute(src:simd_select_bitmask(
287 m:k,
288 yes:simd_ctpop(a.as_i8x32()),
289 no:src.as_i8x32(),
290 ))
291 }
292}
293
294/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
295///
296/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_popcnt_epi8)
297#[inline]
298#[target_feature(enable = "avx512bitalg,avx512vl")]
299#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
300#[cfg_attr(test, assert_instr(vpopcntb))]
301pub fn _mm_popcnt_epi8(a: __m128i) -> __m128i {
302 unsafe { transmute(src:simd_ctpop(a.as_i8x16())) }
303}
304
305/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
306///
307/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
308/// Otherwise the computation result is written into the result.
309///
310/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_popcnt_epi8)
311#[inline]
312#[target_feature(enable = "avx512bitalg,avx512vl")]
313#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
314#[cfg_attr(test, assert_instr(vpopcntb))]
315pub fn _mm_maskz_popcnt_epi8(k: __mmask16, a: __m128i) -> __m128i {
316 unsafe {
317 transmute(src:simd_select_bitmask(
318 m:k,
319 yes:simd_ctpop(a.as_i8x16()),
320 no:i8x16::ZERO,
321 ))
322 }
323}
324
325/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
326///
327/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
328/// Otherwise the computation result is written into the result.
329///
330/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_popcnt_epi8)
331#[inline]
332#[target_feature(enable = "avx512bitalg,avx512vl")]
333#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
334#[cfg_attr(test, assert_instr(vpopcntb))]
335pub fn _mm_mask_popcnt_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
336 unsafe {
337 transmute(src:simd_select_bitmask(
338 m:k,
339 yes:simd_ctpop(a.as_i8x16()),
340 no:src.as_i8x16(),
341 ))
342 }
343}
344
345/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
346/// Then groups 8 8-bit values from `c`as indices into the bits of the corresponding 64-bit integer.
347/// It then selects these bits and packs them into the output.
348///
349/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_bitshuffle_epi64_mask)
350#[inline]
351#[target_feature(enable = "avx512bitalg")]
352#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
353#[cfg_attr(test, assert_instr(vpshufbitqmb))]
354pub fn _mm512_bitshuffle_epi64_mask(b: __m512i, c: __m512i) -> __mmask64 {
355 unsafe { bitshuffle_512(data:b.as_i8x64(), indices:c.as_i8x64(), !0) }
356}
357
358/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
359/// Then groups 8 8-bit values from `c`as indices into the bits of the corresponding 64-bit integer.
360/// It then selects these bits and packs them into the output.
361///
362/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
363/// Otherwise the computation result is written into the result.
364///
365/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_bitshuffle_epi64_mask)
366#[inline]
367#[target_feature(enable = "avx512bitalg")]
368#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
369#[cfg_attr(test, assert_instr(vpshufbitqmb))]
370pub fn _mm512_mask_bitshuffle_epi64_mask(k: __mmask64, b: __m512i, c: __m512i) -> __mmask64 {
371 unsafe { bitshuffle_512(data:b.as_i8x64(), indices:c.as_i8x64(), mask:k) }
372}
373
374/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
375/// Then groups 8 8-bit values from `c`as indices into the bits of the corresponding 64-bit integer.
376/// It then selects these bits and packs them into the output.
377///
378/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_bitshuffle_epi64_mask)
379#[inline]
380#[target_feature(enable = "avx512bitalg,avx512vl")]
381#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
382#[cfg_attr(test, assert_instr(vpshufbitqmb))]
383pub fn _mm256_bitshuffle_epi64_mask(b: __m256i, c: __m256i) -> __mmask32 {
384 unsafe { bitshuffle_256(data:b.as_i8x32(), indices:c.as_i8x32(), !0) }
385}
386
387/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
388/// Then groups 8 8-bit values from `c`as indices into the bits of the corresponding 64-bit integer.
389/// It then selects these bits and packs them into the output.
390///
391/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
392/// Otherwise the computation result is written into the result.
393///
394/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_bitshuffle_epi64_mask)
395#[inline]
396#[target_feature(enable = "avx512bitalg,avx512vl")]
397#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
398#[cfg_attr(test, assert_instr(vpshufbitqmb))]
399pub fn _mm256_mask_bitshuffle_epi64_mask(k: __mmask32, b: __m256i, c: __m256i) -> __mmask32 {
400 unsafe { bitshuffle_256(data:b.as_i8x32(), indices:c.as_i8x32(), mask:k) }
401}
402
403/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
404/// Then groups 8 8-bit values from `c`as indices into the bits of the corresponding 64-bit integer.
405/// It then selects these bits and packs them into the output.
406///
407/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_bitshuffle_epi64_mask)
408#[inline]
409#[target_feature(enable = "avx512bitalg,avx512vl")]
410#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
411#[cfg_attr(test, assert_instr(vpshufbitqmb))]
412pub fn _mm_bitshuffle_epi64_mask(b: __m128i, c: __m128i) -> __mmask16 {
413 unsafe { bitshuffle_128(data:b.as_i8x16(), indices:c.as_i8x16(), !0) }
414}
415
416/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
417/// Then groups 8 8-bit values from `c`as indices into the bits of the corresponding 64-bit integer.
418/// It then selects these bits and packs them into the output.
419///
420/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
421/// Otherwise the computation result is written into the result.
422///
423/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_bitshuffle_epi64_mask)
424#[inline]
425#[target_feature(enable = "avx512bitalg,avx512vl")]
426#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
427#[cfg_attr(test, assert_instr(vpshufbitqmb))]
428pub fn _mm_mask_bitshuffle_epi64_mask(k: __mmask16, b: __m128i, c: __m128i) -> __mmask16 {
429 unsafe { bitshuffle_128(data:b.as_i8x16(), indices:c.as_i8x16(), mask:k) }
430}
431
// Tests for the BITALG intrinsics in this file: one test per public function,
// each comparing the intrinsic's output against a hand-written expected value.
#[cfg(test)]
mod tests {
    // Some of the constants in the tests below are just bit patterns. They should not
    // be interpreted as integers; signedness does not make sense for them, but
    // __mXXXi happens to be defined in terms of signed integers.
    #![allow(overflowing_literals)]

    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;

    // ---- 16-bit popcount, 512-bit vectors ----

    // Unmasked: the expected vector holds the bit count of every input element.
    #[simd_test(enable = "avx512bitalg,avx512f")]
    unsafe fn test_mm512_popcnt_epi16() {
        let test_data = _mm512_set_epi16(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
            0x3F_FF, 0x7F_FF, 0xFF_FF, -1, -100, 255, 256, 2, 4, 8, 16, 32, 64, 128, 256, 512,
            1024, 2048,
        );
        let actual_result = _mm512_popcnt_epi16(test_data);
        let reference_result = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 16, 12, 8, 1, 1, 1, 1, 1, 1,
            1, 1, 1, 1, 1, 1,
        );
        assert_eq_m512i(actual_result, reference_result);
    }

    // Zero-masking: only the upper 16 mask bits are set, and the expected vector
    // is zero in the last 16 `set_epi16` arguments.
    #[simd_test(enable = "avx512bitalg,avx512f")]
    unsafe fn test_mm512_maskz_popcnt_epi16() {
        let test_data = _mm512_set_epi16(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
            0x3F_FF, 0x7F_FF, 0xFF_FF, -1, -100, 255, 256, 2, 4, 8, 16, 32, 64, 128, 256, 512,
            1024, 2048,
        );
        let mask = 0xFF_FF_00_00;
        let actual_result = _mm512_maskz_popcnt_epi16(mask, test_data);
        let reference_result = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0,
        );
        assert_eq_m512i(actual_result, reference_result);
    }

    // Merge-masking: `test_data` doubles as `src`, so elements whose mask bit is
    // clear are expected to keep their input value.
    #[simd_test(enable = "avx512bitalg,avx512f")]
    unsafe fn test_mm512_mask_popcnt_epi16() {
        let test_data = _mm512_set_epi16(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
            0x3F_FF, 0x7F_FF, 0xFF_FF, -1, -100, 255, 256, 2, 4, 8, 16, 32, 64, 128, 256, 512,
            1024, 2048,
        );
        let mask = 0xFF_FF_00_00;
        let actual_result = _mm512_mask_popcnt_epi16(test_data, mask, test_data);
        let reference_result = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0xFF_FF, -1, -100, 255, 256, 2,
            4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048,
        );
        assert_eq_m512i(actual_result, reference_result);
    }

    // ---- 16-bit popcount, 256-bit vectors (unmasked, zero-masked, merge-masked) ----

    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm256_popcnt_epi16() {
        let test_data = _mm256_set_epi16(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
            0x3F_FF, 0x7F_FF,
        );
        let actual_result = _mm256_popcnt_epi16(test_data);
        let reference_result =
            _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(actual_result, reference_result);
    }

    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_popcnt_epi16() {
        let test_data = _mm256_set_epi16(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
            0x3F_FF, 0x7F_FF,
        );
        let mask = 0xFF_00;
        let actual_result = _mm256_maskz_popcnt_epi16(mask, test_data);
        let reference_result = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(actual_result, reference_result);
    }

    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm256_mask_popcnt_epi16() {
        let test_data = _mm256_set_epi16(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
            0x3F_FF, 0x7F_FF,
        );
        let mask = 0xFF_00;
        let actual_result = _mm256_mask_popcnt_epi16(test_data, mask, test_data);
        let reference_result = _mm256_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF, 0x3F_FF, 0x7F_FF,
        );
        assert_eq_m256i(actual_result, reference_result);
    }

    // ---- 16-bit popcount, 128-bit vectors (unmasked, zero-masked, merge-masked) ----

    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm_popcnt_epi16() {
        let test_data = _mm_set_epi16(0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F);
        let actual_result = _mm_popcnt_epi16(test_data);
        let reference_result = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m128i(actual_result, reference_result);
    }

    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm_maskz_popcnt_epi16() {
        let test_data = _mm_set_epi16(0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F);
        let mask = 0xF0;
        let actual_result = _mm_maskz_popcnt_epi16(mask, test_data);
        let reference_result = _mm_set_epi16(0, 1, 2, 3, 0, 0, 0, 0);
        assert_eq_m128i(actual_result, reference_result);
    }

    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm_mask_popcnt_epi16() {
        let test_data = _mm_set_epi16(0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F);
        let mask = 0xF0;
        let actual_result = _mm_mask_popcnt_epi16(test_data, mask, test_data);
        let reference_result = _mm_set_epi16(0, 1, 2, 3, 0xF, 0x1F, 0x3F, 0x7F);
        assert_eq_m128i(actual_result, reference_result);
    }

    // ---- 8-bit popcount, 512-bit vectors (unmasked, zero-masked, merge-masked) ----

    #[simd_test(enable = "avx512bitalg,avx512f")]
    unsafe fn test_mm512_popcnt_epi8() {
        let test_data = _mm512_set_epi8(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 128, 171, 206, 100,
            217, 109, 253, 190, 177, 254, 179, 215, 230, 68, 201, 172, 183, 154, 84, 56, 227, 189,
            140, 35, 117, 219, 169, 226, 170, 13, 22, 159, 251, 73, 121, 143, 145, 85, 91, 137, 90,
            225, 21, 249, 211, 155, 228, 70,
        );
        let actual_result = _mm512_popcnt_epi8(test_data);
        let reference_result = _mm512_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 1, 5, 5, 3, 5, 5, 7, 6, 4, 7, 5, 6, 5,
            2, 4, 4, 6, 4, 3, 3, 5, 6, 3, 3, 5, 6, 4, 4, 4, 3, 3, 6, 7, 3, 5, 5, 3, 4, 5, 3, 4, 4,
            3, 6, 5, 5, 4, 3,
        );
        assert_eq_m512i(actual_result, reference_result);
    }

    #[simd_test(enable = "avx512bitalg,avx512f")]
    unsafe fn test_mm512_maskz_popcnt_epi8() {
        let test_data = _mm512_set_epi8(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 128, 171, 206, 100,
            217, 109, 253, 190, 177, 254, 179, 215, 230, 68, 201, 172, 183, 154, 84, 56, 227, 189,
            140, 35, 117, 219, 169, 226, 170, 13, 22, 159, 251, 73, 121, 143, 145, 85, 91, 137, 90,
            225, 21, 249, 211, 155, 228, 70,
        );
        let mask = 0xFF_FF_FF_FF_00_00_00_00;
        let actual_result = _mm512_maskz_popcnt_epi8(mask, test_data);
        let reference_result = _mm512_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 1, 5, 5, 3, 5, 5, 7, 6, 4, 7, 5, 6, 5,
            2, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0,
        );
        assert_eq_m512i(actual_result, reference_result);
    }

    #[simd_test(enable = "avx512bitalg,avx512f")]
    unsafe fn test_mm512_mask_popcnt_epi8() {
        let test_data = _mm512_set_epi8(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 128, 171, 206, 100,
            217, 109, 253, 190, 177, 254, 179, 215, 230, 68, 201, 172, 183, 154, 84, 56, 227, 189,
            140, 35, 117, 219, 169, 226, 170, 13, 22, 159, 251, 73, 121, 143, 145, 85, 91, 137, 90,
            225, 21, 249, 211, 155, 228, 70,
        );
        let mask = 0xFF_FF_FF_FF_00_00_00_00;
        let actual_result = _mm512_mask_popcnt_epi8(test_data, mask, test_data);
        let reference_result = _mm512_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 1, 5, 5, 3, 5, 5, 7, 6, 4, 7, 5, 6, 5,
            2, 4, 4, 183, 154, 84, 56, 227, 189, 140, 35, 117, 219, 169, 226, 170, 13, 22, 159,
            251, 73, 121, 143, 145, 85, 91, 137, 90, 225, 21, 249, 211, 155, 228, 70,
        );
        assert_eq_m512i(actual_result, reference_result);
    }

    // ---- 8-bit popcount, 256-bit vectors (unmasked, zero-masked, merge-masked) ----

    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm256_popcnt_epi8() {
        let test_data = _mm256_set_epi8(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 128, 171, 206, 100,
            217, 109, 253, 190, 177, 254, 179, 215, 230, 68, 201, 172,
        );
        let actual_result = _mm256_popcnt_epi8(test_data);
        let reference_result = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 1, 5, 5, 3, 5, 5, 7, 6, 4, 7, 5, 6, 5,
            2, 4, 4,
        );
        assert_eq_m256i(actual_result, reference_result);
    }

    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_popcnt_epi8() {
        let test_data = _mm256_set_epi8(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 251, 73, 121, 143,
            145, 85, 91, 137, 90, 225, 21, 249, 211, 155, 228, 70,
        );
        let mask = 0xFF_FF_00_00;
        let actual_result = _mm256_maskz_popcnt_epi8(mask, test_data);
        let reference_result = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0,
        );
        assert_eq_m256i(actual_result, reference_result);
    }

    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm256_mask_popcnt_epi8() {
        let test_data = _mm256_set_epi8(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 251, 73, 121, 143,
            145, 85, 91, 137, 90, 225, 21, 249, 211, 155, 228, 70,
        );
        let mask = 0xFF_FF_00_00;
        let actual_result = _mm256_mask_popcnt_epi8(test_data, mask, test_data);
        let reference_result = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 251, 73, 121, 143, 145, 85, 91, 137,
            90, 225, 21, 249, 211, 155, 228, 70,
        );
        assert_eq_m256i(actual_result, reference_result);
    }

    // ---- 8-bit popcount, 128-bit vectors (unmasked, zero-masked, merge-masked) ----

    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm_popcnt_epi8() {
        let test_data = _mm_set_epi8(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64,
        );
        let actual_result = _mm_popcnt_epi8(test_data);
        let reference_result = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1);
        assert_eq_m128i(actual_result, reference_result);
    }

    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm_maskz_popcnt_epi8() {
        let test_data = _mm_set_epi8(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 90, 225, 21, 249, 211, 155, 228, 70,
        );
        let mask = 0xFF_00;
        let actual_result = _mm_maskz_popcnt_epi8(mask, test_data);
        let reference_result = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(actual_result, reference_result);
    }

    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm_mask_popcnt_epi8() {
        let test_data = _mm_set_epi8(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 90, 225, 21, 249, 211, 155, 228, 70,
        );
        let mask = 0xFF_00;
        let actual_result = _mm_mask_popcnt_epi8(test_data, mask, test_data);
        let reference_result =
            _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 90, 225, 21, 249, 211, 155, 228, 70);
        assert_eq_m128i(actual_result, reference_result);
    }

    // ---- Bit shuffle ----
    // The expected mask in each test below is assembled one byte per 64-bit
    // element of `test_data`; the masked variants expect zero bytes wherever
    // the corresponding byte of `mask` is zero.

    #[simd_test(enable = "avx512bitalg,avx512f")]
    unsafe fn test_mm512_bitshuffle_epi64_mask() {
        let test_indices = _mm512_set_epi8(
            63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56, 32, 32, 16, 16, 0, 0,
            8, 8, 56, 48, 40, 32, 24, 16, 8, 0, 63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59,
            58, 57, 56, 32, 32, 16, 16, 0, 0, 8, 8, 56, 48, 40, 32, 24, 16, 8, 0,
        );
        let test_data = _mm512_setr_epi64(
            0xFF_FF_FF_FF_00_00_00_00,
            0xFF_00_FF_00_FF_00_FF_00,
            0xFF_00_00_00_00_00_00_00,
            0xAC_00_00_00_00_00_00_00,
            0xFF_FF_FF_FF_00_00_00_00,
            0xFF_00_FF_00_FF_00_FF_00,
            0xFF_00_00_00_00_00_00_00,
            0xAC_00_00_00_00_00_00_00,
        );
        let actual_result = _mm512_bitshuffle_epi64_mask(test_data, test_indices);
        let reference_result = 0xF0 << 0
            | 0x03 << 8
            | 0xFF << 16
            | 0xAC << 24
            | 0xF0 << 32
            | 0x03 << 40
            | 0xFF << 48
            | 0xAC << 56;

        assert_eq!(actual_result, reference_result);
    }

    #[simd_test(enable = "avx512bitalg,avx512f")]
    unsafe fn test_mm512_mask_bitshuffle_epi64_mask() {
        let test_indices = _mm512_set_epi8(
            63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56, 32, 32, 16, 16, 0, 0,
            8, 8, 56, 48, 40, 32, 24, 16, 8, 0, 63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59,
            58, 57, 56, 32, 32, 16, 16, 0, 0, 8, 8, 56, 48, 40, 32, 24, 16, 8, 0,
        );
        let test_data = _mm512_setr_epi64(
            0xFF_FF_FF_FF_00_00_00_00,
            0xFF_00_FF_00_FF_00_FF_00,
            0xFF_00_00_00_00_00_00_00,
            0xAC_00_00_00_00_00_00_00,
            0xFF_FF_FF_FF_00_00_00_00,
            0xFF_00_FF_00_FF_00_FF_00,
            0xFF_00_00_00_00_00_00_00,
            0xAC_00_00_00_00_00_00_00,
        );
        let mask = 0xFF_FF_FF_FF_00_00_00_00;
        let actual_result = _mm512_mask_bitshuffle_epi64_mask(mask, test_data, test_indices);
        let reference_result = 0x00 << 0
            | 0x00 << 8
            | 0x00 << 16
            | 0x00 << 24
            | 0xF0 << 32
            | 0x03 << 40
            | 0xFF << 48
            | 0xAC << 56;

        assert_eq!(actual_result, reference_result);
    }

    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm256_bitshuffle_epi64_mask() {
        let test_indices = _mm256_set_epi8(
            63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56, 32, 32, 16, 16, 0, 0,
            8, 8, 56, 48, 40, 32, 24, 16, 8, 0,
        );
        let test_data = _mm256_setr_epi64x(
            0xFF_FF_FF_FF_00_00_00_00,
            0xFF_00_FF_00_FF_00_FF_00,
            0xFF_00_00_00_00_00_00_00,
            0xAC_00_00_00_00_00_00_00,
        );
        let actual_result = _mm256_bitshuffle_epi64_mask(test_data, test_indices);
        let reference_result = 0xF0 << 0 | 0x03 << 8 | 0xFF << 16 | 0xAC << 24;

        assert_eq!(actual_result, reference_result);
    }

    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm256_mask_bitshuffle_epi64_mask() {
        let test_indices = _mm256_set_epi8(
            63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56, 32, 32, 16, 16, 0, 0,
            8, 8, 56, 48, 40, 32, 24, 16, 8, 0,
        );
        let test_data = _mm256_setr_epi64x(
            0xFF_FF_FF_FF_00_00_00_00,
            0xFF_00_FF_00_FF_00_FF_00,
            0xFF_00_00_00_00_00_00_00,
            0xAC_00_00_00_00_00_00_00,
        );
        let mask = 0xFF_FF_00_00;
        let actual_result = _mm256_mask_bitshuffle_epi64_mask(mask, test_data, test_indices);
        let reference_result = 0x00 << 0 | 0x00 << 8 | 0xFF << 16 | 0xAC << 24;

        assert_eq!(actual_result, reference_result);
    }

    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm_bitshuffle_epi64_mask() {
        let test_indices = _mm_set_epi8(
            63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56,
        );
        let test_data = _mm_setr_epi64x(0xFF_00_00_00_00_00_00_00, 0xAC_00_00_00_00_00_00_00);
        let actual_result = _mm_bitshuffle_epi64_mask(test_data, test_indices);
        let reference_result = 0xFF << 0 | 0xAC << 8;

        assert_eq!(actual_result, reference_result);
    }

    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm_mask_bitshuffle_epi64_mask() {
        let test_indices = _mm_set_epi8(
            63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56,
        );
        let test_data = _mm_setr_epi64x(0xFF_00_00_00_00_00_00_00, 0xAC_00_00_00_00_00_00_00);
        let mask = 0xFF_00;
        let actual_result = _mm_mask_bitshuffle_epi64_mask(mask, test_data, test_indices);
        let reference_result = 0x00 << 0 | 0xAC << 8;

        assert_eq!(actual_result, reference_result);
    }
}
807