1//! Bit-oriented Algorithms (BITALG)
2//!
3//! The intrinsics here correspond to those in the `immintrin.h` C header.
4//!
5//! The reference is [Intel 64 and IA-32 Architectures Software Developer's
6//! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref].
7//!
8//! [intel64_ref]: https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
9
10use crate::core_arch::simd::i8x16;
11use crate::core_arch::simd::i8x32;
12use crate::core_arch::simd::i8x64;
13use crate::core_arch::simd::i16x8;
14use crate::core_arch::simd::i16x16;
15use crate::core_arch::simd::i16x32;
16use crate::core_arch::x86::__m128i;
17use crate::core_arch::x86::__m256i;
18use crate::core_arch::x86::__m512i;
19use crate::core_arch::x86::__mmask8;
20use crate::core_arch::x86::__mmask16;
21use crate::core_arch::x86::__mmask32;
22use crate::core_arch::x86::__mmask64;
23use crate::intrinsics::simd::{simd_ctpop, simd_select_bitmask};
24use crate::mem::transmute;
25
26#[cfg(test)]
27use stdarch_test::assert_instr;
28
29#[allow(improper_ctypes)]
30unsafe extern "C" {
31 #[link_name = "llvm.x86.avx512.mask.vpshufbitqmb.512"]
32 unsafefn bitshuffle_512(data: i8x64, indices: i8x64, mask: __mmask64) -> __mmask64;
33 #[link_name = "llvm.x86.avx512.mask.vpshufbitqmb.256"]
34 unsafefn bitshuffle_256(data: i8x32, indices: i8x32, mask: __mmask32) -> __mmask32;
35 #[link_name = "llvm.x86.avx512.mask.vpshufbitqmb.128"]
36 unsafefn bitshuffle_128(data: i8x16, indices: i8x16, mask: __mmask16) -> __mmask16;
37}
38
39/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
40///
41/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_popcnt_epi16)
42#[inline]
43#[target_feature(enable = "avx512bitalg")]
44#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
45#[cfg_attr(test, assert_instr(vpopcntw))]
46#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
47pub const fn _mm512_popcnt_epi16(a: __m512i) -> __m512i {
48 unsafe { transmute(src:simd_ctpop(a.as_i16x32())) }
49}
50
51/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
52///
53/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
54/// Otherwise the computation result is written into the result.
55///
56/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_popcnt_epi16)
57#[inline]
58#[target_feature(enable = "avx512bitalg")]
59#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
60#[cfg_attr(test, assert_instr(vpopcntw))]
61#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
62pub const fn _mm512_maskz_popcnt_epi16(k: __mmask32, a: __m512i) -> __m512i {
63 unsafe {
64 transmute(src:simd_select_bitmask(
65 m:k,
66 yes:simd_ctpop(a.as_i16x32()),
67 no:i16x32::ZERO,
68 ))
69 }
70}
71
72/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
73///
74/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
75/// Otherwise the computation result is written into the result.
76///
77/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_popcnt_epi16)
78#[inline]
79#[target_feature(enable = "avx512bitalg")]
80#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
81#[cfg_attr(test, assert_instr(vpopcntw))]
82#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
83pub const fn _mm512_mask_popcnt_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
84 unsafe {
85 transmute(src:simd_select_bitmask(
86 m:k,
87 yes:simd_ctpop(a.as_i16x32()),
88 no:src.as_i16x32(),
89 ))
90 }
91}
92
93/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
94///
95/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_popcnt_epi16)
96#[inline]
97#[target_feature(enable = "avx512bitalg,avx512vl")]
98#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
99#[cfg_attr(test, assert_instr(vpopcntw))]
100#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
101pub const fn _mm256_popcnt_epi16(a: __m256i) -> __m256i {
102 unsafe { transmute(src:simd_ctpop(a.as_i16x16())) }
103}
104
105/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
106///
107/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
108/// Otherwise the computation result is written into the result.
109///
110/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_popcnt_epi16)
111#[inline]
112#[target_feature(enable = "avx512bitalg,avx512vl")]
113#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
114#[cfg_attr(test, assert_instr(vpopcntw))]
115#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
116pub const fn _mm256_maskz_popcnt_epi16(k: __mmask16, a: __m256i) -> __m256i {
117 unsafe {
118 transmute(src:simd_select_bitmask(
119 m:k,
120 yes:simd_ctpop(a.as_i16x16()),
121 no:i16x16::ZERO,
122 ))
123 }
124}
125
126/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
127///
128/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
129/// Otherwise the computation result is written into the result.
130///
131/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_popcnt_epi16)
132#[inline]
133#[target_feature(enable = "avx512bitalg,avx512vl")]
134#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
135#[cfg_attr(test, assert_instr(vpopcntw))]
136#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
137pub const fn _mm256_mask_popcnt_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
138 unsafe {
139 transmute(src:simd_select_bitmask(
140 m:k,
141 yes:simd_ctpop(a.as_i16x16()),
142 no:src.as_i16x16(),
143 ))
144 }
145}
146
147/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
148///
149/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_popcnt_epi16)
150#[inline]
151#[target_feature(enable = "avx512bitalg,avx512vl")]
152#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
153#[cfg_attr(test, assert_instr(vpopcntw))]
154#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
155pub const fn _mm_popcnt_epi16(a: __m128i) -> __m128i {
156 unsafe { transmute(src:simd_ctpop(a.as_i16x8())) }
157}
158
159/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
160///
161/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
162/// Otherwise the computation result is written into the result.
163///
164/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_popcnt_epi16)
165#[inline]
166#[target_feature(enable = "avx512bitalg,avx512vl")]
167#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
168#[cfg_attr(test, assert_instr(vpopcntw))]
169#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
170pub const fn _mm_maskz_popcnt_epi16(k: __mmask8, a: __m128i) -> __m128i {
171 unsafe {
172 transmute(src:simd_select_bitmask(
173 m:k,
174 yes:simd_ctpop(a.as_i16x8()),
175 no:i16x8::ZERO,
176 ))
177 }
178}
179
180/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
181///
182/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
183/// Otherwise the computation result is written into the result.
184///
185/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_popcnt_epi16)
186#[inline]
187#[target_feature(enable = "avx512bitalg,avx512vl")]
188#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
189#[cfg_attr(test, assert_instr(vpopcntw))]
190#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
191pub const fn _mm_mask_popcnt_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
192 unsafe {
193 transmute(src:simd_select_bitmask(
194 m:k,
195 yes:simd_ctpop(a.as_i16x8()),
196 no:src.as_i16x8(),
197 ))
198 }
199}
200
201/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
202///
203/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_popcnt_epi8)
204#[inline]
205#[target_feature(enable = "avx512bitalg")]
206#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
207#[cfg_attr(test, assert_instr(vpopcntb))]
208#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
209pub const fn _mm512_popcnt_epi8(a: __m512i) -> __m512i {
210 unsafe { transmute(src:simd_ctpop(a.as_i8x64())) }
211}
212
213/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
214///
215/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
216/// Otherwise the computation result is written into the result.
217///
218/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_popcnt_epi8)
219#[inline]
220#[target_feature(enable = "avx512bitalg")]
221#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
222#[cfg_attr(test, assert_instr(vpopcntb))]
223#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
224pub const fn _mm512_maskz_popcnt_epi8(k: __mmask64, a: __m512i) -> __m512i {
225 unsafe {
226 transmute(src:simd_select_bitmask(
227 m:k,
228 yes:simd_ctpop(a.as_i8x64()),
229 no:i8x64::ZERO,
230 ))
231 }
232}
233
234/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
235///
236/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
237/// Otherwise the computation result is written into the result.
238///
239/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_popcnt_epi8)
240#[inline]
241#[target_feature(enable = "avx512bitalg")]
242#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
243#[cfg_attr(test, assert_instr(vpopcntb))]
244#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
245pub const fn _mm512_mask_popcnt_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
246 unsafe {
247 transmute(src:simd_select_bitmask(
248 m:k,
249 yes:simd_ctpop(a.as_i8x64()),
250 no:src.as_i8x64(),
251 ))
252 }
253}
254
255/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
256///
257/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_popcnt_epi8)
258#[inline]
259#[target_feature(enable = "avx512bitalg,avx512vl")]
260#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
261#[cfg_attr(test, assert_instr(vpopcntb))]
262#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
263pub const fn _mm256_popcnt_epi8(a: __m256i) -> __m256i {
264 unsafe { transmute(src:simd_ctpop(a.as_i8x32())) }
265}
266
267/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
268///
269/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
270/// Otherwise the computation result is written into the result.
271///
272/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_popcnt_epi8)
273#[inline]
274#[target_feature(enable = "avx512bitalg,avx512vl")]
275#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
276#[cfg_attr(test, assert_instr(vpopcntb))]
277#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
278pub const fn _mm256_maskz_popcnt_epi8(k: __mmask32, a: __m256i) -> __m256i {
279 unsafe {
280 transmute(src:simd_select_bitmask(
281 m:k,
282 yes:simd_ctpop(a.as_i8x32()),
283 no:i8x32::ZERO,
284 ))
285 }
286}
287
288/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
289///
290/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
291/// Otherwise the computation result is written into the result.
292///
293/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_popcnt_epi8)
294#[inline]
295#[target_feature(enable = "avx512bitalg,avx512vl")]
296#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
297#[cfg_attr(test, assert_instr(vpopcntb))]
298#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
299pub const fn _mm256_mask_popcnt_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
300 unsafe {
301 transmute(src:simd_select_bitmask(
302 m:k,
303 yes:simd_ctpop(a.as_i8x32()),
304 no:src.as_i8x32(),
305 ))
306 }
307}
308
309/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
310///
311/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_popcnt_epi8)
312#[inline]
313#[target_feature(enable = "avx512bitalg,avx512vl")]
314#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
315#[cfg_attr(test, assert_instr(vpopcntb))]
316#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
317pub const fn _mm_popcnt_epi8(a: __m128i) -> __m128i {
318 unsafe { transmute(src:simd_ctpop(a.as_i8x16())) }
319}
320
321/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
322///
323/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
324/// Otherwise the computation result is written into the result.
325///
326/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_popcnt_epi8)
327#[inline]
328#[target_feature(enable = "avx512bitalg,avx512vl")]
329#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
330#[cfg_attr(test, assert_instr(vpopcntb))]
331#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
332pub const fn _mm_maskz_popcnt_epi8(k: __mmask16, a: __m128i) -> __m128i {
333 unsafe {
334 transmute(src:simd_select_bitmask(
335 m:k,
336 yes:simd_ctpop(a.as_i8x16()),
337 no:i8x16::ZERO,
338 ))
339 }
340}
341
342/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
343///
344/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
345/// Otherwise the computation result is written into the result.
346///
347/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_popcnt_epi8)
348#[inline]
349#[target_feature(enable = "avx512bitalg,avx512vl")]
350#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
351#[cfg_attr(test, assert_instr(vpopcntb))]
352#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
353pub const fn _mm_mask_popcnt_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
354 unsafe {
355 transmute(src:simd_select_bitmask(
356 m:k,
357 yes:simd_ctpop(a.as_i8x16()),
358 no:src.as_i8x16(),
359 ))
360 }
361}
362
363/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
364/// Then groups 8 8-bit values from `c`as indices into the bits of the corresponding 64-bit integer.
365/// It then selects these bits and packs them into the output.
366///
367/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_bitshuffle_epi64_mask)
368#[inline]
369#[target_feature(enable = "avx512bitalg")]
370#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
371#[cfg_attr(test, assert_instr(vpshufbitqmb))]
372pub fn _mm512_bitshuffle_epi64_mask(b: __m512i, c: __m512i) -> __mmask64 {
373 unsafe { bitshuffle_512(data:b.as_i8x64(), indices:c.as_i8x64(), !0) }
374}
375
376/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
377/// Then groups 8 8-bit values from `c`as indices into the bits of the corresponding 64-bit integer.
378/// It then selects these bits and packs them into the output.
379///
380/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
381/// Otherwise the computation result is written into the result.
382///
383/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_bitshuffle_epi64_mask)
384#[inline]
385#[target_feature(enable = "avx512bitalg")]
386#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
387#[cfg_attr(test, assert_instr(vpshufbitqmb))]
388pub fn _mm512_mask_bitshuffle_epi64_mask(k: __mmask64, b: __m512i, c: __m512i) -> __mmask64 {
389 unsafe { bitshuffle_512(data:b.as_i8x64(), indices:c.as_i8x64(), mask:k) }
390}
391
392/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
393/// Then groups 8 8-bit values from `c`as indices into the bits of the corresponding 64-bit integer.
394/// It then selects these bits and packs them into the output.
395///
396/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_bitshuffle_epi64_mask)
397#[inline]
398#[target_feature(enable = "avx512bitalg,avx512vl")]
399#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
400#[cfg_attr(test, assert_instr(vpshufbitqmb))]
401pub fn _mm256_bitshuffle_epi64_mask(b: __m256i, c: __m256i) -> __mmask32 {
402 unsafe { bitshuffle_256(data:b.as_i8x32(), indices:c.as_i8x32(), !0) }
403}
404
405/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
406/// Then groups 8 8-bit values from `c`as indices into the bits of the corresponding 64-bit integer.
407/// It then selects these bits and packs them into the output.
408///
409/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
410/// Otherwise the computation result is written into the result.
411///
412/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_bitshuffle_epi64_mask)
413#[inline]
414#[target_feature(enable = "avx512bitalg,avx512vl")]
415#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
416#[cfg_attr(test, assert_instr(vpshufbitqmb))]
417pub fn _mm256_mask_bitshuffle_epi64_mask(k: __mmask32, b: __m256i, c: __m256i) -> __mmask32 {
418 unsafe { bitshuffle_256(data:b.as_i8x32(), indices:c.as_i8x32(), mask:k) }
419}
420
421/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
422/// Then groups 8 8-bit values from `c`as indices into the bits of the corresponding 64-bit integer.
423/// It then selects these bits and packs them into the output.
424///
425/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_bitshuffle_epi64_mask)
426#[inline]
427#[target_feature(enable = "avx512bitalg,avx512vl")]
428#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
429#[cfg_attr(test, assert_instr(vpshufbitqmb))]
430pub fn _mm_bitshuffle_epi64_mask(b: __m128i, c: __m128i) -> __mmask16 {
431 unsafe { bitshuffle_128(data:b.as_i8x16(), indices:c.as_i8x16(), !0) }
432}
433
434/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
435/// Then groups 8 8-bit values from `c`as indices into the bits of the corresponding 64-bit integer.
436/// It then selects these bits and packs them into the output.
437///
438/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
439/// Otherwise the computation result is written into the result.
440///
441/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_bitshuffle_epi64_mask)
442#[inline]
443#[target_feature(enable = "avx512bitalg,avx512vl")]
444#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
445#[cfg_attr(test, assert_instr(vpshufbitqmb))]
446pub fn _mm_mask_bitshuffle_epi64_mask(k: __mmask16, b: __m128i, c: __m128i) -> __mmask16 {
447 unsafe { bitshuffle_128(data:b.as_i8x16(), indices:c.as_i8x16(), mask:k) }
448}
449
450#[cfg(test)]
451mod tests {
452 // Some of the constants in the tests below are just bit patterns. They should not
453 // be interpreted as integers; signedness does not make sense for them, but
454 // __mXXXi happens to be defined in terms of signed integers.
455 #![allow(overflowing_literals)]
456
457 use crate::core_arch::assert_eq_const as assert_eq;
458 use stdarch_test::simd_test;
459
460 use crate::core_arch::x86::*;
461
462 #[simd_test(enable = "avx512bitalg,avx512f")]
463 const fn test_mm512_popcnt_epi16() {
464 let test_data = _mm512_set_epi16(
465 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
466 0x3F_FF, 0x7F_FF, 0xFF_FF, -1, -100, 255, 256, 2, 4, 8, 16, 32, 64, 128, 256, 512,
467 1024, 2048,
468 );
469 let actual_result = _mm512_popcnt_epi16(test_data);
470 let reference_result = _mm512_set_epi16(
471 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 16, 12, 8, 1, 1, 1, 1, 1, 1,
472 1, 1, 1, 1, 1, 1,
473 );
474 assert_eq_m512i(actual_result, reference_result);
475 }
476
477 #[simd_test(enable = "avx512bitalg,avx512f")]
478 const fn test_mm512_maskz_popcnt_epi16() {
479 let test_data = _mm512_set_epi16(
480 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
481 0x3F_FF, 0x7F_FF, 0xFF_FF, -1, -100, 255, 256, 2, 4, 8, 16, 32, 64, 128, 256, 512,
482 1024, 2048,
483 );
484 let mask = 0xFF_FF_00_00;
485 let actual_result = _mm512_maskz_popcnt_epi16(mask, test_data);
486 let reference_result = _mm512_set_epi16(
487 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
488 0, 0, 0, 0, 0,
489 );
490 assert_eq_m512i(actual_result, reference_result);
491 }
492
493 #[simd_test(enable = "avx512bitalg,avx512f")]
494 const fn test_mm512_mask_popcnt_epi16() {
495 let test_data = _mm512_set_epi16(
496 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
497 0x3F_FF, 0x7F_FF, 0xFF_FF, -1, -100, 255, 256, 2, 4, 8, 16, 32, 64, 128, 256, 512,
498 1024, 2048,
499 );
500 let mask = 0xFF_FF_00_00;
501 let actual_result = _mm512_mask_popcnt_epi16(test_data, mask, test_data);
502 let reference_result = _mm512_set_epi16(
503 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0xFF_FF, -1, -100, 255, 256, 2,
504 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048,
505 );
506 assert_eq_m512i(actual_result, reference_result);
507 }
508
509 #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
510 const fn test_mm256_popcnt_epi16() {
511 let test_data = _mm256_set_epi16(
512 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
513 0x3F_FF, 0x7F_FF,
514 );
515 let actual_result = _mm256_popcnt_epi16(test_data);
516 let reference_result =
517 _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
518 assert_eq_m256i(actual_result, reference_result);
519 }
520
521 #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
522 const fn test_mm256_maskz_popcnt_epi16() {
523 let test_data = _mm256_set_epi16(
524 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
525 0x3F_FF, 0x7F_FF,
526 );
527 let mask = 0xFF_00;
528 let actual_result = _mm256_maskz_popcnt_epi16(mask, test_data);
529 let reference_result = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0);
530 assert_eq_m256i(actual_result, reference_result);
531 }
532
533 #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
534 const fn test_mm256_mask_popcnt_epi16() {
535 let test_data = _mm256_set_epi16(
536 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
537 0x3F_FF, 0x7F_FF,
538 );
539 let mask = 0xFF_00;
540 let actual_result = _mm256_mask_popcnt_epi16(test_data, mask, test_data);
541 let reference_result = _mm256_set_epi16(
542 0, 1, 2, 3, 4, 5, 6, 7, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF, 0x3F_FF, 0x7F_FF,
543 );
544 assert_eq_m256i(actual_result, reference_result);
545 }
546
547 #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
548 const fn test_mm_popcnt_epi16() {
549 let test_data = _mm_set_epi16(0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F);
550 let actual_result = _mm_popcnt_epi16(test_data);
551 let reference_result = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
552 assert_eq_m128i(actual_result, reference_result);
553 }
554
555 #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
556 const fn test_mm_maskz_popcnt_epi16() {
557 let test_data = _mm_set_epi16(0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F);
558 let mask = 0xF0;
559 let actual_result = _mm_maskz_popcnt_epi16(mask, test_data);
560 let reference_result = _mm_set_epi16(0, 1, 2, 3, 0, 0, 0, 0);
561 assert_eq_m128i(actual_result, reference_result);
562 }
563
564 #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
565 const fn test_mm_mask_popcnt_epi16() {
566 let test_data = _mm_set_epi16(0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F);
567 let mask = 0xF0;
568 let actual_result = _mm_mask_popcnt_epi16(test_data, mask, test_data);
569 let reference_result = _mm_set_epi16(0, 1, 2, 3, 0xF, 0x1F, 0x3F, 0x7F);
570 assert_eq_m128i(actual_result, reference_result);
571 }
572
573 #[simd_test(enable = "avx512bitalg,avx512f")]
574 const fn test_mm512_popcnt_epi8() {
575 let test_data = _mm512_set_epi8(
576 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 128, 171, 206, 100,
577 217, 109, 253, 190, 177, 254, 179, 215, 230, 68, 201, 172, 183, 154, 84, 56, 227, 189,
578 140, 35, 117, 219, 169, 226, 170, 13, 22, 159, 251, 73, 121, 143, 145, 85, 91, 137, 90,
579 225, 21, 249, 211, 155, 228, 70,
580 );
581 let actual_result = _mm512_popcnt_epi8(test_data);
582 let reference_result = _mm512_set_epi8(
583 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 1, 5, 5, 3, 5, 5, 7, 6, 4, 7, 5, 6, 5,
584 2, 4, 4, 6, 4, 3, 3, 5, 6, 3, 3, 5, 6, 4, 4, 4, 3, 3, 6, 7, 3, 5, 5, 3, 4, 5, 3, 4, 4,
585 3, 6, 5, 5, 4, 3,
586 );
587 assert_eq_m512i(actual_result, reference_result);
588 }
589
590 #[simd_test(enable = "avx512bitalg,avx512f")]
591 const fn test_mm512_maskz_popcnt_epi8() {
592 let test_data = _mm512_set_epi8(
593 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 128, 171, 206, 100,
594 217, 109, 253, 190, 177, 254, 179, 215, 230, 68, 201, 172, 183, 154, 84, 56, 227, 189,
595 140, 35, 117, 219, 169, 226, 170, 13, 22, 159, 251, 73, 121, 143, 145, 85, 91, 137, 90,
596 225, 21, 249, 211, 155, 228, 70,
597 );
598 let mask = 0xFF_FF_FF_FF_00_00_00_00;
599 let actual_result = _mm512_maskz_popcnt_epi8(mask, test_data);
600 let reference_result = _mm512_set_epi8(
601 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 1, 5, 5, 3, 5, 5, 7, 6, 4, 7, 5, 6, 5,
602 2, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
603 0, 0, 0, 0, 0, 0,
604 );
605 assert_eq_m512i(actual_result, reference_result);
606 }
607
608 #[simd_test(enable = "avx512bitalg,avx512f")]
609 const fn test_mm512_mask_popcnt_epi8() {
610 let test_data = _mm512_set_epi8(
611 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 128, 171, 206, 100,
612 217, 109, 253, 190, 177, 254, 179, 215, 230, 68, 201, 172, 183, 154, 84, 56, 227, 189,
613 140, 35, 117, 219, 169, 226, 170, 13, 22, 159, 251, 73, 121, 143, 145, 85, 91, 137, 90,
614 225, 21, 249, 211, 155, 228, 70,
615 );
616 let mask = 0xFF_FF_FF_FF_00_00_00_00;
617 let actual_result = _mm512_mask_popcnt_epi8(test_data, mask, test_data);
618 let reference_result = _mm512_set_epi8(
619 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 1, 5, 5, 3, 5, 5, 7, 6, 4, 7, 5, 6, 5,
620 2, 4, 4, 183, 154, 84, 56, 227, 189, 140, 35, 117, 219, 169, 226, 170, 13, 22, 159,
621 251, 73, 121, 143, 145, 85, 91, 137, 90, 225, 21, 249, 211, 155, 228, 70,
622 );
623 assert_eq_m512i(actual_result, reference_result);
624 }
625
626 #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
627 const fn test_mm256_popcnt_epi8() {
628 let test_data = _mm256_set_epi8(
629 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 128, 171, 206, 100,
630 217, 109, 253, 190, 177, 254, 179, 215, 230, 68, 201, 172,
631 );
632 let actual_result = _mm256_popcnt_epi8(test_data);
633 let reference_result = _mm256_set_epi8(
634 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 1, 5, 5, 3, 5, 5, 7, 6, 4, 7, 5, 6, 5,
635 2, 4, 4,
636 );
637 assert_eq_m256i(actual_result, reference_result);
638 }
639
640 #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
641 const fn test_mm256_maskz_popcnt_epi8() {
642 let test_data = _mm256_set_epi8(
643 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 251, 73, 121, 143,
644 145, 85, 91, 137, 90, 225, 21, 249, 211, 155, 228, 70,
645 );
646 let mask = 0xFF_FF_00_00;
647 let actual_result = _mm256_maskz_popcnt_epi8(mask, test_data);
648 let reference_result = _mm256_set_epi8(
649 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
650 0, 0, 0,
651 );
652 assert_eq_m256i(actual_result, reference_result);
653 }
654
655 #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
656 const fn test_mm256_mask_popcnt_epi8() {
657 let test_data = _mm256_set_epi8(
658 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 251, 73, 121, 143,
659 145, 85, 91, 137, 90, 225, 21, 249, 211, 155, 228, 70,
660 );
661 let mask = 0xFF_FF_00_00;
662 let actual_result = _mm256_mask_popcnt_epi8(test_data, mask, test_data);
663 let reference_result = _mm256_set_epi8(
664 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 251, 73, 121, 143, 145, 85, 91, 137,
665 90, 225, 21, 249, 211, 155, 228, 70,
666 );
667 assert_eq_m256i(actual_result, reference_result);
668 }
669
670 #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
671 const fn test_mm_popcnt_epi8() {
672 let test_data = _mm_set_epi8(
673 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64,
674 );
675 let actual_result = _mm_popcnt_epi8(test_data);
676 let reference_result = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1);
677 assert_eq_m128i(actual_result, reference_result);
678 }
679
680 #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
681 const fn test_mm_maskz_popcnt_epi8() {
682 let test_data = _mm_set_epi8(
683 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 90, 225, 21, 249, 211, 155, 228, 70,
684 );
685 let mask = 0xFF_00;
686 let actual_result = _mm_maskz_popcnt_epi8(mask, test_data);
687 let reference_result = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0);
688 assert_eq_m128i(actual_result, reference_result);
689 }
690
691 #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
692 const fn test_mm_mask_popcnt_epi8() {
693 let test_data = _mm_set_epi8(
694 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 90, 225, 21, 249, 211, 155, 228, 70,
695 );
696 let mask = 0xFF_00;
697 let actual_result = _mm_mask_popcnt_epi8(test_data, mask, test_data);
698 let reference_result =
699 _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 90, 225, 21, 249, 211, 155, 228, 70);
700 assert_eq_m128i(actual_result, reference_result);
701 }
702
703 #[simd_test(enable = "avx512bitalg,avx512f")]
704 fn test_mm512_bitshuffle_epi64_mask() {
705 let test_indices = _mm512_set_epi8(
706 63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56, 32, 32, 16, 16, 0, 0,
707 8, 8, 56, 48, 40, 32, 24, 16, 8, 0, 63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59,
708 58, 57, 56, 32, 32, 16, 16, 0, 0, 8, 8, 56, 48, 40, 32, 24, 16, 8, 0,
709 );
710 let test_data = _mm512_setr_epi64(
711 0xFF_FF_FF_FF_00_00_00_00,
712 0xFF_00_FF_00_FF_00_FF_00,
713 0xFF_00_00_00_00_00_00_00,
714 0xAC_00_00_00_00_00_00_00,
715 0xFF_FF_FF_FF_00_00_00_00,
716 0xFF_00_FF_00_FF_00_FF_00,
717 0xFF_00_00_00_00_00_00_00,
718 0xAC_00_00_00_00_00_00_00,
719 );
720 let actual_result = _mm512_bitshuffle_epi64_mask(test_data, test_indices);
721 let reference_result = 0xF0 << 0
722 | 0x03 << 8
723 | 0xFF << 16
724 | 0xAC << 24
725 | 0xF0 << 32
726 | 0x03 << 40
727 | 0xFF << 48
728 | 0xAC << 56;
729
730 assert_eq!(actual_result, reference_result);
731 }
732
733 #[simd_test(enable = "avx512bitalg,avx512f")]
734 fn test_mm512_mask_bitshuffle_epi64_mask() {
735 let test_indices = _mm512_set_epi8(
736 63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56, 32, 32, 16, 16, 0, 0,
737 8, 8, 56, 48, 40, 32, 24, 16, 8, 0, 63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59,
738 58, 57, 56, 32, 32, 16, 16, 0, 0, 8, 8, 56, 48, 40, 32, 24, 16, 8, 0,
739 );
740 let test_data = _mm512_setr_epi64(
741 0xFF_FF_FF_FF_00_00_00_00,
742 0xFF_00_FF_00_FF_00_FF_00,
743 0xFF_00_00_00_00_00_00_00,
744 0xAC_00_00_00_00_00_00_00,
745 0xFF_FF_FF_FF_00_00_00_00,
746 0xFF_00_FF_00_FF_00_FF_00,
747 0xFF_00_00_00_00_00_00_00,
748 0xAC_00_00_00_00_00_00_00,
749 );
750 let mask = 0xFF_FF_FF_FF_00_00_00_00;
751 let actual_result = _mm512_mask_bitshuffle_epi64_mask(mask, test_data, test_indices);
752 let reference_result = 0x00 << 0
753 | 0x00 << 8
754 | 0x00 << 16
755 | 0x00 << 24
756 | 0xF0 << 32
757 | 0x03 << 40
758 | 0xFF << 48
759 | 0xAC << 56;
760
761 assert_eq!(actual_result, reference_result);
762 }
763
764 #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
765 fn test_mm256_bitshuffle_epi64_mask() {
766 let test_indices = _mm256_set_epi8(
767 63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56, 32, 32, 16, 16, 0, 0,
768 8, 8, 56, 48, 40, 32, 24, 16, 8, 0,
769 );
770 let test_data = _mm256_setr_epi64x(
771 0xFF_FF_FF_FF_00_00_00_00,
772 0xFF_00_FF_00_FF_00_FF_00,
773 0xFF_00_00_00_00_00_00_00,
774 0xAC_00_00_00_00_00_00_00,
775 );
776 let actual_result = _mm256_bitshuffle_epi64_mask(test_data, test_indices);
777 let reference_result = 0xF0 << 0 | 0x03 << 8 | 0xFF << 16 | 0xAC << 24;
778
779 assert_eq!(actual_result, reference_result);
780 }
781
782 #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
783 fn test_mm256_mask_bitshuffle_epi64_mask() {
784 let test_indices = _mm256_set_epi8(
785 63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56, 32, 32, 16, 16, 0, 0,
786 8, 8, 56, 48, 40, 32, 24, 16, 8, 0,
787 );
788 let test_data = _mm256_setr_epi64x(
789 0xFF_FF_FF_FF_00_00_00_00,
790 0xFF_00_FF_00_FF_00_FF_00,
791 0xFF_00_00_00_00_00_00_00,
792 0xAC_00_00_00_00_00_00_00,
793 );
794 let mask = 0xFF_FF_00_00;
795 let actual_result = _mm256_mask_bitshuffle_epi64_mask(mask, test_data, test_indices);
796 let reference_result = 0x00 << 0 | 0x00 << 8 | 0xFF << 16 | 0xAC << 24;
797
798 assert_eq!(actual_result, reference_result);
799 }
800
801 #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
802 fn test_mm_bitshuffle_epi64_mask() {
803 let test_indices = _mm_set_epi8(
804 63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56,
805 );
806 let test_data = _mm_setr_epi64x(0xFF_00_00_00_00_00_00_00, 0xAC_00_00_00_00_00_00_00);
807 let actual_result = _mm_bitshuffle_epi64_mask(test_data, test_indices);
808 let reference_result = 0xFF << 0 | 0xAC << 8;
809
810 assert_eq!(actual_result, reference_result);
811 }
812
813 #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
814 fn test_mm_mask_bitshuffle_epi64_mask() {
815 let test_indices = _mm_set_epi8(
816 63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56,
817 );
818 let test_data = _mm_setr_epi64x(0xFF_00_00_00_00_00_00_00, 0xAC_00_00_00_00_00_00_00);
819 let mask = 0xFF_00;
820 let actual_result = _mm_mask_bitshuffle_epi64_mask(mask, test_data, test_indices);
821 let reference_result = 0x00 << 0 | 0xAC << 8;
822
823 assert_eq!(actual_result, reference_result);
824 }
825}
826