1//! Bit-oriented Algorithms (BITALG)
2//!
3//! The intrinsics here correspond to those in the `immintrin.h` C header.
4//!
5//! The reference is [Intel 64 and IA-32 Architectures Software Developer's
6//! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref].
7//!
8//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
9
10use crate::core_arch::simd::i16x16;
11use crate::core_arch::simd::i16x32;
12use crate::core_arch::simd::i16x8;
13use crate::core_arch::simd::i8x16;
14use crate::core_arch::simd::i8x32;
15use crate::core_arch::simd::i8x64;
16use crate::core_arch::x86::__m128i;
17use crate::core_arch::x86::__m256i;
18use crate::core_arch::x86::__m512i;
19use crate::core_arch::x86::__mmask16;
20use crate::core_arch::x86::__mmask32;
21use crate::core_arch::x86::__mmask64;
22use crate::core_arch::x86::__mmask8;
23use crate::core_arch::x86::_mm256_setzero_si256;
24use crate::core_arch::x86::_mm512_setzero_si512;
25use crate::core_arch::x86::_mm_setzero_si128;
26use crate::core_arch::x86::m128iExt;
27use crate::core_arch::x86::m256iExt;
28use crate::core_arch::x86::m512iExt;
29use crate::intrinsics::simd::simd_select_bitmask;
30use crate::mem::transmute;
31
32#[cfg(test)]
33use stdarch_test::assert_instr;
34
35#[allow(improper_ctypes)]
36extern "C" {
37 #[link_name = "llvm.ctpop.v32i16"]
38 fn popcnt_v32i16(x: i16x32) -> i16x32;
39 #[link_name = "llvm.ctpop.v16i16"]
40 fn popcnt_v16i16(x: i16x16) -> i16x16;
41 #[link_name = "llvm.ctpop.v8i16"]
42 fn popcnt_v8i16(x: i16x8) -> i16x8;
43
44 #[link_name = "llvm.ctpop.v64i8"]
45 fn popcnt_v64i8(x: i8x64) -> i8x64;
46 #[link_name = "llvm.ctpop.v32i8"]
47 fn popcnt_v32i8(x: i8x32) -> i8x32;
48 #[link_name = "llvm.ctpop.v16i8"]
49 fn popcnt_v16i8(x: i8x16) -> i8x16;
50
51 #[link_name = "llvm.x86.avx512.mask.vpshufbitqmb.512"]
52 fn bitshuffle_512(data: i8x64, indices: i8x64, mask: __mmask64) -> __mmask64;
53 #[link_name = "llvm.x86.avx512.mask.vpshufbitqmb.256"]
54 fn bitshuffle_256(data: i8x32, indices: i8x32, mask: __mmask32) -> __mmask32;
55 #[link_name = "llvm.x86.avx512.mask.vpshufbitqmb.128"]
56 fn bitshuffle_128(data: i8x16, indices: i8x16, mask: __mmask16) -> __mmask16;
57}
58
59/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
60///
61/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_popcnt_epi16)
62#[inline]
63#[target_feature(enable = "avx512bitalg")]
64#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
65#[cfg_attr(test, assert_instr(vpopcntw))]
66pub unsafe fn _mm512_popcnt_epi16(a: __m512i) -> __m512i {
67 transmute(src:popcnt_v32i16(a.as_i16x32()))
68}
69
70/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
71///
72/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
73/// Otherwise the computation result is written into the result.
74///
75/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_popcnt_epi16)
76#[inline]
77#[target_feature(enable = "avx512bitalg")]
78#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
79#[cfg_attr(test, assert_instr(vpopcntw))]
80pub unsafe fn _mm512_maskz_popcnt_epi16(k: __mmask32, a: __m512i) -> __m512i {
81 let zero: i16x32 = _mm512_setzero_si512().as_i16x32();
82 transmute(src:simd_select_bitmask(m:k, yes:popcnt_v32i16(a.as_i16x32()), no:zero))
83}
84
85/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
86///
87/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
88/// Otherwise the computation result is written into the result.
89///
90/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_popcnt_epi16)
91#[inline]
92#[target_feature(enable = "avx512bitalg")]
93#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
94#[cfg_attr(test, assert_instr(vpopcntw))]
95pub unsafe fn _mm512_mask_popcnt_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
96 transmute(src:simd_select_bitmask(
97 m:k,
98 yes:popcnt_v32i16(a.as_i16x32()),
99 no:src.as_i16x32(),
100 ))
101}
102
103/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
104///
105/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_popcnt_epi16)
106#[inline]
107#[target_feature(enable = "avx512bitalg,avx512vl")]
108#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
109#[cfg_attr(test, assert_instr(vpopcntw))]
110pub unsafe fn _mm256_popcnt_epi16(a: __m256i) -> __m256i {
111 transmute(src:popcnt_v16i16(a.as_i16x16()))
112}
113
114/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
115///
116/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
117/// Otherwise the computation result is written into the result.
118///
119/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_popcnt_epi16)
120#[inline]
121#[target_feature(enable = "avx512bitalg,avx512vl")]
122#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
123#[cfg_attr(test, assert_instr(vpopcntw))]
124pub unsafe fn _mm256_maskz_popcnt_epi16(k: __mmask16, a: __m256i) -> __m256i {
125 let zero: i16x16 = _mm256_setzero_si256().as_i16x16();
126 transmute(src:simd_select_bitmask(m:k, yes:popcnt_v16i16(a.as_i16x16()), no:zero))
127}
128
129/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
130///
131/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
132/// Otherwise the computation result is written into the result.
133///
134/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_popcnt_epi16)
135#[inline]
136#[target_feature(enable = "avx512bitalg,avx512vl")]
137#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
138#[cfg_attr(test, assert_instr(vpopcntw))]
139pub unsafe fn _mm256_mask_popcnt_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
140 transmute(src:simd_select_bitmask(
141 m:k,
142 yes:popcnt_v16i16(a.as_i16x16()),
143 no:src.as_i16x16(),
144 ))
145}
146
147/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
148///
149/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_popcnt_epi16)
150#[inline]
151#[target_feature(enable = "avx512bitalg,avx512vl")]
152#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
153#[cfg_attr(test, assert_instr(vpopcntw))]
154pub unsafe fn _mm_popcnt_epi16(a: __m128i) -> __m128i {
155 transmute(src:popcnt_v8i16(a.as_i16x8()))
156}
157
158/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
159///
160/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
161/// Otherwise the computation result is written into the result.
162///
163/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_popcnt_epi16)
164#[inline]
165#[target_feature(enable = "avx512bitalg,avx512vl")]
166#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
167#[cfg_attr(test, assert_instr(vpopcntw))]
168pub unsafe fn _mm_maskz_popcnt_epi16(k: __mmask8, a: __m128i) -> __m128i {
169 let zero: i16x8 = _mm_setzero_si128().as_i16x8();
170 transmute(src:simd_select_bitmask(m:k, yes:popcnt_v8i16(a.as_i16x8()), no:zero))
171}
172
173/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
174///
175/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
176/// Otherwise the computation result is written into the result.
177///
178/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_popcnt_epi16)
179#[inline]
180#[target_feature(enable = "avx512bitalg,avx512vl")]
181#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
182#[cfg_attr(test, assert_instr(vpopcntw))]
183pub unsafe fn _mm_mask_popcnt_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
184 transmute(src:simd_select_bitmask(
185 m:k,
186 yes:popcnt_v8i16(a.as_i16x8()),
187 no:src.as_i16x8(),
188 ))
189}
190
191/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
192///
193/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_popcnt_epi8)
194#[inline]
195#[target_feature(enable = "avx512bitalg")]
196#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
197#[cfg_attr(test, assert_instr(vpopcntb))]
198pub unsafe fn _mm512_popcnt_epi8(a: __m512i) -> __m512i {
199 transmute(src:popcnt_v64i8(a.as_i8x64()))
200}
201
202/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
203///
204/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
205/// Otherwise the computation result is written into the result.
206///
207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_popcnt_epi8)
208#[inline]
209#[target_feature(enable = "avx512bitalg")]
210#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
211#[cfg_attr(test, assert_instr(vpopcntb))]
212pub unsafe fn _mm512_maskz_popcnt_epi8(k: __mmask64, a: __m512i) -> __m512i {
213 let zero: i8x64 = _mm512_setzero_si512().as_i8x64();
214 transmute(src:simd_select_bitmask(m:k, yes:popcnt_v64i8(a.as_i8x64()), no:zero))
215}
216
217/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
218///
219/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
220/// Otherwise the computation result is written into the result.
221///
222/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_popcnt_epi8)
223#[inline]
224#[target_feature(enable = "avx512bitalg")]
225#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
226#[cfg_attr(test, assert_instr(vpopcntb))]
227pub unsafe fn _mm512_mask_popcnt_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
228 transmute(src:simd_select_bitmask(
229 m:k,
230 yes:popcnt_v64i8(a.as_i8x64()),
231 no:src.as_i8x64(),
232 ))
233}
234
235/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
236///
237/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_popcnt_epi8)
238#[inline]
239#[target_feature(enable = "avx512bitalg,avx512vl")]
240#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
241#[cfg_attr(test, assert_instr(vpopcntb))]
242pub unsafe fn _mm256_popcnt_epi8(a: __m256i) -> __m256i {
243 transmute(src:popcnt_v32i8(a.as_i8x32()))
244}
245
246/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
247///
248/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
249/// Otherwise the computation result is written into the result.
250///
251/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_popcnt_epi8)
252#[inline]
253#[target_feature(enable = "avx512bitalg,avx512vl")]
254#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
255#[cfg_attr(test, assert_instr(vpopcntb))]
256pub unsafe fn _mm256_maskz_popcnt_epi8(k: __mmask32, a: __m256i) -> __m256i {
257 let zero: i8x32 = _mm256_setzero_si256().as_i8x32();
258 transmute(src:simd_select_bitmask(m:k, yes:popcnt_v32i8(a.as_i8x32()), no:zero))
259}
260
261/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
262///
263/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
264/// Otherwise the computation result is written into the result.
265///
266/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_popcnt_epi8)
267#[inline]
268#[target_feature(enable = "avx512bitalg,avx512vl")]
269#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
270#[cfg_attr(test, assert_instr(vpopcntb))]
271pub unsafe fn _mm256_mask_popcnt_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
272 transmute(src:simd_select_bitmask(
273 m:k,
274 yes:popcnt_v32i8(a.as_i8x32()),
275 no:src.as_i8x32(),
276 ))
277}
278
279/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
280///
281/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_popcnt_epi8)
282#[inline]
283#[target_feature(enable = "avx512bitalg,avx512vl")]
284#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
285#[cfg_attr(test, assert_instr(vpopcntb))]
286pub unsafe fn _mm_popcnt_epi8(a: __m128i) -> __m128i {
287 transmute(src:popcnt_v16i8(a.as_i8x16()))
288}
289
290/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
291///
292/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
293/// Otherwise the computation result is written into the result.
294///
295/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_popcnt_epi8)
296#[inline]
297#[target_feature(enable = "avx512bitalg,avx512vl")]
298#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
299#[cfg_attr(test, assert_instr(vpopcntb))]
300pub unsafe fn _mm_maskz_popcnt_epi8(k: __mmask16, a: __m128i) -> __m128i {
301 let zero: i8x16 = _mm_setzero_si128().as_i8x16();
302 transmute(src:simd_select_bitmask(m:k, yes:popcnt_v16i8(a.as_i8x16()), no:zero))
303}
304
305/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
306///
307/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
308/// Otherwise the computation result is written into the result.
309///
310/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_popcnt_epi8)
311#[inline]
312#[target_feature(enable = "avx512bitalg,avx512vl")]
313#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
314#[cfg_attr(test, assert_instr(vpopcntb))]
315pub unsafe fn _mm_mask_popcnt_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
316 transmute(src:simd_select_bitmask(
317 m:k,
318 yes:popcnt_v16i8(a.as_i8x16()),
319 no:src.as_i8x16(),
320 ))
321}
322
323/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
324/// Then groups 8 8-bit values from `c`as indices into the bits of the corresponding 64-bit integer.
325/// It then selects these bits and packs them into the output.
326///
327/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_bitshuffle_epi64_mask)
328#[inline]
329#[target_feature(enable = "avx512bitalg")]
330#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
331#[cfg_attr(test, assert_instr(vpshufbitqmb))]
332pub unsafe fn _mm512_bitshuffle_epi64_mask(b: __m512i, c: __m512i) -> __mmask64 {
333 bitshuffle_512(data:b.as_i8x64(), indices:c.as_i8x64(), !0)
334}
335
336/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
337/// Then groups 8 8-bit values from `c`as indices into the bits of the corresponding 64-bit integer.
338/// It then selects these bits and packs them into the output.
339///
340/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
341/// Otherwise the computation result is written into the result.
342///
343/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_bitshuffle_epi64_mask)
344#[inline]
345#[target_feature(enable = "avx512bitalg")]
346#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
347#[cfg_attr(test, assert_instr(vpshufbitqmb))]
348pub unsafe fn _mm512_mask_bitshuffle_epi64_mask(k: __mmask64, b: __m512i, c: __m512i) -> __mmask64 {
349 bitshuffle_512(data:b.as_i8x64(), indices:c.as_i8x64(), mask:k)
350}
351
352/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
353/// Then groups 8 8-bit values from `c`as indices into the bits of the corresponding 64-bit integer.
354/// It then selects these bits and packs them into the output.
355///
356/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_bitshuffle_epi64_mask)
357#[inline]
358#[target_feature(enable = "avx512bitalg,avx512vl")]
359#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
360#[cfg_attr(test, assert_instr(vpshufbitqmb))]
361pub unsafe fn _mm256_bitshuffle_epi64_mask(b: __m256i, c: __m256i) -> __mmask32 {
362 bitshuffle_256(data:b.as_i8x32(), indices:c.as_i8x32(), !0)
363}
364
365/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
366/// Then groups 8 8-bit values from `c`as indices into the bits of the corresponding 64-bit integer.
367/// It then selects these bits and packs them into the output.
368///
369/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
370/// Otherwise the computation result is written into the result.
371///
372/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_bitshuffle_epi64_mask)
373#[inline]
374#[target_feature(enable = "avx512bitalg,avx512vl")]
375#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
376#[cfg_attr(test, assert_instr(vpshufbitqmb))]
377pub unsafe fn _mm256_mask_bitshuffle_epi64_mask(k: __mmask32, b: __m256i, c: __m256i) -> __mmask32 {
378 bitshuffle_256(data:b.as_i8x32(), indices:c.as_i8x32(), mask:k)
379}
380
381/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
382/// Then groups 8 8-bit values from `c`as indices into the bits of the corresponding 64-bit integer.
383/// It then selects these bits and packs them into the output.
384///
385/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_bitshuffle_epi64_mask)
386#[inline]
387#[target_feature(enable = "avx512bitalg,avx512vl")]
388#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
389#[cfg_attr(test, assert_instr(vpshufbitqmb))]
390pub unsafe fn _mm_bitshuffle_epi64_mask(b: __m128i, c: __m128i) -> __mmask16 {
391 bitshuffle_128(data:b.as_i8x16(), indices:c.as_i8x16(), !0)
392}
393
394/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
395/// Then groups 8 8-bit values from `c`as indices into the bits of the corresponding 64-bit integer.
396/// It then selects these bits and packs them into the output.
397///
398/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
399/// Otherwise the computation result is written into the result.
400///
401/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_bitshuffle_epi64_mask)
402#[inline]
403#[target_feature(enable = "avx512bitalg,avx512vl")]
404#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
405#[cfg_attr(test, assert_instr(vpshufbitqmb))]
406pub unsafe fn _mm_mask_bitshuffle_epi64_mask(k: __mmask16, b: __m128i, c: __m128i) -> __mmask16 {
407 bitshuffle_128(data:b.as_i8x16(), indices:c.as_i8x16(), mask:k)
408}
409
#[cfg(test)]
mod tests {
    // Several literals below are raw bit patterns rather than meaningful signed numbers.
    // The `__mXXXi` constructors take signed integers, so out-of-range literals are
    // deliberately allowed to wrap.
    #![allow(overflowing_literals)]

    use crate::core_arch::x86::*;

    use stdarch_test::simd_test;

    // ---- 16-bit population count ----

    #[simd_test(enable = "avx512bitalg,avx512f")]
    unsafe fn test_mm512_popcnt_epi16() {
        let input = _mm512_set_epi16(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
            0x3F_FF, 0x7F_FF, 0xFF_FF, -1, -100, 255, 256, 2, 4, 8, 16, 32, 64, 128, 256, 512,
            1024, 2048,
        );
        let expected = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 16, 12, 8, 1, 1, 1, 1, 1, 1,
            1, 1, 1, 1, 1, 1,
        );
        let got = _mm512_popcnt_epi16(input);
        assert_eq_m512i(got, expected);
    }

    #[simd_test(enable = "avx512bitalg,avx512f")]
    unsafe fn test_mm512_maskz_popcnt_epi16() {
        let k = 0xFF_FF_00_00;
        let input = _mm512_set_epi16(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
            0x3F_FF, 0x7F_FF, 0xFF_FF, -1, -100, 255, 256, 2, 4, 8, 16, 32, 64, 128, 256, 512,
            1024, 2048,
        );
        // Lanes outside the mask must come back as zero.
        let expected = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0,
        );
        let got = _mm512_maskz_popcnt_epi16(k, input);
        assert_eq_m512i(got, expected);
    }

    #[simd_test(enable = "avx512bitalg,avx512f")]
    unsafe fn test_mm512_mask_popcnt_epi16() {
        let k = 0xFF_FF_00_00;
        let input = _mm512_set_epi16(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
            0x3F_FF, 0x7F_FF, 0xFF_FF, -1, -100, 255, 256, 2, 4, 8, 16, 32, 64, 128, 256, 512,
            1024, 2048,
        );
        // Lanes outside the mask must keep the `src` value (here: the input itself).
        let expected = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0xFF_FF, -1, -100, 255, 256, 2,
            4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048,
        );
        let got = _mm512_mask_popcnt_epi16(input, k, input);
        assert_eq_m512i(got, expected);
    }

    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm256_popcnt_epi16() {
        let input = _mm256_set_epi16(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
            0x3F_FF, 0x7F_FF,
        );
        let expected = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let got = _mm256_popcnt_epi16(input);
        assert_eq_m256i(got, expected);
    }

    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_popcnt_epi16() {
        let k = 0xFF_00;
        let input = _mm256_set_epi16(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
            0x3F_FF, 0x7F_FF,
        );
        let expected = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0);
        let got = _mm256_maskz_popcnt_epi16(k, input);
        assert_eq_m256i(got, expected);
    }

    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm256_mask_popcnt_epi16() {
        let k = 0xFF_00;
        let input = _mm256_set_epi16(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
            0x3F_FF, 0x7F_FF,
        );
        let expected = _mm256_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF, 0x3F_FF, 0x7F_FF,
        );
        let got = _mm256_mask_popcnt_epi16(input, k, input);
        assert_eq_m256i(got, expected);
    }

    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm_popcnt_epi16() {
        let input = _mm_set_epi16(0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F);
        let expected = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        let got = _mm_popcnt_epi16(input);
        assert_eq_m128i(got, expected);
    }

    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm_maskz_popcnt_epi16() {
        let k = 0xF0;
        let input = _mm_set_epi16(0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F);
        let expected = _mm_set_epi16(0, 1, 2, 3, 0, 0, 0, 0);
        let got = _mm_maskz_popcnt_epi16(k, input);
        assert_eq_m128i(got, expected);
    }

    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm_mask_popcnt_epi16() {
        let k = 0xF0;
        let input = _mm_set_epi16(0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F);
        let expected = _mm_set_epi16(0, 1, 2, 3, 0xF, 0x1F, 0x3F, 0x7F);
        let got = _mm_mask_popcnt_epi16(input, k, input);
        assert_eq_m128i(got, expected);
    }

    // ---- 8-bit population count ----

    #[simd_test(enable = "avx512bitalg,avx512f")]
    unsafe fn test_mm512_popcnt_epi8() {
        let input = _mm512_set_epi8(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 128, 171, 206, 100,
            217, 109, 253, 190, 177, 254, 179, 215, 230, 68, 201, 172, 183, 154, 84, 56, 227, 189,
            140, 35, 117, 219, 169, 226, 170, 13, 22, 159, 251, 73, 121, 143, 145, 85, 91, 137, 90,
            225, 21, 249, 211, 155, 228, 70,
        );
        let expected = _mm512_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 1, 5, 5, 3, 5, 5, 7, 6, 4, 7, 5, 6, 5,
            2, 4, 4, 6, 4, 3, 3, 5, 6, 3, 3, 5, 6, 4, 4, 4, 3, 3, 6, 7, 3, 5, 5, 3, 4, 5, 3, 4, 4,
            3, 6, 5, 5, 4, 3,
        );
        let got = _mm512_popcnt_epi8(input);
        assert_eq_m512i(got, expected);
    }

    #[simd_test(enable = "avx512bitalg,avx512f")]
    unsafe fn test_mm512_maskz_popcnt_epi8() {
        let k = 0xFF_FF_FF_FF_00_00_00_00;
        let input = _mm512_set_epi8(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 128, 171, 206, 100,
            217, 109, 253, 190, 177, 254, 179, 215, 230, 68, 201, 172, 183, 154, 84, 56, 227, 189,
            140, 35, 117, 219, 169, 226, 170, 13, 22, 159, 251, 73, 121, 143, 145, 85, 91, 137, 90,
            225, 21, 249, 211, 155, 228, 70,
        );
        let expected = _mm512_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 1, 5, 5, 3, 5, 5, 7, 6, 4, 7, 5, 6, 5,
            2, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0,
        );
        let got = _mm512_maskz_popcnt_epi8(k, input);
        assert_eq_m512i(got, expected);
    }

    #[simd_test(enable = "avx512bitalg,avx512f")]
    unsafe fn test_mm512_mask_popcnt_epi8() {
        let k = 0xFF_FF_FF_FF_00_00_00_00;
        let input = _mm512_set_epi8(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 128, 171, 206, 100,
            217, 109, 253, 190, 177, 254, 179, 215, 230, 68, 201, 172, 183, 154, 84, 56, 227, 189,
            140, 35, 117, 219, 169, 226, 170, 13, 22, 159, 251, 73, 121, 143, 145, 85, 91, 137, 90,
            225, 21, 249, 211, 155, 228, 70,
        );
        let expected = _mm512_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 1, 5, 5, 3, 5, 5, 7, 6, 4, 7, 5, 6, 5,
            2, 4, 4, 183, 154, 84, 56, 227, 189, 140, 35, 117, 219, 169, 226, 170, 13, 22, 159,
            251, 73, 121, 143, 145, 85, 91, 137, 90, 225, 21, 249, 211, 155, 228, 70,
        );
        let got = _mm512_mask_popcnt_epi8(input, k, input);
        assert_eq_m512i(got, expected);
    }

    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm256_popcnt_epi8() {
        let input = _mm256_set_epi8(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 128, 171, 206, 100,
            217, 109, 253, 190, 177, 254, 179, 215, 230, 68, 201, 172,
        );
        let expected = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 1, 5, 5, 3, 5, 5, 7, 6, 4, 7, 5, 6, 5,
            2, 4, 4,
        );
        let got = _mm256_popcnt_epi8(input);
        assert_eq_m256i(got, expected);
    }

    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_popcnt_epi8() {
        let k = 0xFF_FF_00_00;
        let input = _mm256_set_epi8(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 251, 73, 121, 143,
            145, 85, 91, 137, 90, 225, 21, 249, 211, 155, 228, 70,
        );
        let expected = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0,
        );
        let got = _mm256_maskz_popcnt_epi8(k, input);
        assert_eq_m256i(got, expected);
    }

    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm256_mask_popcnt_epi8() {
        let k = 0xFF_FF_00_00;
        let input = _mm256_set_epi8(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 251, 73, 121, 143,
            145, 85, 91, 137, 90, 225, 21, 249, 211, 155, 228, 70,
        );
        let expected = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 251, 73, 121, 143, 145, 85, 91, 137,
            90, 225, 21, 249, 211, 155, 228, 70,
        );
        let got = _mm256_mask_popcnt_epi8(input, k, input);
        assert_eq_m256i(got, expected);
    }

    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm_popcnt_epi8() {
        let input = _mm_set_epi8(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64,
        );
        let expected = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1);
        let got = _mm_popcnt_epi8(input);
        assert_eq_m128i(got, expected);
    }

    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm_maskz_popcnt_epi8() {
        let k = 0xFF_00;
        let input = _mm_set_epi8(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 90, 225, 21, 249, 211, 155, 228, 70,
        );
        let expected = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0);
        let got = _mm_maskz_popcnt_epi8(k, input);
        assert_eq_m128i(got, expected);
    }

    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm_mask_popcnt_epi8() {
        let k = 0xFF_00;
        let input = _mm_set_epi8(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 90, 225, 21, 249, 211, 155, 228, 70,
        );
        let expected = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 90, 225, 21, 249, 211, 155, 228, 70);
        let got = _mm_mask_popcnt_epi8(input, k, input);
        assert_eq_m128i(got, expected);
    }

    // ---- bit shuffle ----

    #[simd_test(enable = "avx512bitalg,avx512f")]
    unsafe fn test_mm512_bitshuffle_epi64_mask() {
        let indices = _mm512_set_epi8(
            63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56, 32, 32, 16, 16, 0, 0,
            8, 8, 56, 48, 40, 32, 24, 16, 8, 0, 63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59,
            58, 57, 56, 32, 32, 16, 16, 0, 0, 8, 8, 56, 48, 40, 32, 24, 16, 8, 0,
        );
        let data = _mm512_setr_epi64(
            0xFF_FF_FF_FF_00_00_00_00,
            0xFF_00_FF_00_FF_00_FF_00,
            0xFF_00_00_00_00_00_00_00,
            0xAC_00_00_00_00_00_00_00,
            0xFF_FF_FF_FF_00_00_00_00,
            0xFF_00_FF_00_FF_00_FF_00,
            0xFF_00_00_00_00_00_00_00,
            0xAC_00_00_00_00_00_00_00,
        );
        // One expected result byte per 64-bit element of `data`.
        let expected = 0xF0 << 0
            | 0x03 << 8
            | 0xFF << 16
            | 0xAC << 24
            | 0xF0 << 32
            | 0x03 << 40
            | 0xFF << 48
            | 0xAC << 56;
        let got = _mm512_bitshuffle_epi64_mask(data, indices);

        assert_eq!(got, expected);
    }

    #[simd_test(enable = "avx512bitalg,avx512f")]
    unsafe fn test_mm512_mask_bitshuffle_epi64_mask() {
        let indices = _mm512_set_epi8(
            63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56, 32, 32, 16, 16, 0, 0,
            8, 8, 56, 48, 40, 32, 24, 16, 8, 0, 63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59,
            58, 57, 56, 32, 32, 16, 16, 0, 0, 8, 8, 56, 48, 40, 32, 24, 16, 8, 0,
        );
        let data = _mm512_setr_epi64(
            0xFF_FF_FF_FF_00_00_00_00,
            0xFF_00_FF_00_FF_00_FF_00,
            0xFF_00_00_00_00_00_00_00,
            0xAC_00_00_00_00_00_00_00,
            0xFF_FF_FF_FF_00_00_00_00,
            0xFF_00_FF_00_FF_00_FF_00,
            0xFF_00_00_00_00_00_00_00,
            0xAC_00_00_00_00_00_00_00,
        );
        let k = 0xFF_FF_FF_FF_00_00_00_00;
        // The low 32 result bits are masked off and must be zero.
        let expected = 0x00 << 0
            | 0x00 << 8
            | 0x00 << 16
            | 0x00 << 24
            | 0xF0 << 32
            | 0x03 << 40
            | 0xFF << 48
            | 0xAC << 56;
        let got = _mm512_mask_bitshuffle_epi64_mask(k, data, indices);

        assert_eq!(got, expected);
    }

    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm256_bitshuffle_epi64_mask() {
        let indices = _mm256_set_epi8(
            63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56, 32, 32, 16, 16, 0, 0,
            8, 8, 56, 48, 40, 32, 24, 16, 8, 0,
        );
        let data = _mm256_setr_epi64x(
            0xFF_FF_FF_FF_00_00_00_00,
            0xFF_00_FF_00_FF_00_FF_00,
            0xFF_00_00_00_00_00_00_00,
            0xAC_00_00_00_00_00_00_00,
        );
        let expected = 0xF0 << 0 | 0x03 << 8 | 0xFF << 16 | 0xAC << 24;
        let got = _mm256_bitshuffle_epi64_mask(data, indices);

        assert_eq!(got, expected);
    }

    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm256_mask_bitshuffle_epi64_mask() {
        let indices = _mm256_set_epi8(
            63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56, 32, 32, 16, 16, 0, 0,
            8, 8, 56, 48, 40, 32, 24, 16, 8, 0,
        );
        let data = _mm256_setr_epi64x(
            0xFF_FF_FF_FF_00_00_00_00,
            0xFF_00_FF_00_FF_00_FF_00,
            0xFF_00_00_00_00_00_00_00,
            0xAC_00_00_00_00_00_00_00,
        );
        let k = 0xFF_FF_00_00;
        let expected = 0x00 << 0 | 0x00 << 8 | 0xFF << 16 | 0xAC << 24;
        let got = _mm256_mask_bitshuffle_epi64_mask(k, data, indices);

        assert_eq!(got, expected);
    }

    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm_bitshuffle_epi64_mask() {
        let indices = _mm_set_epi8(
            63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56,
        );
        let data = _mm_setr_epi64x(0xFF_00_00_00_00_00_00_00, 0xAC_00_00_00_00_00_00_00);
        let expected = 0xFF << 0 | 0xAC << 8;
        let got = _mm_bitshuffle_epi64_mask(data, indices);

        assert_eq!(got, expected);
    }

    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm_mask_bitshuffle_epi64_mask() {
        let indices = _mm_set_epi8(
            63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56,
        );
        let data = _mm_setr_epi64x(0xFF_00_00_00_00_00_00_00, 0xAC_00_00_00_00_00_00_00);
        let k = 0xFF_00;
        let expected = 0x00 << 0 | 0xAC << 8;
        let got = _mm_mask_bitshuffle_epi64_mask(k, data, indices);

        assert_eq!(got, expected);
    }
}
785