1//! Bit-oriented Algorithms (BITALG)
2//!
3//! The intrinsics here correspond to those in the `immintrin.h` C header.
4//!
5//! The reference is [Intel 64 and IA-32 Architectures Software Developer's
6//! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref].
7//!
8//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
9
10use crate::core_arch::simd::i16x16;
11use crate::core_arch::simd::i16x32;
12use crate::core_arch::simd::i16x8;
13use crate::core_arch::simd::i8x16;
14use crate::core_arch::simd::i8x32;
15use crate::core_arch::simd::i8x64;
16use crate::core_arch::simd_llvm::simd_select_bitmask;
17use crate::core_arch::x86::__m128i;
18use crate::core_arch::x86::__m256i;
19use crate::core_arch::x86::__m512i;
20use crate::core_arch::x86::__mmask16;
21use crate::core_arch::x86::__mmask32;
22use crate::core_arch::x86::__mmask64;
23use crate::core_arch::x86::__mmask8;
24use crate::core_arch::x86::_mm256_setzero_si256;
25use crate::core_arch::x86::_mm512_setzero_si512;
26use crate::core_arch::x86::_mm_setzero_si128;
27use crate::core_arch::x86::m128iExt;
28use crate::core_arch::x86::m256iExt;
29use crate::core_arch::x86::m512iExt;
30use crate::mem::transmute;
31
32#[cfg(test)]
33use stdarch_test::assert_instr;
34
35#[allow(improper_ctypes)]
36extern "C" {
37 #[link_name = "llvm.ctpop.v32i16"]
38 fn popcnt_v32i16(x: i16x32) -> i16x32;
39 #[link_name = "llvm.ctpop.v16i16"]
40 fn popcnt_v16i16(x: i16x16) -> i16x16;
41 #[link_name = "llvm.ctpop.v8i16"]
42 fn popcnt_v8i16(x: i16x8) -> i16x8;
43
44 #[link_name = "llvm.ctpop.v64i8"]
45 fn popcnt_v64i8(x: i8x64) -> i8x64;
46 #[link_name = "llvm.ctpop.v32i8"]
47 fn popcnt_v32i8(x: i8x32) -> i8x32;
48 #[link_name = "llvm.ctpop.v16i8"]
49 fn popcnt_v16i8(x: i8x16) -> i8x16;
50
51 #[link_name = "llvm.x86.avx512.mask.vpshufbitqmb.512"]
52 fn bitshuffle_512(data: i8x64, indices: i8x64, mask: __mmask64) -> __mmask64;
53 #[link_name = "llvm.x86.avx512.mask.vpshufbitqmb.256"]
54 fn bitshuffle_256(data: i8x32, indices: i8x32, mask: __mmask32) -> __mmask32;
55 #[link_name = "llvm.x86.avx512.mask.vpshufbitqmb.128"]
56 fn bitshuffle_128(data: i8x16, indices: i8x16, mask: __mmask16) -> __mmask16;
57}
58
59/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
60///
61/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_popcnt_epi16)
62#[inline]
63#[target_feature(enable = "avx512bitalg")]
64#[cfg_attr(test, assert_instr(vpopcntw))]
65pub unsafe fn _mm512_popcnt_epi16(a: __m512i) -> __m512i {
66 transmute(src:popcnt_v32i16(a.as_i16x32()))
67}
68
69/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
70///
71/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
72/// Otherwise the computation result is written into the result.
73///
74/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_popcnt_epi16)
75#[inline]
76#[target_feature(enable = "avx512bitalg")]
77#[cfg_attr(test, assert_instr(vpopcntw))]
78pub unsafe fn _mm512_maskz_popcnt_epi16(k: __mmask32, a: __m512i) -> __m512i {
79 let zero: i16x32 = _mm512_setzero_si512().as_i16x32();
80 transmute(src:simd_select_bitmask(m:k, a:popcnt_v32i16(a.as_i16x32()), b:zero))
81}
82
83/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
84///
85/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
86/// Otherwise the computation result is written into the result.
87///
88/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_popcnt_epi16)
89#[inline]
90#[target_feature(enable = "avx512bitalg")]
91#[cfg_attr(test, assert_instr(vpopcntw))]
92pub unsafe fn _mm512_mask_popcnt_epi16(src: __m512i, k: __mmask32, a: __m512i) -> __m512i {
93 transmute(src:simd_select_bitmask(
94 m:k,
95 a:popcnt_v32i16(a.as_i16x32()),
96 b:src.as_i16x32(),
97 ))
98}
99
100/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
101///
102/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_popcnt_epi16)
103#[inline]
104#[target_feature(enable = "avx512bitalg,avx512vl")]
105#[cfg_attr(test, assert_instr(vpopcntw))]
106pub unsafe fn _mm256_popcnt_epi16(a: __m256i) -> __m256i {
107 transmute(src:popcnt_v16i16(a.as_i16x16()))
108}
109
110/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
111///
112/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
113/// Otherwise the computation result is written into the result.
114///
115/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_popcnt_epi16)
116#[inline]
117#[target_feature(enable = "avx512bitalg,avx512vl")]
118#[cfg_attr(test, assert_instr(vpopcntw))]
119pub unsafe fn _mm256_maskz_popcnt_epi16(k: __mmask16, a: __m256i) -> __m256i {
120 let zero: i16x16 = _mm256_setzero_si256().as_i16x16();
121 transmute(src:simd_select_bitmask(m:k, a:popcnt_v16i16(a.as_i16x16()), b:zero))
122}
123
124/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
125///
126/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
127/// Otherwise the computation result is written into the result.
128///
129/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_popcnt_epi16)
130#[inline]
131#[target_feature(enable = "avx512bitalg,avx512vl")]
132#[cfg_attr(test, assert_instr(vpopcntw))]
133pub unsafe fn _mm256_mask_popcnt_epi16(src: __m256i, k: __mmask16, a: __m256i) -> __m256i {
134 transmute(src:simd_select_bitmask(
135 m:k,
136 a:popcnt_v16i16(a.as_i16x16()),
137 b:src.as_i16x16(),
138 ))
139}
140
141/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
142///
143/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_popcnt_epi16)
144#[inline]
145#[target_feature(enable = "avx512bitalg,avx512vl")]
146#[cfg_attr(test, assert_instr(vpopcntw))]
147pub unsafe fn _mm_popcnt_epi16(a: __m128i) -> __m128i {
148 transmute(src:popcnt_v8i16(a.as_i16x8()))
149}
150
151/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
152///
153/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
154/// Otherwise the computation result is written into the result.
155///
156/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_popcnt_epi16)
157#[inline]
158#[target_feature(enable = "avx512bitalg,avx512vl")]
159#[cfg_attr(test, assert_instr(vpopcntw))]
160pub unsafe fn _mm_maskz_popcnt_epi16(k: __mmask8, a: __m128i) -> __m128i {
161 let zero: i16x8 = _mm_setzero_si128().as_i16x8();
162 transmute(src:simd_select_bitmask(m:k, a:popcnt_v8i16(a.as_i16x8()), b:zero))
163}
164
165/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
166///
167/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
168/// Otherwise the computation result is written into the result.
169///
170/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_popcnt_epi16)
171#[inline]
172#[target_feature(enable = "avx512bitalg,avx512vl")]
173#[cfg_attr(test, assert_instr(vpopcntw))]
174pub unsafe fn _mm_mask_popcnt_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
175 transmute(src:simd_select_bitmask(
176 m:k,
177 a:popcnt_v8i16(a.as_i16x8()),
178 b:src.as_i16x8(),
179 ))
180}
181
182/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
183///
184/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_popcnt_epi8)
185#[inline]
186#[target_feature(enable = "avx512bitalg")]
187#[cfg_attr(test, assert_instr(vpopcntb))]
188pub unsafe fn _mm512_popcnt_epi8(a: __m512i) -> __m512i {
189 transmute(src:popcnt_v64i8(a.as_i8x64()))
190}
191
192/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
193///
194/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
195/// Otherwise the computation result is written into the result.
196///
197/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_popcnt_epi8)
198#[inline]
199#[target_feature(enable = "avx512bitalg")]
200#[cfg_attr(test, assert_instr(vpopcntb))]
201pub unsafe fn _mm512_maskz_popcnt_epi8(k: __mmask64, a: __m512i) -> __m512i {
202 let zero: i8x64 = _mm512_setzero_si512().as_i8x64();
203 transmute(src:simd_select_bitmask(m:k, a:popcnt_v64i8(a.as_i8x64()), b:zero))
204}
205
206/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
207///
208/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
209/// Otherwise the computation result is written into the result.
210///
211/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_popcnt_epi8)
212#[inline]
213#[target_feature(enable = "avx512bitalg")]
214#[cfg_attr(test, assert_instr(vpopcntb))]
215pub unsafe fn _mm512_mask_popcnt_epi8(src: __m512i, k: __mmask64, a: __m512i) -> __m512i {
216 transmute(src:simd_select_bitmask(
217 m:k,
218 a:popcnt_v64i8(a.as_i8x64()),
219 b:src.as_i8x64(),
220 ))
221}
222
223/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
224///
225/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_popcnt_epi8)
226#[inline]
227#[target_feature(enable = "avx512bitalg,avx512vl")]
228#[cfg_attr(test, assert_instr(vpopcntb))]
229pub unsafe fn _mm256_popcnt_epi8(a: __m256i) -> __m256i {
230 transmute(src:popcnt_v32i8(a.as_i8x32()))
231}
232
233/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
234///
235/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
236/// Otherwise the computation result is written into the result.
237///
238/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_popcnt_epi8)
239#[inline]
240#[target_feature(enable = "avx512bitalg,avx512vl")]
241#[cfg_attr(test, assert_instr(vpopcntb))]
242pub unsafe fn _mm256_maskz_popcnt_epi8(k: __mmask32, a: __m256i) -> __m256i {
243 let zero: i8x32 = _mm256_setzero_si256().as_i8x32();
244 transmute(src:simd_select_bitmask(m:k, a:popcnt_v32i8(a.as_i8x32()), b:zero))
245}
246
247/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
248///
249/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
250/// Otherwise the computation result is written into the result.
251///
252/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_popcnt_epi8)
253#[inline]
254#[target_feature(enable = "avx512bitalg,avx512vl")]
255#[cfg_attr(test, assert_instr(vpopcntb))]
256pub unsafe fn _mm256_mask_popcnt_epi8(src: __m256i, k: __mmask32, a: __m256i) -> __m256i {
257 transmute(src:simd_select_bitmask(
258 m:k,
259 a:popcnt_v32i8(a.as_i8x32()),
260 b:src.as_i8x32(),
261 ))
262}
263
264/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
265///
266/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_popcnt_epi8)
267#[inline]
268#[target_feature(enable = "avx512bitalg,avx512vl")]
269#[cfg_attr(test, assert_instr(vpopcntb))]
270pub unsafe fn _mm_popcnt_epi8(a: __m128i) -> __m128i {
271 transmute(src:popcnt_v16i8(a.as_i8x16()))
272}
273
274/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
275///
276/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
277/// Otherwise the computation result is written into the result.
278///
279/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_popcnt_epi8)
280#[inline]
281#[target_feature(enable = "avx512bitalg,avx512vl")]
282#[cfg_attr(test, assert_instr(vpopcntb))]
283pub unsafe fn _mm_maskz_popcnt_epi8(k: __mmask16, a: __m128i) -> __m128i {
284 let zero: i8x16 = _mm_setzero_si128().as_i8x16();
285 transmute(src:simd_select_bitmask(m:k, a:popcnt_v16i8(a.as_i8x16()), b:zero))
286}
287
288/// For each packed 8-bit integer maps the value to the number of logical 1 bits.
289///
290/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
291/// Otherwise the computation result is written into the result.
292///
293/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_popcnt_epi8)
294#[inline]
295#[target_feature(enable = "avx512bitalg,avx512vl")]
296#[cfg_attr(test, assert_instr(vpopcntb))]
297pub unsafe fn _mm_mask_popcnt_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m128i {
298 transmute(src:simd_select_bitmask(
299 m:k,
300 a:popcnt_v16i8(a.as_i8x16()),
301 b:src.as_i8x16(),
302 ))
303}
304
305/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
306/// Then groups 8 8-bit values from `c`as indices into the bits of the corresponding 64-bit integer.
307/// It then selects these bits and packs them into the output.
308///
309/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_bitshuffle_epi64_mask)
310#[inline]
311#[target_feature(enable = "avx512bitalg")]
312#[cfg_attr(test, assert_instr(vpshufbitqmb))]
313pub unsafe fn _mm512_bitshuffle_epi64_mask(b: __m512i, c: __m512i) -> __mmask64 {
314 bitshuffle_512(data:b.as_i8x64(), indices:c.as_i8x64(), !0)
315}
316
317/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
318/// Then groups 8 8-bit values from `c`as indices into the bits of the corresponding 64-bit integer.
319/// It then selects these bits and packs them into the output.
320///
321/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
322/// Otherwise the computation result is written into the result.
323///
324/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_bitshuffle_epi64_mask)
325#[inline]
326#[target_feature(enable = "avx512bitalg")]
327#[cfg_attr(test, assert_instr(vpshufbitqmb))]
328pub unsafe fn _mm512_mask_bitshuffle_epi64_mask(k: __mmask64, b: __m512i, c: __m512i) -> __mmask64 {
329 bitshuffle_512(data:b.as_i8x64(), indices:c.as_i8x64(), mask:k)
330}
331
332/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
333/// Then groups 8 8-bit values from `c`as indices into the bits of the corresponding 64-bit integer.
334/// It then selects these bits and packs them into the output.
335///
336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_bitshuffle_epi64_mask)
337#[inline]
338#[target_feature(enable = "avx512bitalg,avx512vl")]
339#[cfg_attr(test, assert_instr(vpshufbitqmb))]
340pub unsafe fn _mm256_bitshuffle_epi64_mask(b: __m256i, c: __m256i) -> __mmask32 {
341 bitshuffle_256(data:b.as_i8x32(), indices:c.as_i8x32(), !0)
342}
343
344/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
345/// Then groups 8 8-bit values from `c`as indices into the bits of the corresponding 64-bit integer.
346/// It then selects these bits and packs them into the output.
347///
348/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
349/// Otherwise the computation result is written into the result.
350///
351/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_bitshuffle_epi64_mask)
352#[inline]
353#[target_feature(enable = "avx512bitalg,avx512vl")]
354#[cfg_attr(test, assert_instr(vpshufbitqmb))]
355pub unsafe fn _mm256_mask_bitshuffle_epi64_mask(k: __mmask32, b: __m256i, c: __m256i) -> __mmask32 {
356 bitshuffle_256(data:b.as_i8x32(), indices:c.as_i8x32(), mask:k)
357}
358
359/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
360/// Then groups 8 8-bit values from `c`as indices into the bits of the corresponding 64-bit integer.
361/// It then selects these bits and packs them into the output.
362///
363/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_bitshuffle_epi64_mask)
364#[inline]
365#[target_feature(enable = "avx512bitalg,avx512vl")]
366#[cfg_attr(test, assert_instr(vpshufbitqmb))]
367pub unsafe fn _mm_bitshuffle_epi64_mask(b: __m128i, c: __m128i) -> __mmask16 {
368 bitshuffle_128(data:b.as_i8x16(), indices:c.as_i8x16(), !0)
369}
370
371/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
372/// Then groups 8 8-bit values from `c`as indices into the bits of the corresponding 64-bit integer.
373/// It then selects these bits and packs them into the output.
374///
375/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
376/// Otherwise the computation result is written into the result.
377///
378/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_bitshuffle_epi64_mask)
379#[inline]
380#[target_feature(enable = "avx512bitalg,avx512vl")]
381#[cfg_attr(test, assert_instr(vpshufbitqmb))]
382pub unsafe fn _mm_mask_bitshuffle_epi64_mask(k: __mmask16, b: __m128i, c: __m128i) -> __mmask16 {
383 bitshuffle_128(data:b.as_i8x16(), indices:c.as_i8x16(), mask:k)
384}
385
#[cfg(test)]
mod tests {
    // Several literals below are raw bit patterns rather than numbers. Their sign is
    // meaningless; they only look signed because __mXXXi happens to be defined in terms
    // of signed integers, hence the lint exemption.
    #![allow(overflowing_literals)]

    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;

    // ---- 16-bit population count ----

    #[simd_test(enable = "avx512bitalg,avx512f")]
    unsafe fn test_mm512_popcnt_epi16() {
        let input = _mm512_set_epi16(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
            0x3F_FF, 0x7F_FF, 0xFF_FF, -1, -100, 255, 256, 2, 4, 8, 16, 32, 64, 128, 256, 512,
            1024, 2048,
        );
        let expected = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 16, 12, 8, 1, 1, 1, 1, 1, 1,
            1, 1, 1, 1, 1, 1,
        );
        assert_eq_m512i(_mm512_popcnt_epi16(input), expected);
    }

    #[simd_test(enable = "avx512bitalg,avx512f")]
    unsafe fn test_mm512_maskz_popcnt_epi16() {
        let input = _mm512_set_epi16(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
            0x3F_FF, 0x7F_FF, 0xFF_FF, -1, -100, 255, 256, 2, 4, 8, 16, 32, 64, 128, 256, 512,
            1024, 2048,
        );
        let k = 0xFF_FF_00_00;
        // Masked-off lanes are expected to come back as zero.
        let expected = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0,
        );
        assert_eq_m512i(_mm512_maskz_popcnt_epi16(k, input), expected);
    }

    #[simd_test(enable = "avx512bitalg,avx512f")]
    unsafe fn test_mm512_mask_popcnt_epi16() {
        let input = _mm512_set_epi16(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
            0x3F_FF, 0x7F_FF, 0xFF_FF, -1, -100, 255, 256, 2, 4, 8, 16, 32, 64, 128, 256, 512,
            1024, 2048,
        );
        let k = 0xFF_FF_00_00;
        // The input doubles as `src`, so masked-off lanes keep their input value.
        let expected = _mm512_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0xFF_FF, -1, -100, 255, 256, 2,
            4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048,
        );
        assert_eq_m512i(_mm512_mask_popcnt_epi16(input, k, input), expected);
    }

    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm256_popcnt_epi16() {
        let input = _mm256_set_epi16(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
            0x3F_FF, 0x7F_FF,
        );
        let expected = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        assert_eq_m256i(_mm256_popcnt_epi16(input), expected);
    }

    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_popcnt_epi16() {
        let input = _mm256_set_epi16(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
            0x3F_FF, 0x7F_FF,
        );
        let k = 0xFF_00;
        // Masked-off lanes are expected to come back as zero.
        let expected = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m256i(_mm256_maskz_popcnt_epi16(k, input), expected);
    }

    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm256_mask_popcnt_epi16() {
        let input = _mm256_set_epi16(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF,
            0x3F_FF, 0x7F_FF,
        );
        let k = 0xFF_00;
        // The input doubles as `src`, so masked-off lanes keep their input value.
        let expected = _mm256_set_epi16(
            0, 1, 2, 3, 4, 5, 6, 7, 0xFF, 0x1_FF, 0x3_FF, 0x7_FF, 0xF_FF, 0x1F_FF, 0x3F_FF, 0x7F_FF,
        );
        assert_eq_m256i(_mm256_mask_popcnt_epi16(input, k, input), expected);
    }

    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm_popcnt_epi16() {
        let input = _mm_set_epi16(0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F);
        let expected = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
        assert_eq_m128i(_mm_popcnt_epi16(input), expected);
    }

    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm_maskz_popcnt_epi16() {
        let input = _mm_set_epi16(0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F);
        let k = 0xF0;
        // Masked-off lanes are expected to come back as zero.
        let expected = _mm_set_epi16(0, 1, 2, 3, 0, 0, 0, 0);
        assert_eq_m128i(_mm_maskz_popcnt_epi16(k, input), expected);
    }

    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm_mask_popcnt_epi16() {
        let input = _mm_set_epi16(0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F);
        let k = 0xF0;
        // The input doubles as `src`, so masked-off lanes keep their input value.
        let expected = _mm_set_epi16(0, 1, 2, 3, 0xF, 0x1F, 0x3F, 0x7F);
        assert_eq_m128i(_mm_mask_popcnt_epi16(input, k, input), expected);
    }

    // ---- 8-bit population count ----

    #[simd_test(enable = "avx512bitalg,avx512f")]
    unsafe fn test_mm512_popcnt_epi8() {
        let input = _mm512_set_epi8(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 128, 171, 206, 100,
            217, 109, 253, 190, 177, 254, 179, 215, 230, 68, 201, 172, 183, 154, 84, 56, 227, 189,
            140, 35, 117, 219, 169, 226, 170, 13, 22, 159, 251, 73, 121, 143, 145, 85, 91, 137, 90,
            225, 21, 249, 211, 155, 228, 70,
        );
        let expected = _mm512_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 1, 5, 5, 3, 5, 5, 7, 6, 4, 7, 5, 6, 5,
            2, 4, 4, 6, 4, 3, 3, 5, 6, 3, 3, 5, 6, 4, 4, 4, 3, 3, 6, 7, 3, 5, 5, 3, 4, 5, 3, 4, 4,
            3, 6, 5, 5, 4, 3,
        );
        assert_eq_m512i(_mm512_popcnt_epi8(input), expected);
    }

    #[simd_test(enable = "avx512bitalg,avx512f")]
    unsafe fn test_mm512_maskz_popcnt_epi8() {
        let input = _mm512_set_epi8(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 128, 171, 206, 100,
            217, 109, 253, 190, 177, 254, 179, 215, 230, 68, 201, 172, 183, 154, 84, 56, 227, 189,
            140, 35, 117, 219, 169, 226, 170, 13, 22, 159, 251, 73, 121, 143, 145, 85, 91, 137, 90,
            225, 21, 249, 211, 155, 228, 70,
        );
        let k = 0xFF_FF_FF_FF_00_00_00_00;
        // Masked-off lanes are expected to come back as zero.
        let expected = _mm512_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 1, 5, 5, 3, 5, 5, 7, 6, 4, 7, 5, 6, 5,
            2, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0,
        );
        assert_eq_m512i(_mm512_maskz_popcnt_epi8(k, input), expected);
    }

    #[simd_test(enable = "avx512bitalg,avx512f")]
    unsafe fn test_mm512_mask_popcnt_epi8() {
        let input = _mm512_set_epi8(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 128, 171, 206, 100,
            217, 109, 253, 190, 177, 254, 179, 215, 230, 68, 201, 172, 183, 154, 84, 56, 227, 189,
            140, 35, 117, 219, 169, 226, 170, 13, 22, 159, 251, 73, 121, 143, 145, 85, 91, 137, 90,
            225, 21, 249, 211, 155, 228, 70,
        );
        let k = 0xFF_FF_FF_FF_00_00_00_00;
        // The input doubles as `src`, so masked-off lanes keep their input value.
        let expected = _mm512_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 1, 5, 5, 3, 5, 5, 7, 6, 4, 7, 5, 6, 5,
            2, 4, 4, 183, 154, 84, 56, 227, 189, 140, 35, 117, 219, 169, 226, 170, 13, 22, 159,
            251, 73, 121, 143, 145, 85, 91, 137, 90, 225, 21, 249, 211, 155, 228, 70,
        );
        assert_eq_m512i(_mm512_mask_popcnt_epi8(input, k, input), expected);
    }

    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm256_popcnt_epi8() {
        let input = _mm256_set_epi8(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 128, 171, 206, 100,
            217, 109, 253, 190, 177, 254, 179, 215, 230, 68, 201, 172,
        );
        let expected = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 1, 5, 5, 3, 5, 5, 7, 6, 4, 7, 5, 6, 5,
            2, 4, 4,
        );
        assert_eq_m256i(_mm256_popcnt_epi8(input), expected);
    }

    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_popcnt_epi8() {
        let input = _mm256_set_epi8(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 251, 73, 121, 143,
            145, 85, 91, 137, 90, 225, 21, 249, 211, 155, 228, 70,
        );
        let k = 0xFF_FF_00_00;
        // Masked-off lanes are expected to come back as zero.
        let expected = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0,
        );
        assert_eq_m256i(_mm256_maskz_popcnt_epi8(k, input), expected);
    }

    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm256_mask_popcnt_epi8() {
        let input = _mm256_set_epi8(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64, 251, 73, 121, 143,
            145, 85, 91, 137, 90, 225, 21, 249, 211, 155, 228, 70,
        );
        let k = 0xFF_FF_00_00;
        // The input doubles as `src`, so masked-off lanes keep their input value.
        let expected = _mm256_set_epi8(
            0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1, 251, 73, 121, 143, 145, 85, 91, 137,
            90, 225, 21, 249, 211, 155, 228, 70,
        );
        assert_eq_m256i(_mm256_mask_popcnt_epi8(input, k, input), expected);
    }

    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm_popcnt_epi8() {
        let input = _mm_set_epi8(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, -1, 2, 4, 8, 16, 32, 64,
        );
        let expected = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 1, 1, 1, 1, 1, 1);
        assert_eq_m128i(_mm_popcnt_epi8(input), expected);
    }

    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm_maskz_popcnt_epi8() {
        let input = _mm_set_epi8(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 90, 225, 21, 249, 211, 155, 228, 70,
        );
        let k = 0xFF_00;
        // Masked-off lanes are expected to come back as zero.
        let expected = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m128i(_mm_maskz_popcnt_epi8(k, input), expected);
    }

    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm_mask_popcnt_epi8() {
        let input = _mm_set_epi8(
            0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 90, 225, 21, 249, 211, 155, 228, 70,
        );
        let k = 0xFF_00;
        // The input doubles as `src`, so masked-off lanes keep their input value.
        let expected = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 90, 225, 21, 249, 211, 155, 228, 70);
        assert_eq_m128i(_mm_mask_popcnt_epi8(input, k, input), expected);
    }

    // ---- bit shuffle ----

    #[simd_test(enable = "avx512bitalg,avx512f")]
    unsafe fn test_mm512_bitshuffle_epi64_mask() {
        let indices = _mm512_set_epi8(
            63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56, 32, 32, 16, 16, 0, 0,
            8, 8, 56, 48, 40, 32, 24, 16, 8, 0, 63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59,
            58, 57, 56, 32, 32, 16, 16, 0, 0, 8, 8, 56, 48, 40, 32, 24, 16, 8, 0,
        );
        let data = _mm512_setr_epi64(
            0xFF_FF_FF_FF_00_00_00_00,
            0xFF_00_FF_00_FF_00_FF_00,
            0xFF_00_00_00_00_00_00_00,
            0xAC_00_00_00_00_00_00_00,
            0xFF_FF_FF_FF_00_00_00_00,
            0xFF_00_FF_00_FF_00_FF_00,
            0xFF_00_00_00_00_00_00_00,
            0xAC_00_00_00_00_00_00_00,
        );
        // One expected result byte per 64-bit element of `data`.
        let expected = 0xF0 << 0
            | 0x03 << 8
            | 0xFF << 16
            | 0xAC << 24
            | 0xF0 << 32
            | 0x03 << 40
            | 0xFF << 48
            | 0xAC << 56;

        assert_eq!(_mm512_bitshuffle_epi64_mask(data, indices), expected);
    }

    #[simd_test(enable = "avx512bitalg,avx512f")]
    unsafe fn test_mm512_mask_bitshuffle_epi64_mask() {
        let indices = _mm512_set_epi8(
            63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56, 32, 32, 16, 16, 0, 0,
            8, 8, 56, 48, 40, 32, 24, 16, 8, 0, 63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59,
            58, 57, 56, 32, 32, 16, 16, 0, 0, 8, 8, 56, 48, 40, 32, 24, 16, 8, 0,
        );
        let data = _mm512_setr_epi64(
            0xFF_FF_FF_FF_00_00_00_00,
            0xFF_00_FF_00_FF_00_FF_00,
            0xFF_00_00_00_00_00_00_00,
            0xAC_00_00_00_00_00_00_00,
            0xFF_FF_FF_FF_00_00_00_00,
            0xFF_00_FF_00_FF_00_FF_00,
            0xFF_00_00_00_00_00_00_00,
            0xAC_00_00_00_00_00_00_00,
        );
        let k = 0xFF_FF_FF_FF_00_00_00_00;
        // The low 32 result bits are masked off and therefore expected to be zero.
        let expected = 0x00 << 0
            | 0x00 << 8
            | 0x00 << 16
            | 0x00 << 24
            | 0xF0 << 32
            | 0x03 << 40
            | 0xFF << 48
            | 0xAC << 56;

        assert_eq!(_mm512_mask_bitshuffle_epi64_mask(k, data, indices), expected);
    }

    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm256_bitshuffle_epi64_mask() {
        let indices = _mm256_set_epi8(
            63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56, 32, 32, 16, 16, 0, 0,
            8, 8, 56, 48, 40, 32, 24, 16, 8, 0,
        );
        let data = _mm256_setr_epi64x(
            0xFF_FF_FF_FF_00_00_00_00,
            0xFF_00_FF_00_FF_00_FF_00,
            0xFF_00_00_00_00_00_00_00,
            0xAC_00_00_00_00_00_00_00,
        );
        // One expected result byte per 64-bit element of `data`.
        let expected = 0xF0 << 0 | 0x03 << 8 | 0xFF << 16 | 0xAC << 24;

        assert_eq!(_mm256_bitshuffle_epi64_mask(data, indices), expected);
    }

    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm256_mask_bitshuffle_epi64_mask() {
        let indices = _mm256_set_epi8(
            63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56, 32, 32, 16, 16, 0, 0,
            8, 8, 56, 48, 40, 32, 24, 16, 8, 0,
        );
        let data = _mm256_setr_epi64x(
            0xFF_FF_FF_FF_00_00_00_00,
            0xFF_00_FF_00_FF_00_FF_00,
            0xFF_00_00_00_00_00_00_00,
            0xAC_00_00_00_00_00_00_00,
        );
        let k = 0xFF_FF_00_00;
        // The low 16 result bits are masked off and therefore expected to be zero.
        let expected = 0x00 << 0 | 0x00 << 8 | 0xFF << 16 | 0xAC << 24;

        assert_eq!(_mm256_mask_bitshuffle_epi64_mask(k, data, indices), expected);
    }

    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm_bitshuffle_epi64_mask() {
        let indices = _mm_set_epi8(
            63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56,
        );
        let data = _mm_setr_epi64x(0xFF_00_00_00_00_00_00_00, 0xAC_00_00_00_00_00_00_00);
        // One expected result byte per 64-bit element of `data`.
        let expected = 0xFF << 0 | 0xAC << 8;

        assert_eq!(_mm_bitshuffle_epi64_mask(data, indices), expected);
    }

    #[simd_test(enable = "avx512bitalg,avx512f,avx512vl")]
    unsafe fn test_mm_mask_bitshuffle_epi64_mask() {
        let indices = _mm_set_epi8(
            63, 62, 61, 60, 59, 58, 57, 56, 63, 62, 61, 60, 59, 58, 57, 56,
        );
        let data = _mm_setr_epi64x(0xFF_00_00_00_00_00_00_00, 0xAC_00_00_00_00_00_00_00);
        let k = 0xFF_00;
        // The low 8 result bits are masked off and therefore expected to be zero.
        let expected = 0x00 << 0 | 0xAC << 8;

        assert_eq!(_mm_mask_bitshuffle_epi64_mask(k, data, indices), expected);
    }
}
761