1//! Vectorized Population Count Instructions for Double- and Quadwords (VPOPCNTDQ)
2//!
3//! The intrinsics here correspond to those in the `immintrin.h` C header.
4//!
5//! The reference is [Intel 64 and IA-32 Architectures Software Developer's
6//! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref].
7//!
8//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
9
10use crate::core_arch::simd::i32x16;
11use crate::core_arch::simd::i32x4;
12use crate::core_arch::simd::i32x8;
13use crate::core_arch::simd::i64x2;
14use crate::core_arch::simd::i64x4;
15use crate::core_arch::simd::i64x8;
16use crate::core_arch::simd_llvm::simd_select_bitmask;
17use crate::core_arch::x86::__m128i;
18use crate::core_arch::x86::__m256i;
19use crate::core_arch::x86::__m512i;
20use crate::core_arch::x86::__mmask16;
21use crate::core_arch::x86::__mmask8;
22use crate::core_arch::x86::_mm256_setzero_si256;
23use crate::core_arch::x86::_mm512_setzero_si512;
24use crate::core_arch::x86::_mm_setzero_si128;
25use crate::core_arch::x86::m128iExt;
26use crate::core_arch::x86::m256iExt;
27use crate::core_arch::x86::m512iExt;
28use crate::mem::transmute;
29
30#[cfg(test)]
31use stdarch_test::assert_instr;
32
33#[allow(improper_ctypes)]
34extern "C" {
35 #[link_name = "llvm.ctpop.v16i32"]
36 fn popcnt_v16i32(x: i32x16) -> i32x16;
37 #[link_name = "llvm.ctpop.v8i32"]
38 fn popcnt_v8i32(x: i32x8) -> i32x8;
39 #[link_name = "llvm.ctpop.v4i32"]
40 fn popcnt_v4i32(x: i32x4) -> i32x4;
41
42 #[link_name = "llvm.ctpop.v8i64"]
43 fn popcnt_v8i64(x: i64x8) -> i64x8;
44 #[link_name = "llvm.ctpop.v4i64"]
45 fn popcnt_v4i64(x: i64x4) -> i64x4;
46 #[link_name = "llvm.ctpop.v2i64"]
47 fn popcnt_v2i64(x: i64x2) -> i64x2;
48}
49
50/// For each packed 32-bit integer maps the value to the number of logical 1 bits.
51///
52/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_popcnt_epi32)
53#[inline]
54#[target_feature(enable = "avx512vpopcntdq")]
55#[cfg_attr(test, assert_instr(vpopcntd))]
56pub unsafe fn _mm512_popcnt_epi32(a: __m512i) -> __m512i {
57 transmute(src:popcnt_v16i32(a.as_i32x16()))
58}
59
60/// For each packed 32-bit integer maps the value to the number of logical 1 bits.
61///
62/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
63/// Otherwise the computation result is written into the result.
64///
65/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_popcnt_epi32)
66#[inline]
67#[target_feature(enable = "avx512vpopcntdq")]
68#[cfg_attr(test, assert_instr(vpopcntd))]
69pub unsafe fn _mm512_maskz_popcnt_epi32(k: __mmask16, a: __m512i) -> __m512i {
70 let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
71 transmute(src:simd_select_bitmask(m:k, a:popcnt_v16i32(a.as_i32x16()), b:zero))
72}
73
74/// For each packed 32-bit integer maps the value to the number of logical 1 bits.
75///
76/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
77/// Otherwise the computation result is written into the result.
78///
79/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_popcnt_epi32)
80#[inline]
81#[target_feature(enable = "avx512vpopcntdq")]
82#[cfg_attr(test, assert_instr(vpopcntd))]
83pub unsafe fn _mm512_mask_popcnt_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
84 transmute(src:simd_select_bitmask(
85 m:k,
86 a:popcnt_v16i32(a.as_i32x16()),
87 b:src.as_i32x16(),
88 ))
89}
90
91/// For each packed 32-bit integer maps the value to the number of logical 1 bits.
92///
93/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_popcnt_epi32)
94#[inline]
95#[target_feature(enable = "avx512vpopcntdq,avx512vl")]
96#[cfg_attr(test, assert_instr(vpopcntd))]
97pub unsafe fn _mm256_popcnt_epi32(a: __m256i) -> __m256i {
98 transmute(src:popcnt_v8i32(a.as_i32x8()))
99}
100
101/// For each packed 32-bit integer maps the value to the number of logical 1 bits.
102///
103/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
104/// Otherwise the computation result is written into the result.
105///
106/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_popcnt_epi32)
107#[inline]
108#[target_feature(enable = "avx512vpopcntdq,avx512vl")]
109#[cfg_attr(test, assert_instr(vpopcntd))]
110pub unsafe fn _mm256_maskz_popcnt_epi32(k: __mmask8, a: __m256i) -> __m256i {
111 let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
112 transmute(src:simd_select_bitmask(m:k, a:popcnt_v8i32(a.as_i32x8()), b:zero))
113}
114
115/// For each packed 32-bit integer maps the value to the number of logical 1 bits.
116///
117/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
118/// Otherwise the computation result is written into the result.
119///
120/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_popcnt_epi32)
121#[inline]
122#[target_feature(enable = "avx512vpopcntdq,avx512vl")]
123#[cfg_attr(test, assert_instr(vpopcntd))]
124pub unsafe fn _mm256_mask_popcnt_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
125 transmute(src:simd_select_bitmask(
126 m:k,
127 a:popcnt_v8i32(a.as_i32x8()),
128 b:src.as_i32x8(),
129 ))
130}
131
132/// For each packed 32-bit integer maps the value to the number of logical 1 bits.
133///
134/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_popcnt_epi32)
135#[inline]
136#[target_feature(enable = "avx512vpopcntdq,avx512vl")]
137#[cfg_attr(test, assert_instr(vpopcntd))]
138pub unsafe fn _mm_popcnt_epi32(a: __m128i) -> __m128i {
139 transmute(src:popcnt_v4i32(a.as_i32x4()))
140}
141
142/// For each packed 32-bit integer maps the value to the number of logical 1 bits.
143///
144/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
145/// Otherwise the computation result is written into the result.
146///
147/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_popcnt_epi32)
148#[inline]
149#[target_feature(enable = "avx512vpopcntdq,avx512vl")]
150#[cfg_attr(test, assert_instr(vpopcntd))]
151pub unsafe fn _mm_maskz_popcnt_epi32(k: __mmask8, a: __m128i) -> __m128i {
152 let zero: i32x4 = _mm_setzero_si128().as_i32x4();
153 transmute(src:simd_select_bitmask(m:k, a:popcnt_v4i32(a.as_i32x4()), b:zero))
154}
155
156/// For each packed 32-bit integer maps the value to the number of logical 1 bits.
157///
158/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
159/// Otherwise the computation result is written into the result.
160///
161/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_popcnt_epi32)
162#[inline]
163#[target_feature(enable = "avx512vpopcntdq,avx512vl")]
164#[cfg_attr(test, assert_instr(vpopcntd))]
165pub unsafe fn _mm_mask_popcnt_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
166 transmute(src:simd_select_bitmask(
167 m:k,
168 a:popcnt_v4i32(a.as_i32x4()),
169 b:src.as_i32x4(),
170 ))
171}
172
173/// For each packed 64-bit integer maps the value to the number of logical 1 bits.
174///
175/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_popcnt_epi64)
176#[inline]
177#[target_feature(enable = "avx512vpopcntdq")]
178#[cfg_attr(test, assert_instr(vpopcntq))]
179pub unsafe fn _mm512_popcnt_epi64(a: __m512i) -> __m512i {
180 transmute(src:popcnt_v8i64(a.as_i64x8()))
181}
182
183/// For each packed 64-bit integer maps the value to the number of logical 1 bits.
184///
185/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
186/// Otherwise the computation result is written into the result.
187///
188/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_popcnt_epi64)
189#[inline]
190#[target_feature(enable = "avx512vpopcntdq")]
191#[cfg_attr(test, assert_instr(vpopcntq))]
192pub unsafe fn _mm512_maskz_popcnt_epi64(k: __mmask8, a: __m512i) -> __m512i {
193 let zero: i64x8 = _mm512_setzero_si512().as_i64x8();
194 transmute(src:simd_select_bitmask(m:k, a:popcnt_v8i64(a.as_i64x8()), b:zero))
195}
196
197/// For each packed 64-bit integer maps the value to the number of logical 1 bits.
198///
199/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
200/// Otherwise the computation result is written into the result.
201///
202/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_popcnt_epi64)
203#[inline]
204#[target_feature(enable = "avx512vpopcntdq")]
205#[cfg_attr(test, assert_instr(vpopcntq))]
206pub unsafe fn _mm512_mask_popcnt_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
207 transmute(src:simd_select_bitmask(
208 m:k,
209 a:popcnt_v8i64(a.as_i64x8()),
210 b:src.as_i64x8(),
211 ))
212}
213
214/// For each packed 64-bit integer maps the value to the number of logical 1 bits.
215///
216/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_popcnt_epi64)
217#[inline]
218#[target_feature(enable = "avx512vpopcntdq,avx512vl")]
219#[cfg_attr(test, assert_instr(vpopcntq))]
220pub unsafe fn _mm256_popcnt_epi64(a: __m256i) -> __m256i {
221 transmute(src:popcnt_v4i64(a.as_i64x4()))
222}
223
224/// For each packed 64-bit integer maps the value to the number of logical 1 bits.
225///
226/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
227/// Otherwise the computation result is written into the result.
228///
229/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_popcnt_epi64)
230#[inline]
231#[target_feature(enable = "avx512vpopcntdq,avx512vl")]
232#[cfg_attr(test, assert_instr(vpopcntq))]
233pub unsafe fn _mm256_maskz_popcnt_epi64(k: __mmask8, a: __m256i) -> __m256i {
234 let zero: i64x4 = _mm256_setzero_si256().as_i64x4();
235 transmute(src:simd_select_bitmask(m:k, a:popcnt_v4i64(a.as_i64x4()), b:zero))
236}
237
238/// For each packed 64-bit integer maps the value to the number of logical 1 bits.
239///
240/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
241/// Otherwise the computation result is written into the result.
242///
243/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_popcnt_epi64)
244#[inline]
245#[target_feature(enable = "avx512vpopcntdq,avx512vl")]
246#[cfg_attr(test, assert_instr(vpopcntq))]
247pub unsafe fn _mm256_mask_popcnt_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
248 transmute(src:simd_select_bitmask(
249 m:k,
250 a:popcnt_v4i64(a.as_i64x4()),
251 b:src.as_i64x4(),
252 ))
253}
254
255/// For each packed 64-bit integer maps the value to the number of logical 1 bits.
256///
257/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_popcnt_epi64)
258#[inline]
259#[target_feature(enable = "avx512vpopcntdq,avx512vl")]
260#[cfg_attr(test, assert_instr(vpopcntq))]
261pub unsafe fn _mm_popcnt_epi64(a: __m128i) -> __m128i {
262 transmute(src:popcnt_v2i64(a.as_i64x2()))
263}
264
265/// For each packed 64-bit integer maps the value to the number of logical 1 bits.
266///
267/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
268/// Otherwise the computation result is written into the result.
269///
270/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_popcnt_epi64)
271#[inline]
272#[target_feature(enable = "avx512vpopcntdq,avx512vl")]
273#[cfg_attr(test, assert_instr(vpopcntq))]
274pub unsafe fn _mm_maskz_popcnt_epi64(k: __mmask8, a: __m128i) -> __m128i {
275 let zero: i64x2 = _mm_setzero_si128().as_i64x2();
276 transmute(src:simd_select_bitmask(m:k, a:popcnt_v2i64(a.as_i64x2()), b:zero))
277}
278
279/// For each packed 64-bit integer maps the value to the number of logical 1 bits.
280///
281/// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set.
282/// Otherwise the computation result is written into the result.
283///
284/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_popcnt_epi64)
285#[inline]
286#[target_feature(enable = "avx512vpopcntdq,avx512vl")]
287#[cfg_attr(test, assert_instr(vpopcntq))]
288pub unsafe fn _mm_mask_popcnt_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
289 transmute(src:simd_select_bitmask(
290 m:k,
291 a:popcnt_v2i64(a.as_i64x2()),
292 b:src.as_i64x2(),
293 ))
294}
295
// Unit tests: each intrinsic is run on fixed inputs and compared against
// hand-written expected vectors.
#[cfg(test)]
mod tests {
    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;

    #[simd_test(enable = "avx512vpopcntdq,avx512f")]
    unsafe fn test_mm512_popcnt_epi32() {
        let input = _mm512_set_epi32(
            0,
            1,
            -1,
            2,
            7,
            0xFF_FE,
            0x7F_FF_FF_FF,
            -100,
            0x40_00_00_00,
            103,
            371,
            552,
            432_948,
            818_826_998,
            255,
            256,
        );
        let expected = _mm512_set_epi32(0, 1, 32, 1, 3, 15, 31, 28, 1, 5, 6, 3, 10, 17, 8, 1);
        let counted = _mm512_popcnt_epi32(input);
        assert_eq_m512i(counted, expected);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512f")]
    unsafe fn test_mm512_mask_popcnt_epi32() {
        let input = _mm512_set_epi32(
            0,
            1,
            -1,
            2,
            7,
            0xFF_FE,
            0x7F_FF_FF_FF,
            -100,
            0x40_00_00_00,
            103,
            371,
            552,
            432_948,
            818_826_998,
            255,
            256,
        );
        let k: __mmask16 = 0xFF_00;
        // The first eight listed elements are counted; the remaining eight
        // are expected to pass through unchanged from `src` (here the input).
        let expected = _mm512_set_epi32(
            0,
            1,
            32,
            1,
            3,
            15,
            31,
            28,
            0x40_00_00_00,
            103,
            371,
            552,
            432_948,
            818_826_998,
            255,
            256,
        );
        let counted = _mm512_mask_popcnt_epi32(input, k, input);
        assert_eq_m512i(counted, expected);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512f")]
    unsafe fn test_mm512_maskz_popcnt_epi32() {
        let input = _mm512_set_epi32(
            0,
            1,
            -1,
            2,
            7,
            0xFF_FE,
            0x7F_FF_FF_FF,
            -100,
            0x40_00_00_00,
            103,
            371,
            552,
            432_948,
            818_826_998,
            255,
            256,
        );
        let k: __mmask16 = 0xFF_00;
        // Same mask as above, but unselected elements are expected to be zero.
        let expected = _mm512_set_epi32(0, 1, 32, 1, 3, 15, 31, 28, 0, 0, 0, 0, 0, 0, 0, 0);
        let counted = _mm512_maskz_popcnt_epi32(k, input);
        assert_eq_m512i(counted, expected);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512f,avx512vl")]
    unsafe fn test_mm256_popcnt_epi32() {
        let input = _mm256_set_epi32(0, 1, -1, 2, 7, 0xFF_FE, 0x7F_FF_FF_FF, -100);
        let expected = _mm256_set_epi32(0, 1, 32, 1, 3, 15, 31, 28);
        let counted = _mm256_popcnt_epi32(input);
        assert_eq_m256i(counted, expected);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512f,avx512vl")]
    unsafe fn test_mm256_mask_popcnt_epi32() {
        let input = _mm256_set_epi32(0, 1, -1, 2, 7, 0xFF_FE, 0x7F_FF_FF_FF, -100);
        let k: __mmask8 = 0xF0;
        let expected = _mm256_set_epi32(0, 1, 32, 1, 7, 0xFF_FE, 0x7F_FF_FF_FF, -100);
        let counted = _mm256_mask_popcnt_epi32(input, k, input);
        assert_eq_m256i(counted, expected);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_popcnt_epi32() {
        let input = _mm256_set_epi32(0, 1, -1, 2, 7, 0xFF_FE, 0x7F_FF_FF_FF, -100);
        let k: __mmask8 = 0xF0;
        let expected = _mm256_set_epi32(0, 1, 32, 1, 0, 0, 0, 0);
        let counted = _mm256_maskz_popcnt_epi32(k, input);
        assert_eq_m256i(counted, expected);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512f,avx512vl")]
    unsafe fn test_mm_popcnt_epi32() {
        let input = _mm_set_epi32(0, 1, -1, -100);
        let expected = _mm_set_epi32(0, 1, 32, 28);
        let counted = _mm_popcnt_epi32(input);
        assert_eq_m128i(counted, expected);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512f,avx512vl")]
    unsafe fn test_mm_mask_popcnt_epi32() {
        let input = _mm_set_epi32(0, 1, -1, -100);
        let k: __mmask8 = 0xE;
        let expected = _mm_set_epi32(0, 1, 32, -100);
        let counted = _mm_mask_popcnt_epi32(input, k, input);
        assert_eq_m128i(counted, expected);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512f,avx512vl")]
    unsafe fn test_mm_maskz_popcnt_epi32() {
        let input = _mm_set_epi32(0, 1, -1, -100);
        let k: __mmask8 = 0xE;
        let expected = _mm_set_epi32(0, 1, 32, 0);
        let counted = _mm_maskz_popcnt_epi32(k, input);
        assert_eq_m128i(counted, expected);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512f")]
    unsafe fn test_mm512_popcnt_epi64() {
        let input = _mm512_set_epi64(0, 1, -1, 2, 7, 0xFF_FE, 0x7F_FF_FF_FF_FF_FF_FF_FF, -100);
        let expected = _mm512_set_epi64(0, 1, 64, 1, 3, 15, 63, 60);
        let counted = _mm512_popcnt_epi64(input);
        assert_eq_m512i(counted, expected);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512f")]
    unsafe fn test_mm512_mask_popcnt_epi64() {
        let input = _mm512_set_epi64(0, 1, -1, 2, 7, 0xFF_FE, 0x7F_FF_FF_FF_FF_FF_FF_FF, -100);
        let k: __mmask8 = 0xF0;
        let expected = _mm512_set_epi64(0, 1, 64, 1, 7, 0xFF_FE, 0x7F_FF_FF_FF_FF_FF_FF_FF, -100);
        let counted = _mm512_mask_popcnt_epi64(input, k, input);
        assert_eq_m512i(counted, expected);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512f")]
    unsafe fn test_mm512_maskz_popcnt_epi64() {
        let input = _mm512_set_epi64(0, 1, -1, 2, 7, 0xFF_FE, 0x7F_FF_FF_FF_FF_FF_FF_FF, -100);
        let k: __mmask8 = 0xF0;
        let expected = _mm512_set_epi64(0, 1, 64, 1, 0, 0, 0, 0);
        let counted = _mm512_maskz_popcnt_epi64(k, input);
        assert_eq_m512i(counted, expected);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512vl")]
    unsafe fn test_mm256_popcnt_epi64() {
        let input = _mm256_set_epi64x(0, 1, -1, -100);
        let expected = _mm256_set_epi64x(0, 1, 64, 60);
        let counted = _mm256_popcnt_epi64(input);
        assert_eq_m256i(counted, expected);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512vl")]
    unsafe fn test_mm256_mask_popcnt_epi64() {
        let input = _mm256_set_epi64x(0, 1, -1, -100);
        let k: __mmask8 = 0xE;
        let expected = _mm256_set_epi64x(0, 1, 64, -100);
        let counted = _mm256_mask_popcnt_epi64(input, k, input);
        assert_eq_m256i(counted, expected);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512vl")]
    unsafe fn test_mm256_maskz_popcnt_epi64() {
        let input = _mm256_set_epi64x(0, 1, -1, -100);
        let k: __mmask8 = 0xE;
        let expected = _mm256_set_epi64x(0, 1, 64, 0);
        let counted = _mm256_maskz_popcnt_epi64(k, input);
        assert_eq_m256i(counted, expected);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512vl")]
    unsafe fn test_mm_popcnt_epi64() {
        // First pair of inputs.
        let small_input = _mm_set_epi64x(0, 1);
        let small_expected = _mm_set_epi64x(0, 1);
        let small_counted = _mm_popcnt_epi64(small_input);
        assert_eq_m128i(small_counted, small_expected);

        // Second pair of inputs.
        let negative_input = _mm_set_epi64x(-1, -100);
        let negative_expected = _mm_set_epi64x(64, 60);
        let negative_counted = _mm_popcnt_epi64(negative_input);
        assert_eq_m128i(negative_counted, negative_expected);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512vl")]
    unsafe fn test_mm_mask_popcnt_epi64() {
        let k: __mmask8 = 0x2;

        let first_input = _mm_set_epi64x(0, -100);
        let first_expected = _mm_set_epi64x(0, -100);
        let first_counted = _mm_mask_popcnt_epi64(first_input, k, first_input);
        assert_eq_m128i(first_counted, first_expected);

        let second_input = _mm_set_epi64x(-1, 1);
        let second_expected = _mm_set_epi64x(64, 1);
        let second_counted = _mm_mask_popcnt_epi64(second_input, k, second_input);
        assert_eq_m128i(second_counted, second_expected);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512vl")]
    unsafe fn test_mm_maskz_popcnt_epi64() {
        let k: __mmask8 = 0x2;

        let first_input = _mm_set_epi64x(0, 1);
        let first_expected = _mm_set_epi64x(0, 0);
        let first_counted = _mm_maskz_popcnt_epi64(k, first_input);
        assert_eq_m128i(first_counted, first_expected);

        let second_input = _mm_set_epi64x(-1, -100);
        let second_expected = _mm_set_epi64x(64, 0);
        let second_counted = _mm_maskz_popcnt_epi64(k, second_input);
        assert_eq_m128i(second_counted, second_expected);
    }
}
542