1 | //! Vectorized Population Count Instructions for Double- and Quadwords (VPOPCNTDQ) |
2 | //! |
3 | //! The intrinsics here correspond to those in the `immintrin.h` C header. |
4 | //! |
5 | //! The reference is [Intel 64 and IA-32 Architectures Software Developer's |
6 | //! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref]. |
7 | //! |
8 | //! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf |
9 | |
10 | use crate::core_arch::simd::*; |
11 | use crate::core_arch::x86::__m128i; |
12 | use crate::core_arch::x86::__m256i; |
13 | use crate::core_arch::x86::__m512i; |
14 | use crate::core_arch::x86::__mmask8; |
15 | use crate::core_arch::x86::__mmask16; |
16 | use crate::intrinsics::simd::{simd_ctpop, simd_select_bitmask}; |
17 | use crate::mem::transmute; |
18 | |
19 | #[cfg (test)] |
20 | use stdarch_test::assert_instr; |
21 | |
22 | /// For each packed 32-bit integer maps the value to the number of logical 1 bits. |
23 | /// |
24 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_popcnt_epi32) |
25 | #[inline ] |
26 | #[target_feature (enable = "avx512vpopcntdq" )] |
27 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
28 | #[cfg_attr (test, assert_instr(vpopcntd))] |
29 | pub fn _mm512_popcnt_epi32(a: __m512i) -> __m512i { |
30 | unsafe { transmute(src:simd_ctpop(a.as_i32x16())) } |
31 | } |
32 | |
33 | /// For each packed 32-bit integer maps the value to the number of logical 1 bits. |
34 | /// |
35 | /// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set. |
36 | /// Otherwise the computation result is written into the result. |
37 | /// |
38 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_popcnt_epi32) |
39 | #[inline ] |
40 | #[target_feature (enable = "avx512vpopcntdq" )] |
41 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
42 | #[cfg_attr (test, assert_instr(vpopcntd))] |
43 | pub fn _mm512_maskz_popcnt_epi32(k: __mmask16, a: __m512i) -> __m512i { |
44 | unsafe { |
45 | transmute(src:simd_select_bitmask( |
46 | m:k, |
47 | yes:simd_ctpop(a.as_i32x16()), |
48 | no:i32x16::ZERO, |
49 | )) |
50 | } |
51 | } |
52 | |
53 | /// For each packed 32-bit integer maps the value to the number of logical 1 bits. |
54 | /// |
55 | /// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set. |
56 | /// Otherwise the computation result is written into the result. |
57 | /// |
58 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_popcnt_epi32) |
59 | #[inline ] |
60 | #[target_feature (enable = "avx512vpopcntdq" )] |
61 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
62 | #[cfg_attr (test, assert_instr(vpopcntd))] |
63 | pub fn _mm512_mask_popcnt_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i { |
64 | unsafe { |
65 | transmute(src:simd_select_bitmask( |
66 | m:k, |
67 | yes:simd_ctpop(a.as_i32x16()), |
68 | no:src.as_i32x16(), |
69 | )) |
70 | } |
71 | } |
72 | |
73 | /// For each packed 32-bit integer maps the value to the number of logical 1 bits. |
74 | /// |
75 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_popcnt_epi32) |
76 | #[inline ] |
77 | #[target_feature (enable = "avx512vpopcntdq,avx512vl" )] |
78 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
79 | #[cfg_attr (test, assert_instr(vpopcntd))] |
80 | pub fn _mm256_popcnt_epi32(a: __m256i) -> __m256i { |
81 | unsafe { transmute(src:simd_ctpop(a.as_i32x8())) } |
82 | } |
83 | |
84 | /// For each packed 32-bit integer maps the value to the number of logical 1 bits. |
85 | /// |
86 | /// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set. |
87 | /// Otherwise the computation result is written into the result. |
88 | /// |
89 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_popcnt_epi32) |
90 | #[inline ] |
91 | #[target_feature (enable = "avx512vpopcntdq,avx512vl" )] |
92 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
93 | #[cfg_attr (test, assert_instr(vpopcntd))] |
94 | pub fn _mm256_maskz_popcnt_epi32(k: __mmask8, a: __m256i) -> __m256i { |
95 | unsafe { |
96 | transmute(src:simd_select_bitmask( |
97 | m:k, |
98 | yes:simd_ctpop(a.as_i32x8()), |
99 | no:i32x8::ZERO, |
100 | )) |
101 | } |
102 | } |
103 | |
104 | /// For each packed 32-bit integer maps the value to the number of logical 1 bits. |
105 | /// |
106 | /// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set. |
107 | /// Otherwise the computation result is written into the result. |
108 | /// |
109 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_popcnt_epi32) |
110 | #[inline ] |
111 | #[target_feature (enable = "avx512vpopcntdq,avx512vl" )] |
112 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
113 | #[cfg_attr (test, assert_instr(vpopcntd))] |
114 | pub fn _mm256_mask_popcnt_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { |
115 | unsafe { |
116 | transmute(src:simd_select_bitmask( |
117 | m:k, |
118 | yes:simd_ctpop(a.as_i32x8()), |
119 | no:src.as_i32x8(), |
120 | )) |
121 | } |
122 | } |
123 | |
124 | /// For each packed 32-bit integer maps the value to the number of logical 1 bits. |
125 | /// |
126 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_popcnt_epi32) |
127 | #[inline ] |
128 | #[target_feature (enable = "avx512vpopcntdq,avx512vl" )] |
129 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
130 | #[cfg_attr (test, assert_instr(vpopcntd))] |
131 | pub fn _mm_popcnt_epi32(a: __m128i) -> __m128i { |
132 | unsafe { transmute(src:simd_ctpop(a.as_i32x4())) } |
133 | } |
134 | |
135 | /// For each packed 32-bit integer maps the value to the number of logical 1 bits. |
136 | /// |
137 | /// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set. |
138 | /// Otherwise the computation result is written into the result. |
139 | /// |
140 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_popcnt_epi32) |
141 | #[inline ] |
142 | #[target_feature (enable = "avx512vpopcntdq,avx512vl" )] |
143 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
144 | #[cfg_attr (test, assert_instr(vpopcntd))] |
145 | pub fn _mm_maskz_popcnt_epi32(k: __mmask8, a: __m128i) -> __m128i { |
146 | unsafe { |
147 | transmute(src:simd_select_bitmask( |
148 | m:k, |
149 | yes:simd_ctpop(a.as_i32x4()), |
150 | no:i32x4::ZERO, |
151 | )) |
152 | } |
153 | } |
154 | |
155 | /// For each packed 32-bit integer maps the value to the number of logical 1 bits. |
156 | /// |
157 | /// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set. |
158 | /// Otherwise the computation result is written into the result. |
159 | /// |
160 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_popcnt_epi32) |
161 | #[inline ] |
162 | #[target_feature (enable = "avx512vpopcntdq,avx512vl" )] |
163 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
164 | #[cfg_attr (test, assert_instr(vpopcntd))] |
165 | pub fn _mm_mask_popcnt_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { |
166 | unsafe { |
167 | transmute(src:simd_select_bitmask( |
168 | m:k, |
169 | yes:simd_ctpop(a.as_i32x4()), |
170 | no:src.as_i32x4(), |
171 | )) |
172 | } |
173 | } |
174 | |
175 | /// For each packed 64-bit integer maps the value to the number of logical 1 bits. |
176 | /// |
177 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_popcnt_epi64) |
178 | #[inline ] |
179 | #[target_feature (enable = "avx512vpopcntdq" )] |
180 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
181 | #[cfg_attr (test, assert_instr(vpopcntq))] |
182 | pub fn _mm512_popcnt_epi64(a: __m512i) -> __m512i { |
183 | unsafe { transmute(src:simd_ctpop(a.as_i64x8())) } |
184 | } |
185 | |
186 | /// For each packed 64-bit integer maps the value to the number of logical 1 bits. |
187 | /// |
188 | /// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set. |
189 | /// Otherwise the computation result is written into the result. |
190 | /// |
191 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_popcnt_epi64) |
192 | #[inline ] |
193 | #[target_feature (enable = "avx512vpopcntdq" )] |
194 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
195 | #[cfg_attr (test, assert_instr(vpopcntq))] |
196 | pub fn _mm512_maskz_popcnt_epi64(k: __mmask8, a: __m512i) -> __m512i { |
197 | unsafe { |
198 | transmute(src:simd_select_bitmask( |
199 | m:k, |
200 | yes:simd_ctpop(a.as_i64x8()), |
201 | no:i64x8::ZERO, |
202 | )) |
203 | } |
204 | } |
205 | |
206 | /// For each packed 64-bit integer maps the value to the number of logical 1 bits. |
207 | /// |
208 | /// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set. |
209 | /// Otherwise the computation result is written into the result. |
210 | /// |
211 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_popcnt_epi64) |
212 | #[inline ] |
213 | #[target_feature (enable = "avx512vpopcntdq" )] |
214 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
215 | #[cfg_attr (test, assert_instr(vpopcntq))] |
216 | pub fn _mm512_mask_popcnt_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i { |
217 | unsafe { |
218 | transmute(src:simd_select_bitmask( |
219 | m:k, |
220 | yes:simd_ctpop(a.as_i64x8()), |
221 | no:src.as_i64x8(), |
222 | )) |
223 | } |
224 | } |
225 | |
226 | /// For each packed 64-bit integer maps the value to the number of logical 1 bits. |
227 | /// |
228 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_popcnt_epi64) |
229 | #[inline ] |
230 | #[target_feature (enable = "avx512vpopcntdq,avx512vl" )] |
231 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
232 | #[cfg_attr (test, assert_instr(vpopcntq))] |
233 | pub fn _mm256_popcnt_epi64(a: __m256i) -> __m256i { |
234 | unsafe { transmute(src:simd_ctpop(a.as_i64x4())) } |
235 | } |
236 | |
237 | /// For each packed 64-bit integer maps the value to the number of logical 1 bits. |
238 | /// |
239 | /// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set. |
240 | /// Otherwise the computation result is written into the result. |
241 | /// |
242 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_popcnt_epi64) |
243 | #[inline ] |
244 | #[target_feature (enable = "avx512vpopcntdq,avx512vl" )] |
245 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
246 | #[cfg_attr (test, assert_instr(vpopcntq))] |
247 | pub fn _mm256_maskz_popcnt_epi64(k: __mmask8, a: __m256i) -> __m256i { |
248 | unsafe { |
249 | transmute(src:simd_select_bitmask( |
250 | m:k, |
251 | yes:simd_ctpop(a.as_i64x4()), |
252 | no:i64x4::ZERO, |
253 | )) |
254 | } |
255 | } |
256 | |
257 | /// For each packed 64-bit integer maps the value to the number of logical 1 bits. |
258 | /// |
259 | /// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set. |
260 | /// Otherwise the computation result is written into the result. |
261 | /// |
262 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_popcnt_epi64) |
263 | #[inline ] |
264 | #[target_feature (enable = "avx512vpopcntdq,avx512vl" )] |
265 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
266 | #[cfg_attr (test, assert_instr(vpopcntq))] |
267 | pub fn _mm256_mask_popcnt_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { |
268 | unsafe { |
269 | transmute(src:simd_select_bitmask( |
270 | m:k, |
271 | yes:simd_ctpop(a.as_i64x4()), |
272 | no:src.as_i64x4(), |
273 | )) |
274 | } |
275 | } |
276 | |
277 | /// For each packed 64-bit integer maps the value to the number of logical 1 bits. |
278 | /// |
279 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_popcnt_epi64) |
280 | #[inline ] |
281 | #[target_feature (enable = "avx512vpopcntdq,avx512vl" )] |
282 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
283 | #[cfg_attr (test, assert_instr(vpopcntq))] |
284 | pub fn _mm_popcnt_epi64(a: __m128i) -> __m128i { |
285 | unsafe { transmute(src:simd_ctpop(a.as_i64x2())) } |
286 | } |
287 | |
288 | /// For each packed 64-bit integer maps the value to the number of logical 1 bits. |
289 | /// |
290 | /// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set. |
291 | /// Otherwise the computation result is written into the result. |
292 | /// |
293 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_popcnt_epi64) |
294 | #[inline ] |
295 | #[target_feature (enable = "avx512vpopcntdq,avx512vl" )] |
296 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
297 | #[cfg_attr (test, assert_instr(vpopcntq))] |
298 | pub fn _mm_maskz_popcnt_epi64(k: __mmask8, a: __m128i) -> __m128i { |
299 | unsafe { |
300 | transmute(src:simd_select_bitmask( |
301 | m:k, |
302 | yes:simd_ctpop(a.as_i64x2()), |
303 | no:i64x2::ZERO, |
304 | )) |
305 | } |
306 | } |
307 | |
308 | /// For each packed 64-bit integer maps the value to the number of logical 1 bits. |
309 | /// |
310 | /// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set. |
311 | /// Otherwise the computation result is written into the result. |
312 | /// |
313 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_popcnt_epi64) |
314 | #[inline ] |
315 | #[target_feature (enable = "avx512vpopcntdq,avx512vl" )] |
316 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
317 | #[cfg_attr (test, assert_instr(vpopcntq))] |
318 | pub fn _mm_mask_popcnt_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { |
319 | unsafe { |
320 | transmute(src:simd_select_bitmask( |
321 | m:k, |
322 | yes:simd_ctpop(a.as_i64x2()), |
323 | no:src.as_i64x2(), |
324 | )) |
325 | } |
326 | } |
327 | |
#[cfg(test)]
mod tests {
    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;

    // Each test builds an input vector, runs one popcount intrinsic on it and compares
    // the outcome with a hand-written vector of expected per-element bit counts.
    // In the masked tests the input doubles as `src`, so elements the mask leaves
    // unselected are expected to come back unchanged (mask) or as zero (maskz).

    #[simd_test(enable = "avx512vpopcntdq,avx512f")]
    unsafe fn test_mm512_popcnt_epi32() {
        let input = _mm512_set_epi32(
            0,
            1,
            -1,
            2,
            7,
            0xFF_FE,
            0x7F_FF_FF_FF,
            -100,
            0x40_00_00_00,
            103,
            371,
            552,
            432_948,
            818_826_998,
            255,
            256,
        );
        let want = _mm512_set_epi32(0, 1, 32, 1, 3, 15, 31, 28, 1, 5, 6, 3, 10, 17, 8, 1);
        assert_eq_m512i(_mm512_popcnt_epi32(input), want);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512f")]
    unsafe fn test_mm512_mask_popcnt_epi32() {
        let input = _mm512_set_epi32(
            0,
            1,
            -1,
            2,
            7,
            0xFF_FE,
            0x7F_FF_FF_FF,
            -100,
            0x40_00_00_00,
            103,
            371,
            552,
            432_948,
            818_826_998,
            255,
            256,
        );
        let k = 0xFF_00;
        // First eight listed values are counted; the remaining eight pass through.
        let want = _mm512_set_epi32(
            0,
            1,
            32,
            1,
            3,
            15,
            31,
            28,
            0x40_00_00_00,
            103,
            371,
            552,
            432_948,
            818_826_998,
            255,
            256,
        );
        assert_eq_m512i(_mm512_mask_popcnt_epi32(input, k, input), want);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512f")]
    unsafe fn test_mm512_maskz_popcnt_epi32() {
        let input = _mm512_set_epi32(
            0,
            1,
            -1,
            2,
            7,
            0xFF_FE,
            0x7F_FF_FF_FF,
            -100,
            0x40_00_00_00,
            103,
            371,
            552,
            432_948,
            818_826_998,
            255,
            256,
        );
        let k = 0xFF_00;
        // First eight listed values are counted; the remaining eight are zeroed.
        let want = _mm512_set_epi32(0, 1, 32, 1, 3, 15, 31, 28, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(_mm512_maskz_popcnt_epi32(k, input), want);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512f,avx512vl")]
    unsafe fn test_mm256_popcnt_epi32() {
        let input = _mm256_set_epi32(0, 1, -1, 2, 7, 0xFF_FE, 0x7F_FF_FF_FF, -100);
        let want = _mm256_set_epi32(0, 1, 32, 1, 3, 15, 31, 28);
        assert_eq_m256i(_mm256_popcnt_epi32(input), want);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512f,avx512vl")]
    unsafe fn test_mm256_mask_popcnt_epi32() {
        let input = _mm256_set_epi32(0, 1, -1, 2, 7, 0xFF_FE, 0x7F_FF_FF_FF, -100);
        let k = 0xF0;
        // First four listed values are counted; the remaining four pass through.
        let want = _mm256_set_epi32(0, 1, 32, 1, 7, 0xFF_FE, 0x7F_FF_FF_FF, -100);
        assert_eq_m256i(_mm256_mask_popcnt_epi32(input, k, input), want);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_popcnt_epi32() {
        let input = _mm256_set_epi32(0, 1, -1, 2, 7, 0xFF_FE, 0x7F_FF_FF_FF, -100);
        let k = 0xF0;
        // First four listed values are counted; the remaining four are zeroed.
        let want = _mm256_set_epi32(0, 1, 32, 1, 0, 0, 0, 0);
        assert_eq_m256i(_mm256_maskz_popcnt_epi32(k, input), want);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512f,avx512vl")]
    unsafe fn test_mm_popcnt_epi32() {
        let input = _mm_set_epi32(0, 1, -1, -100);
        let want = _mm_set_epi32(0, 1, 32, 28);
        assert_eq_m128i(_mm_popcnt_epi32(input), want);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512f,avx512vl")]
    unsafe fn test_mm_mask_popcnt_epi32() {
        let input = _mm_set_epi32(0, 1, -1, -100);
        let k = 0xE;
        // Only the last listed value is left unselected and passes through.
        let want = _mm_set_epi32(0, 1, 32, -100);
        assert_eq_m128i(_mm_mask_popcnt_epi32(input, k, input), want);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512f,avx512vl")]
    unsafe fn test_mm_maskz_popcnt_epi32() {
        let input = _mm_set_epi32(0, 1, -1, -100);
        let k = 0xE;
        // Only the last listed value is left unselected and is zeroed.
        let want = _mm_set_epi32(0, 1, 32, 0);
        assert_eq_m128i(_mm_maskz_popcnt_epi32(k, input), want);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512f")]
    unsafe fn test_mm512_popcnt_epi64() {
        let input = _mm512_set_epi64(0, 1, -1, 2, 7, 0xFF_FE, 0x7F_FF_FF_FF_FF_FF_FF_FF, -100);
        let want = _mm512_set_epi64(0, 1, 64, 1, 3, 15, 63, 60);
        assert_eq_m512i(_mm512_popcnt_epi64(input), want);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512f")]
    unsafe fn test_mm512_mask_popcnt_epi64() {
        let input = _mm512_set_epi64(0, 1, -1, 2, 7, 0xFF_FE, 0x7F_FF_FF_FF_FF_FF_FF_FF, -100);
        let k = 0xF0;
        // First four listed values are counted; the remaining four pass through.
        let want = _mm512_set_epi64(0, 1, 64, 1, 7, 0xFF_FE, 0x7F_FF_FF_FF_FF_FF_FF_FF, -100);
        assert_eq_m512i(_mm512_mask_popcnt_epi64(input, k, input), want);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512f")]
    unsafe fn test_mm512_maskz_popcnt_epi64() {
        let input = _mm512_set_epi64(0, 1, -1, 2, 7, 0xFF_FE, 0x7F_FF_FF_FF_FF_FF_FF_FF, -100);
        let k = 0xF0;
        // First four listed values are counted; the remaining four are zeroed.
        let want = _mm512_set_epi64(0, 1, 64, 1, 0, 0, 0, 0);
        assert_eq_m512i(_mm512_maskz_popcnt_epi64(k, input), want);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512vl")]
    unsafe fn test_mm256_popcnt_epi64() {
        let input = _mm256_set_epi64x(0, 1, -1, -100);
        let want = _mm256_set_epi64x(0, 1, 64, 60);
        assert_eq_m256i(_mm256_popcnt_epi64(input), want);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512vl")]
    unsafe fn test_mm256_mask_popcnt_epi64() {
        let input = _mm256_set_epi64x(0, 1, -1, -100);
        let k = 0xE;
        // Only the last listed value is left unselected and passes through.
        let want = _mm256_set_epi64x(0, 1, 64, -100);
        assert_eq_m256i(_mm256_mask_popcnt_epi64(input, k, input), want);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512vl")]
    unsafe fn test_mm256_maskz_popcnt_epi64() {
        let input = _mm256_set_epi64x(0, 1, -1, -100);
        let k = 0xE;
        // Only the last listed value is left unselected and is zeroed.
        let want = _mm256_set_epi64x(0, 1, 64, 0);
        assert_eq_m256i(_mm256_maskz_popcnt_epi64(k, input), want);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512vl")]
    unsafe fn test_mm_popcnt_epi64() {
        // Small values first, then values with most bits set.
        let small = _mm_set_epi64x(0, 1);
        let small_want = _mm_set_epi64x(0, 1);
        assert_eq_m128i(_mm_popcnt_epi64(small), small_want);

        let dense = _mm_set_epi64x(-1, -100);
        let dense_want = _mm_set_epi64x(64, 60);
        assert_eq_m128i(_mm_popcnt_epi64(dense), dense_want);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512vl")]
    unsafe fn test_mm_mask_popcnt_epi64() {
        // With mask 0x2 only the first listed value is counted; the second passes through.
        let first = _mm_set_epi64x(0, -100);
        let k = 0x2;
        let first_want = _mm_set_epi64x(0, -100);
        assert_eq_m128i(_mm_mask_popcnt_epi64(first, k, first), first_want);

        let second = _mm_set_epi64x(-1, 1);
        let k = 0x2;
        let second_want = _mm_set_epi64x(64, 1);
        assert_eq_m128i(_mm_mask_popcnt_epi64(second, k, second), second_want);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512vl")]
    unsafe fn test_mm_maskz_popcnt_epi64() {
        // With mask 0x2 only the first listed value is counted; the second is zeroed.
        let first = _mm_set_epi64x(0, 1);
        let k = 0x2;
        let first_want = _mm_set_epi64x(0, 0);
        assert_eq_m128i(_mm_maskz_popcnt_epi64(k, first), first_want);

        let second = _mm_set_epi64x(-1, -100);
        let k = 0x2;
        let second_want = _mm_set_epi64x(64, 0);
        assert_eq_m128i(_mm_maskz_popcnt_epi64(k, second), second_want);
    }
}
574 | |