1 | //! Vectorized Population Count Instructions for Double- and Quadwords (VPOPCNTDQ) |
2 | //! |
3 | //! The intrinsics here correspond to those in the `immintrin.h` C header. |
4 | //! |
5 | //! The reference is [Intel 64 and IA-32 Architectures Software Developer's |
6 | //! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref]. |
7 | //! |
8 | //! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf |
9 | |
10 | use crate::core_arch::simd::i32x16; |
11 | use crate::core_arch::simd::i32x4; |
12 | use crate::core_arch::simd::i32x8; |
13 | use crate::core_arch::simd::i64x2; |
14 | use crate::core_arch::simd::i64x4; |
15 | use crate::core_arch::simd::i64x8; |
16 | use crate::core_arch::simd_llvm::simd_select_bitmask; |
17 | use crate::core_arch::x86::__m128i; |
18 | use crate::core_arch::x86::__m256i; |
19 | use crate::core_arch::x86::__m512i; |
20 | use crate::core_arch::x86::__mmask16; |
21 | use crate::core_arch::x86::__mmask8; |
22 | use crate::core_arch::x86::_mm256_setzero_si256; |
23 | use crate::core_arch::x86::_mm512_setzero_si512; |
24 | use crate::core_arch::x86::_mm_setzero_si128; |
25 | use crate::core_arch::x86::m128iExt; |
26 | use crate::core_arch::x86::m256iExt; |
27 | use crate::core_arch::x86::m512iExt; |
28 | use crate::mem::transmute; |
29 | |
30 | #[cfg (test)] |
31 | use stdarch_test::assert_instr; |
32 | |
33 | #[allow (improper_ctypes)] |
34 | extern "C" { |
35 | #[link_name = "llvm.ctpop.v16i32" ] |
36 | fn popcnt_v16i32(x: i32x16) -> i32x16; |
37 | #[link_name = "llvm.ctpop.v8i32" ] |
38 | fn popcnt_v8i32(x: i32x8) -> i32x8; |
39 | #[link_name = "llvm.ctpop.v4i32" ] |
40 | fn popcnt_v4i32(x: i32x4) -> i32x4; |
41 | |
42 | #[link_name = "llvm.ctpop.v8i64" ] |
43 | fn popcnt_v8i64(x: i64x8) -> i64x8; |
44 | #[link_name = "llvm.ctpop.v4i64" ] |
45 | fn popcnt_v4i64(x: i64x4) -> i64x4; |
46 | #[link_name = "llvm.ctpop.v2i64" ] |
47 | fn popcnt_v2i64(x: i64x2) -> i64x2; |
48 | } |
49 | |
50 | /// For each packed 32-bit integer maps the value to the number of logical 1 bits. |
51 | /// |
52 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_popcnt_epi32) |
53 | #[inline ] |
54 | #[target_feature (enable = "avx512vpopcntdq" )] |
55 | #[cfg_attr (test, assert_instr(vpopcntd))] |
56 | pub unsafe fn _mm512_popcnt_epi32(a: __m512i) -> __m512i { |
57 | transmute(src:popcnt_v16i32(a.as_i32x16())) |
58 | } |
59 | |
60 | /// For each packed 32-bit integer maps the value to the number of logical 1 bits. |
61 | /// |
62 | /// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set. |
63 | /// Otherwise the computation result is written into the result. |
64 | /// |
65 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_popcnt_epi32) |
66 | #[inline ] |
67 | #[target_feature (enable = "avx512vpopcntdq" )] |
68 | #[cfg_attr (test, assert_instr(vpopcntd))] |
69 | pub unsafe fn _mm512_maskz_popcnt_epi32(k: __mmask16, a: __m512i) -> __m512i { |
70 | let zero: i32x16 = _mm512_setzero_si512().as_i32x16(); |
71 | transmute(src:simd_select_bitmask(m:k, a:popcnt_v16i32(a.as_i32x16()), b:zero)) |
72 | } |
73 | |
74 | /// For each packed 32-bit integer maps the value to the number of logical 1 bits. |
75 | /// |
76 | /// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set. |
77 | /// Otherwise the computation result is written into the result. |
78 | /// |
79 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_popcnt_epi32) |
80 | #[inline ] |
81 | #[target_feature (enable = "avx512vpopcntdq" )] |
82 | #[cfg_attr (test, assert_instr(vpopcntd))] |
83 | pub unsafe fn _mm512_mask_popcnt_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i { |
84 | transmute(src:simd_select_bitmask( |
85 | m:k, |
86 | a:popcnt_v16i32(a.as_i32x16()), |
87 | b:src.as_i32x16(), |
88 | )) |
89 | } |
90 | |
91 | /// For each packed 32-bit integer maps the value to the number of logical 1 bits. |
92 | /// |
93 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_popcnt_epi32) |
94 | #[inline ] |
95 | #[target_feature (enable = "avx512vpopcntdq,avx512vl" )] |
96 | #[cfg_attr (test, assert_instr(vpopcntd))] |
97 | pub unsafe fn _mm256_popcnt_epi32(a: __m256i) -> __m256i { |
98 | transmute(src:popcnt_v8i32(a.as_i32x8())) |
99 | } |
100 | |
101 | /// For each packed 32-bit integer maps the value to the number of logical 1 bits. |
102 | /// |
103 | /// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set. |
104 | /// Otherwise the computation result is written into the result. |
105 | /// |
106 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_popcnt_epi32) |
107 | #[inline ] |
108 | #[target_feature (enable = "avx512vpopcntdq,avx512vl" )] |
109 | #[cfg_attr (test, assert_instr(vpopcntd))] |
110 | pub unsafe fn _mm256_maskz_popcnt_epi32(k: __mmask8, a: __m256i) -> __m256i { |
111 | let zero: i32x8 = _mm256_setzero_si256().as_i32x8(); |
112 | transmute(src:simd_select_bitmask(m:k, a:popcnt_v8i32(a.as_i32x8()), b:zero)) |
113 | } |
114 | |
115 | /// For each packed 32-bit integer maps the value to the number of logical 1 bits. |
116 | /// |
117 | /// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set. |
118 | /// Otherwise the computation result is written into the result. |
119 | /// |
120 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_popcnt_epi32) |
121 | #[inline ] |
122 | #[target_feature (enable = "avx512vpopcntdq,avx512vl" )] |
123 | #[cfg_attr (test, assert_instr(vpopcntd))] |
124 | pub unsafe fn _mm256_mask_popcnt_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { |
125 | transmute(src:simd_select_bitmask( |
126 | m:k, |
127 | a:popcnt_v8i32(a.as_i32x8()), |
128 | b:src.as_i32x8(), |
129 | )) |
130 | } |
131 | |
132 | /// For each packed 32-bit integer maps the value to the number of logical 1 bits. |
133 | /// |
134 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_popcnt_epi32) |
135 | #[inline ] |
136 | #[target_feature (enable = "avx512vpopcntdq,avx512vl" )] |
137 | #[cfg_attr (test, assert_instr(vpopcntd))] |
138 | pub unsafe fn _mm_popcnt_epi32(a: __m128i) -> __m128i { |
139 | transmute(src:popcnt_v4i32(a.as_i32x4())) |
140 | } |
141 | |
142 | /// For each packed 32-bit integer maps the value to the number of logical 1 bits. |
143 | /// |
144 | /// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set. |
145 | /// Otherwise the computation result is written into the result. |
146 | /// |
147 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_popcnt_epi32) |
148 | #[inline ] |
149 | #[target_feature (enable = "avx512vpopcntdq,avx512vl" )] |
150 | #[cfg_attr (test, assert_instr(vpopcntd))] |
151 | pub unsafe fn _mm_maskz_popcnt_epi32(k: __mmask8, a: __m128i) -> __m128i { |
152 | let zero: i32x4 = _mm_setzero_si128().as_i32x4(); |
153 | transmute(src:simd_select_bitmask(m:k, a:popcnt_v4i32(a.as_i32x4()), b:zero)) |
154 | } |
155 | |
156 | /// For each packed 32-bit integer maps the value to the number of logical 1 bits. |
157 | /// |
158 | /// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set. |
159 | /// Otherwise the computation result is written into the result. |
160 | /// |
161 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_popcnt_epi32) |
162 | #[inline ] |
163 | #[target_feature (enable = "avx512vpopcntdq,avx512vl" )] |
164 | #[cfg_attr (test, assert_instr(vpopcntd))] |
165 | pub unsafe fn _mm_mask_popcnt_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { |
166 | transmute(src:simd_select_bitmask( |
167 | m:k, |
168 | a:popcnt_v4i32(a.as_i32x4()), |
169 | b:src.as_i32x4(), |
170 | )) |
171 | } |
172 | |
173 | /// For each packed 64-bit integer maps the value to the number of logical 1 bits. |
174 | /// |
175 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_popcnt_epi64) |
176 | #[inline ] |
177 | #[target_feature (enable = "avx512vpopcntdq" )] |
178 | #[cfg_attr (test, assert_instr(vpopcntq))] |
179 | pub unsafe fn _mm512_popcnt_epi64(a: __m512i) -> __m512i { |
180 | transmute(src:popcnt_v8i64(a.as_i64x8())) |
181 | } |
182 | |
183 | /// For each packed 64-bit integer maps the value to the number of logical 1 bits. |
184 | /// |
185 | /// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set. |
186 | /// Otherwise the computation result is written into the result. |
187 | /// |
188 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_popcnt_epi64) |
189 | #[inline ] |
190 | #[target_feature (enable = "avx512vpopcntdq" )] |
191 | #[cfg_attr (test, assert_instr(vpopcntq))] |
192 | pub unsafe fn _mm512_maskz_popcnt_epi64(k: __mmask8, a: __m512i) -> __m512i { |
193 | let zero: i64x8 = _mm512_setzero_si512().as_i64x8(); |
194 | transmute(src:simd_select_bitmask(m:k, a:popcnt_v8i64(a.as_i64x8()), b:zero)) |
195 | } |
196 | |
197 | /// For each packed 64-bit integer maps the value to the number of logical 1 bits. |
198 | /// |
199 | /// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set. |
200 | /// Otherwise the computation result is written into the result. |
201 | /// |
202 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_popcnt_epi64) |
203 | #[inline ] |
204 | #[target_feature (enable = "avx512vpopcntdq" )] |
205 | #[cfg_attr (test, assert_instr(vpopcntq))] |
206 | pub unsafe fn _mm512_mask_popcnt_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i { |
207 | transmute(src:simd_select_bitmask( |
208 | m:k, |
209 | a:popcnt_v8i64(a.as_i64x8()), |
210 | b:src.as_i64x8(), |
211 | )) |
212 | } |
213 | |
214 | /// For each packed 64-bit integer maps the value to the number of logical 1 bits. |
215 | /// |
216 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_popcnt_epi64) |
217 | #[inline ] |
218 | #[target_feature (enable = "avx512vpopcntdq,avx512vl" )] |
219 | #[cfg_attr (test, assert_instr(vpopcntq))] |
220 | pub unsafe fn _mm256_popcnt_epi64(a: __m256i) -> __m256i { |
221 | transmute(src:popcnt_v4i64(a.as_i64x4())) |
222 | } |
223 | |
224 | /// For each packed 64-bit integer maps the value to the number of logical 1 bits. |
225 | /// |
226 | /// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set. |
227 | /// Otherwise the computation result is written into the result. |
228 | /// |
229 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_popcnt_epi64) |
230 | #[inline ] |
231 | #[target_feature (enable = "avx512vpopcntdq,avx512vl" )] |
232 | #[cfg_attr (test, assert_instr(vpopcntq))] |
233 | pub unsafe fn _mm256_maskz_popcnt_epi64(k: __mmask8, a: __m256i) -> __m256i { |
234 | let zero: i64x4 = _mm256_setzero_si256().as_i64x4(); |
235 | transmute(src:simd_select_bitmask(m:k, a:popcnt_v4i64(a.as_i64x4()), b:zero)) |
236 | } |
237 | |
238 | /// For each packed 64-bit integer maps the value to the number of logical 1 bits. |
239 | /// |
240 | /// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set. |
241 | /// Otherwise the computation result is written into the result. |
242 | /// |
243 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_popcnt_epi64) |
244 | #[inline ] |
245 | #[target_feature (enable = "avx512vpopcntdq,avx512vl" )] |
246 | #[cfg_attr (test, assert_instr(vpopcntq))] |
247 | pub unsafe fn _mm256_mask_popcnt_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { |
248 | transmute(src:simd_select_bitmask( |
249 | m:k, |
250 | a:popcnt_v4i64(a.as_i64x4()), |
251 | b:src.as_i64x4(), |
252 | )) |
253 | } |
254 | |
255 | /// For each packed 64-bit integer maps the value to the number of logical 1 bits. |
256 | /// |
257 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_popcnt_epi64) |
258 | #[inline ] |
259 | #[target_feature (enable = "avx512vpopcntdq,avx512vl" )] |
260 | #[cfg_attr (test, assert_instr(vpopcntq))] |
261 | pub unsafe fn _mm_popcnt_epi64(a: __m128i) -> __m128i { |
262 | transmute(src:popcnt_v2i64(a.as_i64x2())) |
263 | } |
264 | |
265 | /// For each packed 64-bit integer maps the value to the number of logical 1 bits. |
266 | /// |
267 | /// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set. |
268 | /// Otherwise the computation result is written into the result. |
269 | /// |
270 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_popcnt_epi64) |
271 | #[inline ] |
272 | #[target_feature (enable = "avx512vpopcntdq,avx512vl" )] |
273 | #[cfg_attr (test, assert_instr(vpopcntq))] |
274 | pub unsafe fn _mm_maskz_popcnt_epi64(k: __mmask8, a: __m128i) -> __m128i { |
275 | let zero: i64x2 = _mm_setzero_si128().as_i64x2(); |
276 | transmute(src:simd_select_bitmask(m:k, a:popcnt_v2i64(a.as_i64x2()), b:zero)) |
277 | } |
278 | |
279 | /// For each packed 64-bit integer maps the value to the number of logical 1 bits. |
280 | /// |
281 | /// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set. |
282 | /// Otherwise the computation result is written into the result. |
283 | /// |
284 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_popcnt_epi64) |
285 | #[inline ] |
286 | #[target_feature (enable = "avx512vpopcntdq,avx512vl" )] |
287 | #[cfg_attr (test, assert_instr(vpopcntq))] |
288 | pub unsafe fn _mm_mask_popcnt_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { |
289 | transmute(src:simd_select_bitmask( |
290 | m:k, |
291 | a:popcnt_v2i64(a.as_i64x2()), |
292 | b:src.as_i64x2(), |
293 | )) |
294 | } |
295 | |
// Unit tests for the VPOPCNTDQ intrinsics: each test feeds a fixed vector through one
// intrinsic and compares the outcome with a hand-written expected vector.
#[cfg(test)]
mod tests {
    use crate::core_arch::x86::*;

    use stdarch_test::simd_test;

    // ---- 32-bit lanes, 512-bit vectors ----

    #[simd_test(enable = "avx512vpopcntdq,avx512f")]
    unsafe fn test_mm512_popcnt_epi32() {
        #[rustfmt::skip]
        let input = _mm512_set_epi32(
            0, 1, -1, 2, 7, 0xFF_FE, 0x7F_FF_FF_FF, -100,
            0x40_00_00_00, 103, 371, 552, 432_948, 818_826_998, 255, 256,
        );
        let got = _mm512_popcnt_epi32(input);
        let want = _mm512_set_epi32(0, 1, 32, 1, 3, 15, 31, 28, 1, 5, 6, 3, 10, 17, 8, 1);
        assert_eq_m512i(got, want);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512f")]
    unsafe fn test_mm512_mask_popcnt_epi32() {
        #[rustfmt::skip]
        let input = _mm512_set_epi32(
            0, 1, -1, 2, 7, 0xFF_FE, 0x7F_FF_FF_FF, -100,
            0x40_00_00_00, 103, 371, 552, 432_948, 818_826_998, 255, 256,
        );
        // Mask bits 8..=15 are set; the remaining lanes must keep the `src` value.
        let k = 0xFF_00;
        let got = _mm512_mask_popcnt_epi32(input, k, input);
        #[rustfmt::skip]
        let want = _mm512_set_epi32(
            0, 1, 32, 1, 3, 15, 31, 28,
            0x40_00_00_00, 103, 371, 552, 432_948, 818_826_998, 255, 256,
        );
        assert_eq_m512i(got, want);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512f")]
    unsafe fn test_mm512_maskz_popcnt_epi32() {
        #[rustfmt::skip]
        let input = _mm512_set_epi32(
            0, 1, -1, 2, 7, 0xFF_FE, 0x7F_FF_FF_FF, -100,
            0x40_00_00_00, 103, 371, 552, 432_948, 818_826_998, 255, 256,
        );
        // Mask bits 8..=15 are set; the remaining lanes must come back as zero.
        let k = 0xFF_00;
        let got = _mm512_maskz_popcnt_epi32(k, input);
        let want = _mm512_set_epi32(0, 1, 32, 1, 3, 15, 31, 28, 0, 0, 0, 0, 0, 0, 0, 0);
        assert_eq_m512i(got, want);
    }

    // ---- 32-bit lanes, 256-bit vectors ----

    #[simd_test(enable = "avx512vpopcntdq,avx512f,avx512vl")]
    unsafe fn test_mm256_popcnt_epi32() {
        let input = _mm256_set_epi32(0, 1, -1, 2, 7, 0xFF_FE, 0x7F_FF_FF_FF, -100);
        let got = _mm256_popcnt_epi32(input);
        let want = _mm256_set_epi32(0, 1, 32, 1, 3, 15, 31, 28);
        assert_eq_m256i(got, want);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512f,avx512vl")]
    unsafe fn test_mm256_mask_popcnt_epi32() {
        let input = _mm256_set_epi32(0, 1, -1, 2, 7, 0xFF_FE, 0x7F_FF_FF_FF, -100);
        let k = 0xF0;
        let got = _mm256_mask_popcnt_epi32(input, k, input);
        let want = _mm256_set_epi32(0, 1, 32, 1, 7, 0xFF_FE, 0x7F_FF_FF_FF, -100);
        assert_eq_m256i(got, want);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_popcnt_epi32() {
        let input = _mm256_set_epi32(0, 1, -1, 2, 7, 0xFF_FE, 0x7F_FF_FF_FF, -100);
        let k = 0xF0;
        let got = _mm256_maskz_popcnt_epi32(k, input);
        let want = _mm256_set_epi32(0, 1, 32, 1, 0, 0, 0, 0);
        assert_eq_m256i(got, want);
    }

    // ---- 32-bit lanes, 128-bit vectors ----

    #[simd_test(enable = "avx512vpopcntdq,avx512f,avx512vl")]
    unsafe fn test_mm_popcnt_epi32() {
        let input = _mm_set_epi32(0, 1, -1, -100);
        let got = _mm_popcnt_epi32(input);
        let want = _mm_set_epi32(0, 1, 32, 28);
        assert_eq_m128i(got, want);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512f,avx512vl")]
    unsafe fn test_mm_mask_popcnt_epi32() {
        let input = _mm_set_epi32(0, 1, -1, -100);
        let k = 0xE;
        let got = _mm_mask_popcnt_epi32(input, k, input);
        let want = _mm_set_epi32(0, 1, 32, -100);
        assert_eq_m128i(got, want);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512f,avx512vl")]
    unsafe fn test_mm_maskz_popcnt_epi32() {
        let input = _mm_set_epi32(0, 1, -1, -100);
        let k = 0xE;
        let got = _mm_maskz_popcnt_epi32(k, input);
        let want = _mm_set_epi32(0, 1, 32, 0);
        assert_eq_m128i(got, want);
    }

    // ---- 64-bit lanes, 512-bit vectors ----

    #[simd_test(enable = "avx512vpopcntdq,avx512f")]
    unsafe fn test_mm512_popcnt_epi64() {
        let input = _mm512_set_epi64(0, 1, -1, 2, 7, 0xFF_FE, 0x7F_FF_FF_FF_FF_FF_FF_FF, -100);
        let got = _mm512_popcnt_epi64(input);
        let want = _mm512_set_epi64(0, 1, 64, 1, 3, 15, 63, 60);
        assert_eq_m512i(got, want);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512f")]
    unsafe fn test_mm512_mask_popcnt_epi64() {
        let input = _mm512_set_epi64(0, 1, -1, 2, 7, 0xFF_FE, 0x7F_FF_FF_FF_FF_FF_FF_FF, -100);
        let k = 0xF0;
        let got = _mm512_mask_popcnt_epi64(input, k, input);
        let want = _mm512_set_epi64(0, 1, 64, 1, 7, 0xFF_FE, 0x7F_FF_FF_FF_FF_FF_FF_FF, -100);
        assert_eq_m512i(got, want);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512f")]
    unsafe fn test_mm512_maskz_popcnt_epi64() {
        let input = _mm512_set_epi64(0, 1, -1, 2, 7, 0xFF_FE, 0x7F_FF_FF_FF_FF_FF_FF_FF, -100);
        let k = 0xF0;
        let got = _mm512_maskz_popcnt_epi64(k, input);
        let want = _mm512_set_epi64(0, 1, 64, 1, 0, 0, 0, 0);
        assert_eq_m512i(got, want);
    }

    // ---- 64-bit lanes, 256-bit vectors ----

    #[simd_test(enable = "avx512vpopcntdq,avx512vl")]
    unsafe fn test_mm256_popcnt_epi64() {
        let input = _mm256_set_epi64x(0, 1, -1, -100);
        let got = _mm256_popcnt_epi64(input);
        let want = _mm256_set_epi64x(0, 1, 64, 60);
        assert_eq_m256i(got, want);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512vl")]
    unsafe fn test_mm256_mask_popcnt_epi64() {
        let input = _mm256_set_epi64x(0, 1, -1, -100);
        let k = 0xE;
        let got = _mm256_mask_popcnt_epi64(input, k, input);
        let want = _mm256_set_epi64x(0, 1, 64, -100);
        assert_eq_m256i(got, want);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512vl")]
    unsafe fn test_mm256_maskz_popcnt_epi64() {
        let input = _mm256_set_epi64x(0, 1, -1, -100);
        let k = 0xE;
        let got = _mm256_maskz_popcnt_epi64(k, input);
        let want = _mm256_set_epi64x(0, 1, 64, 0);
        assert_eq_m256i(got, want);
    }

    // ---- 64-bit lanes, 128-bit vectors (two rounds each to cover four sample values) ----

    #[simd_test(enable = "avx512vpopcntdq,avx512vl")]
    unsafe fn test_mm_popcnt_epi64() {
        let first = _mm_set_epi64x(0, 1);
        let got = _mm_popcnt_epi64(first);
        let want = _mm_set_epi64x(0, 1);
        assert_eq_m128i(got, want);

        let second = _mm_set_epi64x(-1, -100);
        let got = _mm_popcnt_epi64(second);
        let want = _mm_set_epi64x(64, 60);
        assert_eq_m128i(got, want);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512vl")]
    unsafe fn test_mm_mask_popcnt_epi64() {
        let first = _mm_set_epi64x(0, -100);
        let k = 0x2;
        let got = _mm_mask_popcnt_epi64(first, k, first);
        let want = _mm_set_epi64x(0, -100);
        assert_eq_m128i(got, want);

        let second = _mm_set_epi64x(-1, 1);
        let k = 0x2;
        let got = _mm_mask_popcnt_epi64(second, k, second);
        let want = _mm_set_epi64x(64, 1);
        assert_eq_m128i(got, want);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512vl")]
    unsafe fn test_mm_maskz_popcnt_epi64() {
        let first = _mm_set_epi64x(0, 1);
        let k = 0x2;
        let got = _mm_maskz_popcnt_epi64(k, first);
        let want = _mm_set_epi64x(0, 0);
        assert_eq_m128i(got, want);

        let second = _mm_set_epi64x(-1, -100);
        let k = 0x2;
        let got = _mm_maskz_popcnt_epi64(k, second);
        let want = _mm_set_epi64x(64, 0);
        assert_eq_m128i(got, want);
    }
}
542 | |