1 | //! Vectorized Population Count Instructions for Double- and Quadwords (VPOPCNTDQ) |
2 | //! |
3 | //! The intrinsics here correspond to those in the `immintrin.h` C header. |
4 | //! |
5 | //! The reference is [Intel 64 and IA-32 Architectures Software Developer's |
6 | //! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref]. |
7 | //! |
8 | //! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf |
9 | |
10 | use crate::core_arch::simd::i32x16; |
11 | use crate::core_arch::simd::i32x4; |
12 | use crate::core_arch::simd::i32x8; |
13 | use crate::core_arch::simd::i64x2; |
14 | use crate::core_arch::simd::i64x4; |
15 | use crate::core_arch::simd::i64x8; |
16 | use crate::core_arch::x86::__m128i; |
17 | use crate::core_arch::x86::__m256i; |
18 | use crate::core_arch::x86::__m512i; |
19 | use crate::core_arch::x86::__mmask16; |
20 | use crate::core_arch::x86::__mmask8; |
21 | use crate::core_arch::x86::_mm256_setzero_si256; |
22 | use crate::core_arch::x86::_mm512_setzero_si512; |
23 | use crate::core_arch::x86::_mm_setzero_si128; |
24 | use crate::core_arch::x86::m128iExt; |
25 | use crate::core_arch::x86::m256iExt; |
26 | use crate::core_arch::x86::m512iExt; |
27 | use crate::intrinsics::simd::simd_select_bitmask; |
28 | use crate::mem::transmute; |
29 | |
30 | #[cfg (test)] |
31 | use stdarch_test::assert_instr; |
32 | |
33 | #[allow (improper_ctypes)] |
34 | extern "C" { |
35 | #[link_name = "llvm.ctpop.v16i32" ] |
36 | fn popcnt_v16i32(x: i32x16) -> i32x16; |
37 | #[link_name = "llvm.ctpop.v8i32" ] |
38 | fn popcnt_v8i32(x: i32x8) -> i32x8; |
39 | #[link_name = "llvm.ctpop.v4i32" ] |
40 | fn popcnt_v4i32(x: i32x4) -> i32x4; |
41 | |
42 | #[link_name = "llvm.ctpop.v8i64" ] |
43 | fn popcnt_v8i64(x: i64x8) -> i64x8; |
44 | #[link_name = "llvm.ctpop.v4i64" ] |
45 | fn popcnt_v4i64(x: i64x4) -> i64x4; |
46 | #[link_name = "llvm.ctpop.v2i64" ] |
47 | fn popcnt_v2i64(x: i64x2) -> i64x2; |
48 | } |
49 | |
50 | /// For each packed 32-bit integer maps the value to the number of logical 1 bits. |
51 | /// |
52 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_popcnt_epi32) |
53 | #[inline ] |
54 | #[target_feature (enable = "avx512vpopcntdq" )] |
55 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
56 | #[cfg_attr (test, assert_instr(vpopcntd))] |
57 | pub unsafe fn _mm512_popcnt_epi32(a: __m512i) -> __m512i { |
58 | transmute(src:popcnt_v16i32(a.as_i32x16())) |
59 | } |
60 | |
61 | /// For each packed 32-bit integer maps the value to the number of logical 1 bits. |
62 | /// |
63 | /// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set. |
64 | /// Otherwise the computation result is written into the result. |
65 | /// |
66 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_popcnt_epi32) |
67 | #[inline ] |
68 | #[target_feature (enable = "avx512vpopcntdq" )] |
69 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
70 | #[cfg_attr (test, assert_instr(vpopcntd))] |
71 | pub unsafe fn _mm512_maskz_popcnt_epi32(k: __mmask16, a: __m512i) -> __m512i { |
72 | let zero: i32x16 = _mm512_setzero_si512().as_i32x16(); |
73 | transmute(src:simd_select_bitmask(m:k, yes:popcnt_v16i32(a.as_i32x16()), no:zero)) |
74 | } |
75 | |
76 | /// For each packed 32-bit integer maps the value to the number of logical 1 bits. |
77 | /// |
78 | /// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set. |
79 | /// Otherwise the computation result is written into the result. |
80 | /// |
81 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_popcnt_epi32) |
82 | #[inline ] |
83 | #[target_feature (enable = "avx512vpopcntdq" )] |
84 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
85 | #[cfg_attr (test, assert_instr(vpopcntd))] |
86 | pub unsafe fn _mm512_mask_popcnt_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i { |
87 | transmute(src:simd_select_bitmask( |
88 | m:k, |
89 | yes:popcnt_v16i32(a.as_i32x16()), |
90 | no:src.as_i32x16(), |
91 | )) |
92 | } |
93 | |
94 | /// For each packed 32-bit integer maps the value to the number of logical 1 bits. |
95 | /// |
96 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_popcnt_epi32) |
97 | #[inline ] |
98 | #[target_feature (enable = "avx512vpopcntdq,avx512vl" )] |
99 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
100 | #[cfg_attr (test, assert_instr(vpopcntd))] |
101 | pub unsafe fn _mm256_popcnt_epi32(a: __m256i) -> __m256i { |
102 | transmute(src:popcnt_v8i32(a.as_i32x8())) |
103 | } |
104 | |
105 | /// For each packed 32-bit integer maps the value to the number of logical 1 bits. |
106 | /// |
107 | /// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set. |
108 | /// Otherwise the computation result is written into the result. |
109 | /// |
110 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_popcnt_epi32) |
111 | #[inline ] |
112 | #[target_feature (enable = "avx512vpopcntdq,avx512vl" )] |
113 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
114 | #[cfg_attr (test, assert_instr(vpopcntd))] |
115 | pub unsafe fn _mm256_maskz_popcnt_epi32(k: __mmask8, a: __m256i) -> __m256i { |
116 | let zero: i32x8 = _mm256_setzero_si256().as_i32x8(); |
117 | transmute(src:simd_select_bitmask(m:k, yes:popcnt_v8i32(a.as_i32x8()), no:zero)) |
118 | } |
119 | |
120 | /// For each packed 32-bit integer maps the value to the number of logical 1 bits. |
121 | /// |
122 | /// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set. |
123 | /// Otherwise the computation result is written into the result. |
124 | /// |
125 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_popcnt_epi32) |
126 | #[inline ] |
127 | #[target_feature (enable = "avx512vpopcntdq,avx512vl" )] |
128 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
129 | #[cfg_attr (test, assert_instr(vpopcntd))] |
130 | pub unsafe fn _mm256_mask_popcnt_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { |
131 | transmute(src:simd_select_bitmask( |
132 | m:k, |
133 | yes:popcnt_v8i32(a.as_i32x8()), |
134 | no:src.as_i32x8(), |
135 | )) |
136 | } |
137 | |
138 | /// For each packed 32-bit integer maps the value to the number of logical 1 bits. |
139 | /// |
140 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_popcnt_epi32) |
141 | #[inline ] |
142 | #[target_feature (enable = "avx512vpopcntdq,avx512vl" )] |
143 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
144 | #[cfg_attr (test, assert_instr(vpopcntd))] |
145 | pub unsafe fn _mm_popcnt_epi32(a: __m128i) -> __m128i { |
146 | transmute(src:popcnt_v4i32(a.as_i32x4())) |
147 | } |
148 | |
149 | /// For each packed 32-bit integer maps the value to the number of logical 1 bits. |
150 | /// |
151 | /// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set. |
152 | /// Otherwise the computation result is written into the result. |
153 | /// |
154 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_popcnt_epi32) |
155 | #[inline ] |
156 | #[target_feature (enable = "avx512vpopcntdq,avx512vl" )] |
157 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
158 | #[cfg_attr (test, assert_instr(vpopcntd))] |
159 | pub unsafe fn _mm_maskz_popcnt_epi32(k: __mmask8, a: __m128i) -> __m128i { |
160 | let zero: i32x4 = _mm_setzero_si128().as_i32x4(); |
161 | transmute(src:simd_select_bitmask(m:k, yes:popcnt_v4i32(a.as_i32x4()), no:zero)) |
162 | } |
163 | |
164 | /// For each packed 32-bit integer maps the value to the number of logical 1 bits. |
165 | /// |
166 | /// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set. |
167 | /// Otherwise the computation result is written into the result. |
168 | /// |
169 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_popcnt_epi32) |
170 | #[inline ] |
171 | #[target_feature (enable = "avx512vpopcntdq,avx512vl" )] |
172 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
173 | #[cfg_attr (test, assert_instr(vpopcntd))] |
174 | pub unsafe fn _mm_mask_popcnt_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { |
175 | transmute(src:simd_select_bitmask( |
176 | m:k, |
177 | yes:popcnt_v4i32(a.as_i32x4()), |
178 | no:src.as_i32x4(), |
179 | )) |
180 | } |
181 | |
182 | /// For each packed 64-bit integer maps the value to the number of logical 1 bits. |
183 | /// |
184 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_popcnt_epi64) |
185 | #[inline ] |
186 | #[target_feature (enable = "avx512vpopcntdq" )] |
187 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
188 | #[cfg_attr (test, assert_instr(vpopcntq))] |
189 | pub unsafe fn _mm512_popcnt_epi64(a: __m512i) -> __m512i { |
190 | transmute(src:popcnt_v8i64(a.as_i64x8())) |
191 | } |
192 | |
193 | /// For each packed 64-bit integer maps the value to the number of logical 1 bits. |
194 | /// |
195 | /// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set. |
196 | /// Otherwise the computation result is written into the result. |
197 | /// |
198 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_popcnt_epi64) |
199 | #[inline ] |
200 | #[target_feature (enable = "avx512vpopcntdq" )] |
201 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
202 | #[cfg_attr (test, assert_instr(vpopcntq))] |
203 | pub unsafe fn _mm512_maskz_popcnt_epi64(k: __mmask8, a: __m512i) -> __m512i { |
204 | let zero: i64x8 = _mm512_setzero_si512().as_i64x8(); |
205 | transmute(src:simd_select_bitmask(m:k, yes:popcnt_v8i64(a.as_i64x8()), no:zero)) |
206 | } |
207 | |
208 | /// For each packed 64-bit integer maps the value to the number of logical 1 bits. |
209 | /// |
210 | /// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set. |
211 | /// Otherwise the computation result is written into the result. |
212 | /// |
213 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_popcnt_epi64) |
214 | #[inline ] |
215 | #[target_feature (enable = "avx512vpopcntdq" )] |
216 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
217 | #[cfg_attr (test, assert_instr(vpopcntq))] |
218 | pub unsafe fn _mm512_mask_popcnt_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i { |
219 | transmute(src:simd_select_bitmask( |
220 | m:k, |
221 | yes:popcnt_v8i64(a.as_i64x8()), |
222 | no:src.as_i64x8(), |
223 | )) |
224 | } |
225 | |
226 | /// For each packed 64-bit integer maps the value to the number of logical 1 bits. |
227 | /// |
228 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_popcnt_epi64) |
229 | #[inline ] |
230 | #[target_feature (enable = "avx512vpopcntdq,avx512vl" )] |
231 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
232 | #[cfg_attr (test, assert_instr(vpopcntq))] |
233 | pub unsafe fn _mm256_popcnt_epi64(a: __m256i) -> __m256i { |
234 | transmute(src:popcnt_v4i64(a.as_i64x4())) |
235 | } |
236 | |
237 | /// For each packed 64-bit integer maps the value to the number of logical 1 bits. |
238 | /// |
239 | /// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set. |
240 | /// Otherwise the computation result is written into the result. |
241 | /// |
242 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_popcnt_epi64) |
243 | #[inline ] |
244 | #[target_feature (enable = "avx512vpopcntdq,avx512vl" )] |
245 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
246 | #[cfg_attr (test, assert_instr(vpopcntq))] |
247 | pub unsafe fn _mm256_maskz_popcnt_epi64(k: __mmask8, a: __m256i) -> __m256i { |
248 | let zero: i64x4 = _mm256_setzero_si256().as_i64x4(); |
249 | transmute(src:simd_select_bitmask(m:k, yes:popcnt_v4i64(a.as_i64x4()), no:zero)) |
250 | } |
251 | |
252 | /// For each packed 64-bit integer maps the value to the number of logical 1 bits. |
253 | /// |
254 | /// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set. |
255 | /// Otherwise the computation result is written into the result. |
256 | /// |
257 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_popcnt_epi64) |
258 | #[inline ] |
259 | #[target_feature (enable = "avx512vpopcntdq,avx512vl" )] |
260 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
261 | #[cfg_attr (test, assert_instr(vpopcntq))] |
262 | pub unsafe fn _mm256_mask_popcnt_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { |
263 | transmute(src:simd_select_bitmask( |
264 | m:k, |
265 | yes:popcnt_v4i64(a.as_i64x4()), |
266 | no:src.as_i64x4(), |
267 | )) |
268 | } |
269 | |
270 | /// For each packed 64-bit integer maps the value to the number of logical 1 bits. |
271 | /// |
272 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_popcnt_epi64) |
273 | #[inline ] |
274 | #[target_feature (enable = "avx512vpopcntdq,avx512vl" )] |
275 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
276 | #[cfg_attr (test, assert_instr(vpopcntq))] |
277 | pub unsafe fn _mm_popcnt_epi64(a: __m128i) -> __m128i { |
278 | transmute(src:popcnt_v2i64(a.as_i64x2())) |
279 | } |
280 | |
281 | /// For each packed 64-bit integer maps the value to the number of logical 1 bits. |
282 | /// |
283 | /// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set. |
284 | /// Otherwise the computation result is written into the result. |
285 | /// |
286 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_popcnt_epi64) |
287 | #[inline ] |
288 | #[target_feature (enable = "avx512vpopcntdq,avx512vl" )] |
289 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
290 | #[cfg_attr (test, assert_instr(vpopcntq))] |
291 | pub unsafe fn _mm_maskz_popcnt_epi64(k: __mmask8, a: __m128i) -> __m128i { |
292 | let zero: i64x2 = _mm_setzero_si128().as_i64x2(); |
293 | transmute(src:simd_select_bitmask(m:k, yes:popcnt_v2i64(a.as_i64x2()), no:zero)) |
294 | } |
295 | |
296 | /// For each packed 64-bit integer maps the value to the number of logical 1 bits. |
297 | /// |
298 | /// Uses the writemask in k - elements are copied from src if the corresponding mask bit is not set. |
299 | /// Otherwise the computation result is written into the result. |
300 | /// |
301 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_popcnt_epi64) |
302 | #[inline ] |
303 | #[target_feature (enable = "avx512vpopcntdq,avx512vl" )] |
304 | #[unstable (feature = "stdarch_x86_avx512" , issue = "111137" )] |
305 | #[cfg_attr (test, assert_instr(vpopcntq))] |
306 | pub unsafe fn _mm_mask_popcnt_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { |
307 | transmute(src:simd_select_bitmask( |
308 | m:k, |
309 | yes:popcnt_v2i64(a.as_i64x2()), |
310 | no:src.as_i64x2(), |
311 | )) |
312 | } |
313 | |
// Unit tests for the population-count intrinsics. Each test feeds a fixed vector
// through one intrinsic and compares the result with hand-computed bit counts.
// The `set` constructors list elements from the highest lane down, so the first
// arguments line up with the high bits of the mask.
#[cfg(test)]
mod tests {
    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;

    // ---- 32-bit lanes, 512-bit vectors ----

    #[simd_test(enable = "avx512vpopcntdq,avx512f")]
    unsafe fn test_mm512_popcnt_epi32() {
        #[rustfmt::skip]
        let input = _mm512_set_epi32(
            0, 1, -1, 2,
            7, 0xFF_FE, 0x7F_FF_FF_FF, -100,
            0x40_00_00_00, 103, 371, 552,
            432_948, 818_826_998, 255, 256,
        );
        let want = _mm512_set_epi32(0, 1, 32, 1, 3, 15, 31, 28, 1, 5, 6, 3, 10, 17, 8, 1);
        let got = _mm512_popcnt_epi32(input);
        assert_eq_m512i(got, want);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512f")]
    unsafe fn test_mm512_mask_popcnt_epi32() {
        #[rustfmt::skip]
        let input = _mm512_set_epi32(
            0, 1, -1, 2,
            7, 0xFF_FE, 0x7F_FF_FF_FF, -100,
            0x40_00_00_00, 103, 371, 552,
            432_948, 818_826_998, 255, 256,
        );
        // Upper eight lanes are counted; lower eight pass through from `src`.
        #[rustfmt::skip]
        let want = _mm512_set_epi32(
            0, 1, 32, 1,
            3, 15, 31, 28,
            0x40_00_00_00, 103, 371, 552,
            432_948, 818_826_998, 255, 256,
        );
        let k = 0xFF_00;
        let got = _mm512_mask_popcnt_epi32(input, k, input);
        assert_eq_m512i(got, want);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512f")]
    unsafe fn test_mm512_maskz_popcnt_epi32() {
        #[rustfmt::skip]
        let input = _mm512_set_epi32(
            0, 1, -1, 2,
            7, 0xFF_FE, 0x7F_FF_FF_FF, -100,
            0x40_00_00_00, 103, 371, 552,
            432_948, 818_826_998, 255, 256,
        );
        // Upper eight lanes are counted; lower eight are zeroed.
        let want = _mm512_set_epi32(0, 1, 32, 1, 3, 15, 31, 28, 0, 0, 0, 0, 0, 0, 0, 0);
        let k = 0xFF_00;
        let got = _mm512_maskz_popcnt_epi32(k, input);
        assert_eq_m512i(got, want);
    }

    // ---- 32-bit lanes, 256-bit vectors ----

    #[simd_test(enable = "avx512vpopcntdq,avx512f,avx512vl")]
    unsafe fn test_mm256_popcnt_epi32() {
        let input = _mm256_set_epi32(0, 1, -1, 2, 7, 0xFF_FE, 0x7F_FF_FF_FF, -100);
        let want = _mm256_set_epi32(0, 1, 32, 1, 3, 15, 31, 28);
        let got = _mm256_popcnt_epi32(input);
        assert_eq_m256i(got, want);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512f,avx512vl")]
    unsafe fn test_mm256_mask_popcnt_epi32() {
        let input = _mm256_set_epi32(0, 1, -1, 2, 7, 0xFF_FE, 0x7F_FF_FF_FF, -100);
        let want = _mm256_set_epi32(0, 1, 32, 1, 7, 0xFF_FE, 0x7F_FF_FF_FF, -100);
        let k = 0xF0;
        let got = _mm256_mask_popcnt_epi32(input, k, input);
        assert_eq_m256i(got, want);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_popcnt_epi32() {
        let input = _mm256_set_epi32(0, 1, -1, 2, 7, 0xFF_FE, 0x7F_FF_FF_FF, -100);
        let want = _mm256_set_epi32(0, 1, 32, 1, 0, 0, 0, 0);
        let k = 0xF0;
        let got = _mm256_maskz_popcnt_epi32(k, input);
        assert_eq_m256i(got, want);
    }

    // ---- 32-bit lanes, 128-bit vectors ----

    #[simd_test(enable = "avx512vpopcntdq,avx512f,avx512vl")]
    unsafe fn test_mm_popcnt_epi32() {
        let input = _mm_set_epi32(0, 1, -1, -100);
        let want = _mm_set_epi32(0, 1, 32, 28);
        let got = _mm_popcnt_epi32(input);
        assert_eq_m128i(got, want);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512f,avx512vl")]
    unsafe fn test_mm_mask_popcnt_epi32() {
        let input = _mm_set_epi32(0, 1, -1, -100);
        let want = _mm_set_epi32(0, 1, 32, -100);
        let k = 0xE;
        let got = _mm_mask_popcnt_epi32(input, k, input);
        assert_eq_m128i(got, want);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512f,avx512vl")]
    unsafe fn test_mm_maskz_popcnt_epi32() {
        let input = _mm_set_epi32(0, 1, -1, -100);
        let want = _mm_set_epi32(0, 1, 32, 0);
        let k = 0xE;
        let got = _mm_maskz_popcnt_epi32(k, input);
        assert_eq_m128i(got, want);
    }

    // ---- 64-bit lanes, 512-bit vectors ----

    #[simd_test(enable = "avx512vpopcntdq,avx512f")]
    unsafe fn test_mm512_popcnt_epi64() {
        let input = _mm512_set_epi64(0, 1, -1, 2, 7, 0xFF_FE, 0x7F_FF_FF_FF_FF_FF_FF_FF, -100);
        let want = _mm512_set_epi64(0, 1, 64, 1, 3, 15, 63, 60);
        let got = _mm512_popcnt_epi64(input);
        assert_eq_m512i(got, want);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512f")]
    unsafe fn test_mm512_mask_popcnt_epi64() {
        let input = _mm512_set_epi64(0, 1, -1, 2, 7, 0xFF_FE, 0x7F_FF_FF_FF_FF_FF_FF_FF, -100);
        let want = _mm512_set_epi64(0, 1, 64, 1, 7, 0xFF_FE, 0x7F_FF_FF_FF_FF_FF_FF_FF, -100);
        let k = 0xF0;
        let got = _mm512_mask_popcnt_epi64(input, k, input);
        assert_eq_m512i(got, want);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512f")]
    unsafe fn test_mm512_maskz_popcnt_epi64() {
        let input = _mm512_set_epi64(0, 1, -1, 2, 7, 0xFF_FE, 0x7F_FF_FF_FF_FF_FF_FF_FF, -100);
        let want = _mm512_set_epi64(0, 1, 64, 1, 0, 0, 0, 0);
        let k = 0xF0;
        let got = _mm512_maskz_popcnt_epi64(k, input);
        assert_eq_m512i(got, want);
    }

    // ---- 64-bit lanes, 256-bit vectors ----

    #[simd_test(enable = "avx512vpopcntdq,avx512vl")]
    unsafe fn test_mm256_popcnt_epi64() {
        let input = _mm256_set_epi64x(0, 1, -1, -100);
        let want = _mm256_set_epi64x(0, 1, 64, 60);
        let got = _mm256_popcnt_epi64(input);
        assert_eq_m256i(got, want);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512vl")]
    unsafe fn test_mm256_mask_popcnt_epi64() {
        let input = _mm256_set_epi64x(0, 1, -1, -100);
        let want = _mm256_set_epi64x(0, 1, 64, -100);
        let k = 0xE;
        let got = _mm256_mask_popcnt_epi64(input, k, input);
        assert_eq_m256i(got, want);
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512vl")]
    unsafe fn test_mm256_maskz_popcnt_epi64() {
        let input = _mm256_set_epi64x(0, 1, -1, -100);
        let want = _mm256_set_epi64x(0, 1, 64, 0);
        let k = 0xE;
        let got = _mm256_maskz_popcnt_epi64(k, input);
        assert_eq_m256i(got, want);
    }

    // ---- 64-bit lanes, 128-bit vectors ----
    // Only two lanes fit in a vector, so each test runs two input/expected pairs.

    #[simd_test(enable = "avx512vpopcntdq,avx512vl")]
    unsafe fn test_mm_popcnt_epi64() {
        let cases = [
            (_mm_set_epi64x(0, 1), _mm_set_epi64x(0, 1)),
            (_mm_set_epi64x(-1, -100), _mm_set_epi64x(64, 60)),
        ];
        for (input, want) in cases {
            let got = _mm_popcnt_epi64(input);
            assert_eq_m128i(got, want);
        }
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512vl")]
    unsafe fn test_mm_mask_popcnt_epi64() {
        let k = 0x2;
        let cases = [
            (_mm_set_epi64x(0, -100), _mm_set_epi64x(0, -100)),
            (_mm_set_epi64x(-1, 1), _mm_set_epi64x(64, 1)),
        ];
        for (input, want) in cases {
            let got = _mm_mask_popcnt_epi64(input, k, input);
            assert_eq_m128i(got, want);
        }
    }

    #[simd_test(enable = "avx512vpopcntdq,avx512vl")]
    unsafe fn test_mm_maskz_popcnt_epi64() {
        let k = 0x2;
        let cases = [
            (_mm_set_epi64x(0, 1), _mm_set_epi64x(0, 0)),
            (_mm_set_epi64x(-1, -100), _mm_set_epi64x(64, 0)),
        ];
        for (input, want) in cases {
            let got = _mm_maskz_popcnt_epi64(k, input);
            assert_eq_m128i(got, want);
        }
    }
}
560 | |