1 | use crate::{ |
2 | core_arch::{simd::*, simd_llvm::*, x86::*}, |
3 | mem::transmute, |
4 | }; |
5 | |
#[cfg(test)]
7 | use stdarch_test::assert_instr; |
8 | |
9 | /// Broadcast the low 16-bits from input mask k to all 32-bit elements of dst. |
10 | /// |
11 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastmw_epi32&expand=553) |
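///
/// # Example
///
/// A minimal sketch of the semantics (marked `ignore` since the AVX-512
/// intrinsics are nightly-only and need `avx512cd` support at run time):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let k: __mmask16 = 0b101; // mask value 5
///     let v = _mm512_broadcastmw_epi32(k);
///     // Every 32-bit lane of `v` now holds the zero-extended mask, i.e. 5.
/// }
/// ```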
#[inline]
#[target_feature(enable = "avx512cd")]
#[cfg_attr(test, assert_instr(vpbroadcast))] // should be vpbroadcastmw2d
15 | pub unsafe fn _mm512_broadcastmw_epi32(k: __mmask16) -> __m512i { |
16 | _mm512_set1_epi32(k as i32) |
17 | } |
18 | |
19 | /// Broadcast the low 16-bits from input mask k to all 32-bit elements of dst. |
20 | /// |
21 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcastmw_epi32&expand=552) |
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vpbroadcast))] // should be vpbroadcastmw2d
25 | pub unsafe fn _mm256_broadcastmw_epi32(k: __mmask16) -> __m256i { |
26 | _mm256_set1_epi32(k as i32) |
27 | } |
28 | |
29 | /// Broadcast the low 16-bits from input mask k to all 32-bit elements of dst. |
30 | /// |
31 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcastmw_epi32&expand=551) |
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vpbroadcast))] // should be vpbroadcastmw2d
35 | pub unsafe fn _mm_broadcastmw_epi32(k: __mmask16) -> __m128i { |
36 | _mm_set1_epi32(k as i32) |
37 | } |
38 | |
39 | /// Broadcast the low 8-bits from input mask k to all 64-bit elements of dst. |
40 | /// |
41 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastmb_epi64&expand=550) |
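///
/// # Example
///
/// A minimal sketch (`ignore`d for the same nightly/runtime-feature reasons):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let k: __mmask8 = 0b11; // mask value 3
///     let v = _mm512_broadcastmb_epi64(k);
///     // Every 64-bit lane of `v` now holds the zero-extended mask, i.e. 3.
/// }
/// ```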
#[inline]
#[target_feature(enable = "avx512cd")]
#[cfg_attr(test, assert_instr(vpbroadcast))] // should be vpbroadcastmb2q
45 | pub unsafe fn _mm512_broadcastmb_epi64(k: __mmask8) -> __m512i { |
46 | _mm512_set1_epi64(k as i64) |
47 | } |
48 | |
49 | /// Broadcast the low 8-bits from input mask k to all 64-bit elements of dst. |
50 | /// |
51 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcastmb_epi64&expand=549) |
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vpbroadcast))] // should be vpbroadcastmb2q
55 | pub unsafe fn _mm256_broadcastmb_epi64(k: __mmask8) -> __m256i { |
56 | _mm256_set1_epi64x(k as i64) |
57 | } |
58 | |
59 | /// Broadcast the low 8-bits from input mask k to all 64-bit elements of dst. |
60 | /// |
61 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcastmb_epi64&expand=548) |
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vpbroadcast))] // should be vpbroadcastmb2q
65 | pub unsafe fn _mm_broadcastmb_epi64(k: __mmask8) -> __m128i { |
66 | _mm_set1_epi64x(k as i64) |
67 | } |
68 | |
69 | /// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit. Each element's comparison forms a zero extended bit vector in dst. |
70 | /// |
71 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_conflict_epi32&expand=1248) |
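///
/// # Example
///
/// A sketch of the output encoding (`ignore`d: nightly-only, needs runtime
/// `avx512cd`). Bit `j` of lane `i` is set when lane `j` (with `j < i`)
/// holds the same value as lane `i`:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi32(7); // all sixteen lanes equal
///     let r = _mm512_conflict_epi32(a);
///     // Lane 0 is 0, lane 1 is 0b1, lane 2 is 0b11, ..., lane 15 has its
///     // fifteen low bits set, one bit per matching lower-indexed lane.
/// }
/// ```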
#[inline]
#[target_feature(enable = "avx512cd")]
#[cfg_attr(test, assert_instr(vpconflictd))]
pub unsafe fn _mm512_conflict_epi32(a: __m512i) -> __m512i {
    transmute(vpconflictd(a.as_i32x16()))
}
78 | |
79 | /// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit using writemask k (elements are copied from src when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst. |
80 | /// |
81 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_conflict_epi32&expand=1249) |
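///
/// # Example
///
/// A sketch of the writemask behavior (`ignore`d: nightly-only, needs
/// runtime `avx512cd`):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let src = _mm512_set1_epi32(-1);
///     let a = _mm512_set1_epi32(7);
///     // Only lanes 0 and 1 are computed; the rest are copied from `src`.
///     let r = _mm512_mask_conflict_epi32(src, 0b0000_0000_0000_0011, a);
/// }
/// ```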
#[inline]
#[target_feature(enable = "avx512cd")]
#[cfg_attr(test, assert_instr(vpconflictd))]
pub unsafe fn _mm512_mask_conflict_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
    let conflict: i32x16 = _mm512_conflict_epi32(a).as_i32x16();
    transmute(simd_select_bitmask(k, conflict, src.as_i32x16()))
}
89 | |
90 | /// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst. |
91 | /// |
92 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_conflict_epi32&expand=1250) |
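///
/// # Example
///
/// A sketch of the zeromask behavior (`ignore`d: nightly-only, needs
/// runtime `avx512cd`):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi32(7);
///     // Lanes 0..8 are computed; lanes 8..16 are zeroed.
///     let r = _mm512_maskz_conflict_epi32(0b0000_0000_1111_1111, a);
/// }
/// ```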
#[inline]
#[target_feature(enable = "avx512cd")]
#[cfg_attr(test, assert_instr(vpconflictd))]
pub unsafe fn _mm512_maskz_conflict_epi32(k: __mmask16, a: __m512i) -> __m512i {
    let conflict: i32x16 = _mm512_conflict_epi32(a).as_i32x16();
    let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, conflict, zero))
}
101 | |
102 | /// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit. Each element's comparison forms a zero extended bit vector in dst. |
103 | /// |
104 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_conflict_epi32&expand=1245) |
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vpconflictd))]
pub unsafe fn _mm256_conflict_epi32(a: __m256i) -> __m256i {
    transmute(vpconflictd256(a.as_i32x8()))
}
111 | |
112 | /// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit using writemask k (elements are copied from src when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst. |
113 | /// |
114 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_conflict_epi32&expand=1246) |
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vpconflictd))]
pub unsafe fn _mm256_mask_conflict_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    let conflict: i32x8 = _mm256_conflict_epi32(a).as_i32x8();
    transmute(simd_select_bitmask(k, conflict, src.as_i32x8()))
}
122 | |
123 | /// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst. |
124 | /// |
125 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_conflict_epi32&expand=1247) |
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vpconflictd))]
pub unsafe fn _mm256_maskz_conflict_epi32(k: __mmask8, a: __m256i) -> __m256i {
    let conflict: i32x8 = _mm256_conflict_epi32(a).as_i32x8();
    let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
    transmute(simd_select_bitmask(k, conflict, zero))
}
134 | |
135 | /// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit. Each element's comparison forms a zero extended bit vector in dst. |
136 | /// |
137 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_conflict_epi32&expand=1242) |
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vpconflictd))]
pub unsafe fn _mm_conflict_epi32(a: __m128i) -> __m128i {
    transmute(vpconflictd128(a.as_i32x4()))
}
144 | |
145 | /// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit using writemask k (elements are copied from src when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst. |
146 | /// |
147 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_conflict_epi32&expand=1243) |
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vpconflictd))]
pub unsafe fn _mm_mask_conflict_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    let conflict: i32x4 = _mm_conflict_epi32(a).as_i32x4();
    transmute(simd_select_bitmask(k, conflict, src.as_i32x4()))
}
155 | |
156 | /// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst. |
157 | /// |
158 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_conflict_epi32&expand=1244) |
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vpconflictd))]
pub unsafe fn _mm_maskz_conflict_epi32(k: __mmask8, a: __m128i) -> __m128i {
    let conflict: i32x4 = _mm_conflict_epi32(a).as_i32x4();
    let zero: i32x4 = _mm_setzero_si128().as_i32x4();
    transmute(simd_select_bitmask(k, conflict, zero))
}
167 | |
168 | /// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit. Each element's comparison forms a zero extended bit vector in dst. |
169 | /// |
170 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_conflict_epi64&expand=1257) |
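///
/// # Example
///
/// A sketch with distinct values (`ignore`d: nightly-only, needs runtime
/// `avx512cd`):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     // Lanes (low to high): 1, 2, 1, 3, 2, 1, 4, 1
///     let a = _mm512_setr_epi64(1, 2, 1, 3, 2, 1, 4, 1);
///     let r = _mm512_conflict_epi64(a);
///     // Lane 2 matches lane 0, so it holds 0b1; lane 5 matches lanes 0
///     // and 2, so it holds 0b101; lane 7 matches lanes 0, 2 and 5, so it
///     // holds 0b100101.
/// }
/// ```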
#[inline]
#[target_feature(enable = "avx512cd")]
#[cfg_attr(test, assert_instr(vpconflictq))]
pub unsafe fn _mm512_conflict_epi64(a: __m512i) -> __m512i {
    transmute(vpconflictq(a.as_i64x8()))
}
177 | |
178 | /// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit using writemask k (elements are copied from src when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst. |
179 | /// |
180 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_conflict_epi64&expand=1258) |
#[inline]
#[target_feature(enable = "avx512cd")]
#[cfg_attr(test, assert_instr(vpconflictq))]
pub unsafe fn _mm512_mask_conflict_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
    let conflict: i64x8 = _mm512_conflict_epi64(a).as_i64x8();
    transmute(simd_select_bitmask(k, conflict, src.as_i64x8()))
}
188 | |
189 | /// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst. |
190 | /// |
191 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_conflict_epi64&expand=1259) |
#[inline]
#[target_feature(enable = "avx512cd")]
#[cfg_attr(test, assert_instr(vpconflictq))]
pub unsafe fn _mm512_maskz_conflict_epi64(k: __mmask8, a: __m512i) -> __m512i {
    let conflict: i64x8 = _mm512_conflict_epi64(a).as_i64x8();
    let zero: i64x8 = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, conflict, zero))
}
200 | |
201 | /// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit. Each element's comparison forms a zero extended bit vector in dst. |
202 | /// |
203 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_conflict_epi64&expand=1254) |
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vpconflictq))]
pub unsafe fn _mm256_conflict_epi64(a: __m256i) -> __m256i {
    transmute(vpconflictq256(a.as_i64x4()))
}
210 | |
211 | /// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit using writemask k (elements are copied from src when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst. |
212 | /// |
213 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_conflict_epi64&expand=1255) |
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vpconflictq))]
pub unsafe fn _mm256_mask_conflict_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    let conflict: i64x4 = _mm256_conflict_epi64(a).as_i64x4();
    transmute(simd_select_bitmask(k, conflict, src.as_i64x4()))
}
221 | |
222 | /// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst. |
223 | /// |
224 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_conflict_epi64&expand=1256) |
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vpconflictq))]
pub unsafe fn _mm256_maskz_conflict_epi64(k: __mmask8, a: __m256i) -> __m256i {
    let conflict: i64x4 = _mm256_conflict_epi64(a).as_i64x4();
    let zero: i64x4 = _mm256_setzero_si256().as_i64x4();
    transmute(simd_select_bitmask(k, conflict, zero))
}
233 | |
234 | /// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit. Each element's comparison forms a zero extended bit vector in dst. |
235 | /// |
236 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_conflict_epi64&expand=1251) |
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vpconflictq))]
pub unsafe fn _mm_conflict_epi64(a: __m128i) -> __m128i {
    transmute(vpconflictq128(a.as_i64x2()))
}
243 | |
244 | /// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit using writemask k (elements are copied from src when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst. |
245 | /// |
246 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_conflict_epi64&expand=1252) |
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vpconflictq))]
pub unsafe fn _mm_mask_conflict_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    let conflict: i64x2 = _mm_conflict_epi64(a).as_i64x2();
    transmute(simd_select_bitmask(k, conflict, src.as_i64x2()))
}
254 | |
255 | /// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst. |
256 | /// |
257 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_conflict_epi64&expand=1253) |
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vpconflictq))]
pub unsafe fn _mm_maskz_conflict_epi64(k: __mmask8, a: __m128i) -> __m128i {
    let conflict: i64x2 = _mm_conflict_epi64(a).as_i64x2();
    let zero: i64x2 = _mm_setzero_si128().as_i64x2();
    transmute(simd_select_bitmask(k, conflict, zero))
}
266 | |
267 | /// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst. |
268 | /// |
269 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_lzcnt_epi32&expand=3491) |
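///
/// # Example
///
/// A minimal sketch (`ignore`d: nightly-only, needs runtime `avx512cd`):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_setr_epi32(1, 2, 4, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
///     let r = _mm512_lzcnt_epi32(a);
///     // Lanes 0-3 hold 31, 30, 29 and 32; a zero input counts all 32 bits.
/// }
/// ```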
#[inline]
#[target_feature(enable = "avx512cd")]
#[cfg_attr(test, assert_instr(vplzcntd))]
pub unsafe fn _mm512_lzcnt_epi32(a: __m512i) -> __m512i {
    transmute(vplzcntd(a.as_i32x16(), false))
}
276 | |
277 | /// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
278 | /// |
279 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_lzcnt_epi32&expand=3492) |
#[inline]
#[target_feature(enable = "avx512cd")]
#[cfg_attr(test, assert_instr(vplzcntd))]
pub unsafe fn _mm512_mask_lzcnt_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
    let zerocount: i32x16 = _mm512_lzcnt_epi32(a).as_i32x16();
    transmute(simd_select_bitmask(k, zerocount, src.as_i32x16()))
}
287 | |
288 | /// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
289 | /// |
290 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_lzcnt_epi32&expand=3493) |
#[inline]
#[target_feature(enable = "avx512cd")]
#[cfg_attr(test, assert_instr(vplzcntd))]
pub unsafe fn _mm512_maskz_lzcnt_epi32(k: __mmask16, a: __m512i) -> __m512i {
    let zerocount: i32x16 = _mm512_lzcnt_epi32(a).as_i32x16();
    let zero: i32x16 = _mm512_setzero_si512().as_i32x16();
    transmute(simd_select_bitmask(k, zerocount, zero))
}
299 | |
300 | /// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst. |
301 | /// |
302 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_lzcnt_epi32&expand=3488) |
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vplzcntd))]
pub unsafe fn _mm256_lzcnt_epi32(a: __m256i) -> __m256i {
    transmute(vplzcntd256(a.as_i32x8(), false))
}
309 | |
310 | /// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
311 | /// |
312 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_lzcnt_epi32&expand=3489) |
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vplzcntd))]
pub unsafe fn _mm256_mask_lzcnt_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    let zerocount: i32x8 = _mm256_lzcnt_epi32(a).as_i32x8();
    transmute(simd_select_bitmask(k, zerocount, src.as_i32x8()))
}
320 | |
321 | /// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
322 | /// |
323 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_lzcnt_epi32&expand=3490) |
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vplzcntd))]
pub unsafe fn _mm256_maskz_lzcnt_epi32(k: __mmask8, a: __m256i) -> __m256i {
    let zerocount: i32x8 = _mm256_lzcnt_epi32(a).as_i32x8();
    let zero: i32x8 = _mm256_setzero_si256().as_i32x8();
    transmute(simd_select_bitmask(k, zerocount, zero))
}
332 | |
333 | /// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst. |
334 | /// |
335 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_lzcnt_epi32&expand=3485) |
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vplzcntd))]
pub unsafe fn _mm_lzcnt_epi32(a: __m128i) -> __m128i {
    transmute(vplzcntd128(a.as_i32x4(), false))
}
342 | |
343 | /// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
344 | /// |
345 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_lzcnt_epi32&expand=3486) |
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vplzcntd))]
pub unsafe fn _mm_mask_lzcnt_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    let zerocount: i32x4 = _mm_lzcnt_epi32(a).as_i32x4();
    transmute(simd_select_bitmask(k, zerocount, src.as_i32x4()))
}
353 | |
354 | /// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
355 | /// |
356 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_lzcnt_epi32&expand=3487) |
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vplzcntd))]
pub unsafe fn _mm_maskz_lzcnt_epi32(k: __mmask8, a: __m128i) -> __m128i {
    let zerocount: i32x4 = _mm_lzcnt_epi32(a).as_i32x4();
    let zero: i32x4 = _mm_setzero_si128().as_i32x4();
    transmute(simd_select_bitmask(k, zerocount, zero))
}
365 | |
366 | /// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst. |
367 | /// |
368 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_lzcnt_epi64&expand=3500) |
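///
/// # Example
///
/// A minimal sketch (`ignore`d: nightly-only, needs runtime `avx512cd`):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_setr_epi64(1, 2, 0, 1 << 63, 1, 1, 1, 1);
///     let r = _mm512_lzcnt_epi64(a);
///     // Lanes 0-3 hold 63, 62, 64 and 0; a zero input counts all 64 bits.
/// }
/// ```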
#[inline]
#[target_feature(enable = "avx512cd")]
#[cfg_attr(test, assert_instr(vplzcntq))]
pub unsafe fn _mm512_lzcnt_epi64(a: __m512i) -> __m512i {
    transmute(vplzcntq(a.as_i64x8(), false))
}
375 | |
376 | /// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
377 | /// |
378 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_lzcnt_epi64&expand=3501) |
#[inline]
#[target_feature(enable = "avx512cd")]
#[cfg_attr(test, assert_instr(vplzcntq))]
pub unsafe fn _mm512_mask_lzcnt_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
    let zerocount: i64x8 = _mm512_lzcnt_epi64(a).as_i64x8();
    transmute(simd_select_bitmask(k, zerocount, src.as_i64x8()))
}
386 | |
387 | /// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
388 | /// |
389 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_lzcnt_epi64&expand=3502) |
#[inline]
#[target_feature(enable = "avx512cd")]
#[cfg_attr(test, assert_instr(vplzcntq))]
pub unsafe fn _mm512_maskz_lzcnt_epi64(k: __mmask8, a: __m512i) -> __m512i {
    let zerocount: i64x8 = _mm512_lzcnt_epi64(a).as_i64x8();
    let zero: i64x8 = _mm512_setzero_si512().as_i64x8();
    transmute(simd_select_bitmask(k, zerocount, zero))
}
398 | |
399 | /// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst. |
400 | /// |
401 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_lzcnt_epi64&expand=3497) |
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vplzcntq))]
pub unsafe fn _mm256_lzcnt_epi64(a: __m256i) -> __m256i {
    transmute(vplzcntq256(a.as_i64x4(), false))
}
408 | |
409 | /// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
410 | /// |
411 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_lzcnt_epi64&expand=3498) |
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vplzcntq))]
pub unsafe fn _mm256_mask_lzcnt_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    let zerocount: i64x4 = _mm256_lzcnt_epi64(a).as_i64x4();
    transmute(simd_select_bitmask(k, zerocount, src.as_i64x4()))
}
419 | |
420 | /// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
421 | /// |
422 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_lzcnt_epi64&expand=3499) |
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vplzcntq))]
pub unsafe fn _mm256_maskz_lzcnt_epi64(k: __mmask8, a: __m256i) -> __m256i {
    let zerocount: i64x4 = _mm256_lzcnt_epi64(a).as_i64x4();
    let zero: i64x4 = _mm256_setzero_si256().as_i64x4();
    transmute(simd_select_bitmask(k, zerocount, zero))
}
431 | |
432 | /// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst. |
433 | /// |
434 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_lzcnt_epi64&expand=3494) |
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vplzcntq))]
pub unsafe fn _mm_lzcnt_epi64(a: __m128i) -> __m128i {
    transmute(vplzcntq128(a.as_i64x2(), false))
}
441 | |
442 | /// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
443 | /// |
444 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_lzcnt_epi64&expand=3495) |
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vplzcntq))]
pub unsafe fn _mm_mask_lzcnt_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    let zerocount: i64x2 = _mm_lzcnt_epi64(a).as_i64x2();
    transmute(simd_select_bitmask(k, zerocount, src.as_i64x2()))
}
452 | |
453 | /// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
454 | /// |
455 | /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_lzcnt_epi64&expand=3496) |
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vplzcntq))]
pub unsafe fn _mm_maskz_lzcnt_epi64(k: __mmask8, a: __m128i) -> __m128i {
    let zerocount: i64x2 = _mm_lzcnt_epi64(a).as_i64x2();
    let zero: i64x2 = _mm_setzero_si128().as_i64x2();
    transmute(simd_select_bitmask(k, zerocount, zero))
}
464 | |
#[allow(improper_ctypes)]
extern "C" {
    #[link_name = "llvm.x86.avx512.conflict.d.512"]
    fn vpconflictd(a: i32x16) -> i32x16;
    #[link_name = "llvm.x86.avx512.conflict.d.256"]
    fn vpconflictd256(a: i32x8) -> i32x8;
    #[link_name = "llvm.x86.avx512.conflict.d.128"]
    fn vpconflictd128(a: i32x4) -> i32x4;

    #[link_name = "llvm.x86.avx512.conflict.q.512"]
    fn vpconflictq(a: i64x8) -> i64x8;
    #[link_name = "llvm.x86.avx512.conflict.q.256"]
    fn vpconflictq256(a: i64x4) -> i64x4;
    #[link_name = "llvm.x86.avx512.conflict.q.128"]
    fn vpconflictq128(a: i64x2) -> i64x2;
480 | |
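    // `llvm.ctlz` takes a second `i1` argument (LLVM's `is_zero_poison`
    // flag). The intrinsics above always pass `false`, so an all-zero
    // element yields the full bit width (32 or 64) instead of poison.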
481 | #[link_name = "llvm.ctlz.v16i32" ] |
482 | fn vplzcntd(a: i32x16, nonzero: bool) -> i32x16; |
483 | #[link_name = "llvm.ctlz.v8i32" ] |
484 | fn vplzcntd256(a: i32x8, nonzero: bool) -> i32x8; |
485 | #[link_name = "llvm.ctlz.v4i32" ] |
486 | fn vplzcntd128(a: i32x4, nonzero: bool) -> i32x4; |
487 | |
488 | #[link_name = "llvm.ctlz.v8i64" ] |
489 | fn vplzcntq(a: i64x8, nonzero: bool) -> i64x8; |
490 | #[link_name = "llvm.ctlz.v4i64" ] |
491 | fn vplzcntq256(a: i64x4, nonzero: bool) -> i64x4; |
492 | #[link_name = "llvm.ctlz.v2i64" ] |
493 | fn vplzcntq128(a: i64x2, nonzero: bool) -> i64x2; |
494 | } |
495 | |
#[cfg(test)]
497 | mod tests { |
498 | |
499 | use crate::core_arch::x86::*; |
500 | use stdarch_test::simd_test; |
501 | |
    #[simd_test(enable = "avx512cd")]
503 | unsafe fn test_mm512_broadcastmw_epi32() { |
504 | let a: __mmask16 = 2; |
505 | let r = _mm512_broadcastmw_epi32(a); |
506 | let e = _mm512_set1_epi32(2); |
507 | assert_eq_m512i(r, e); |
508 | } |
509 | |
    #[simd_test(enable = "avx512cd,avx512vl")]
511 | unsafe fn test_mm256_broadcastmw_epi32() { |
512 | let a: __mmask16 = 2; |
513 | let r = _mm256_broadcastmw_epi32(a); |
514 | let e = _mm256_set1_epi32(2); |
515 | assert_eq_m256i(r, e); |
516 | } |
517 | |
    #[simd_test(enable = "avx512cd,avx512vl")]
519 | unsafe fn test_mm_broadcastmw_epi32() { |
520 | let a: __mmask16 = 2; |
521 | let r = _mm_broadcastmw_epi32(a); |
522 | let e = _mm_set1_epi32(2); |
523 | assert_eq_m128i(r, e); |
524 | } |
525 | |
    #[simd_test(enable = "avx512cd")]
527 | unsafe fn test_mm512_broadcastmb_epi64() { |
528 | let a: __mmask8 = 2; |
529 | let r = _mm512_broadcastmb_epi64(a); |
530 | let e = _mm512_set1_epi64(2); |
531 | assert_eq_m512i(r, e); |
532 | } |
533 | |
    #[simd_test(enable = "avx512cd,avx512vl")]
535 | unsafe fn test_mm256_broadcastmb_epi64() { |
536 | let a: __mmask8 = 2; |
537 | let r = _mm256_broadcastmb_epi64(a); |
538 | let e = _mm256_set1_epi64x(2); |
539 | assert_eq_m256i(r, e); |
540 | } |
541 | |
    #[simd_test(enable = "avx512cd,avx512vl")]
543 | unsafe fn test_mm_broadcastmb_epi64() { |
544 | let a: __mmask8 = 2; |
545 | let r = _mm_broadcastmb_epi64(a); |
546 | let e = _mm_set1_epi64x(2); |
547 | assert_eq_m128i(r, e); |
548 | } |
549 | |
    #[simd_test(enable = "avx512cd")]
551 | unsafe fn test_mm512_conflict_epi32() { |
552 | let a = _mm512_set1_epi32(1); |
553 | let r = _mm512_conflict_epi32(a); |
554 | let e = _mm512_set_epi32( |
555 | 1 << 14 |
556 | | 1 << 13 |
557 | | 1 << 12 |
558 | | 1 << 11 |
559 | | 1 << 10 |
560 | | 1 << 9 |
561 | | 1 << 8 |
562 | | 1 << 7 |
563 | | 1 << 6 |
564 | | 1 << 5 |
565 | | 1 << 4 |
566 | | 1 << 3 |
567 | | 1 << 2 |
568 | | 1 << 1 |
569 | | 1 << 0, |
570 | 1 << 13 |
571 | | 1 << 12 |
572 | | 1 << 11 |
573 | | 1 << 10 |
574 | | 1 << 9 |
575 | | 1 << 8 |
576 | | 1 << 7 |
577 | | 1 << 6 |
578 | | 1 << 5 |
579 | | 1 << 4 |
580 | | 1 << 3 |
581 | | 1 << 2 |
582 | | 1 << 1 |
583 | | 1 << 0, |
584 | 1 << 12 |
585 | | 1 << 11 |
586 | | 1 << 10 |
587 | | 1 << 9 |
588 | | 1 << 8 |
589 | | 1 << 7 |
590 | | 1 << 6 |
591 | | 1 << 5 |
592 | | 1 << 4 |
593 | | 1 << 3 |
594 | | 1 << 2 |
595 | | 1 << 1 |
596 | | 1 << 0, |
597 | 1 << 11 |
598 | | 1 << 10 |
599 | | 1 << 9 |
600 | | 1 << 8 |
601 | | 1 << 7 |
602 | | 1 << 6 |
603 | | 1 << 5 |
604 | | 1 << 4 |
605 | | 1 << 3 |
606 | | 1 << 2 |
607 | | 1 << 1 |
608 | | 1 << 0, |
609 | 1 << 10 |
610 | | 1 << 9 |
611 | | 1 << 8 |
612 | | 1 << 7 |
613 | | 1 << 6 |
614 | | 1 << 5 |
615 | | 1 << 4 |
616 | | 1 << 3 |
617 | | 1 << 2 |
618 | | 1 << 1 |
619 | | 1 << 0, |
620 | 1 << 9 | 1 << 8 | 1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
621 | 1 << 8 | 1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
622 | 1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
623 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
624 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
625 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
626 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
627 | 1 << 2 | 1 << 1 | 1 << 0, |
628 | 1 << 1 | 1 << 0, |
629 | 1 << 0, |
630 | 0, |
631 | ); |
632 | assert_eq_m512i(r, e); |
633 | } |
634 | |
    #[simd_test(enable = "avx512cd")]
636 | unsafe fn test_mm512_mask_conflict_epi32() { |
637 | let a = _mm512_set1_epi32(1); |
638 | let r = _mm512_mask_conflict_epi32(a, 0, a); |
639 | assert_eq_m512i(r, a); |
640 | let r = _mm512_mask_conflict_epi32(a, 0b11111111_11111111, a); |
641 | let e = _mm512_set_epi32( |
642 | 1 << 14 |
643 | | 1 << 13 |
644 | | 1 << 12 |
645 | | 1 << 11 |
646 | | 1 << 10 |
647 | | 1 << 9 |
648 | | 1 << 8 |
649 | | 1 << 7 |
650 | | 1 << 6 |
651 | | 1 << 5 |
652 | | 1 << 4 |
653 | | 1 << 3 |
654 | | 1 << 2 |
655 | | 1 << 1 |
656 | | 1 << 0, |
657 | 1 << 13 |
658 | | 1 << 12 |
659 | | 1 << 11 |
660 | | 1 << 10 |
661 | | 1 << 9 |
662 | | 1 << 8 |
663 | | 1 << 7 |
664 | | 1 << 6 |
665 | | 1 << 5 |
666 | | 1 << 4 |
667 | | 1 << 3 |
668 | | 1 << 2 |
669 | | 1 << 1 |
670 | | 1 << 0, |
671 | 1 << 12 |
672 | | 1 << 11 |
673 | | 1 << 10 |
674 | | 1 << 9 |
675 | | 1 << 8 |
676 | | 1 << 7 |
677 | | 1 << 6 |
678 | | 1 << 5 |
679 | | 1 << 4 |
680 | | 1 << 3 |
681 | | 1 << 2 |
682 | | 1 << 1 |
683 | | 1 << 0, |
684 | 1 << 11 |
685 | | 1 << 10 |
686 | | 1 << 9 |
687 | | 1 << 8 |
688 | | 1 << 7 |
689 | | 1 << 6 |
690 | | 1 << 5 |
691 | | 1 << 4 |
692 | | 1 << 3 |
693 | | 1 << 2 |
694 | | 1 << 1 |
695 | | 1 << 0, |
696 | 1 << 10 |
697 | | 1 << 9 |
698 | | 1 << 8 |
699 | | 1 << 7 |
700 | | 1 << 6 |
701 | | 1 << 5 |
702 | | 1 << 4 |
703 | | 1 << 3 |
704 | | 1 << 2 |
705 | | 1 << 1 |
706 | | 1 << 0, |
707 | 1 << 9 | 1 << 8 | 1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
708 | 1 << 8 | 1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
709 | 1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
710 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
711 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
712 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
713 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
714 | 1 << 2 | 1 << 1 | 1 << 0, |
715 | 1 << 1 | 1 << 0, |
716 | 1 << 0, |
717 | 0, |
718 | ); |
719 | assert_eq_m512i(r, e); |
720 | } |
721 | |
    #[simd_test(enable = "avx512cd")]
723 | unsafe fn test_mm512_maskz_conflict_epi32() { |
724 | let a = _mm512_set1_epi32(1); |
725 | let r = _mm512_maskz_conflict_epi32(0, a); |
726 | assert_eq_m512i(r, _mm512_setzero_si512()); |
727 | let r = _mm512_maskz_conflict_epi32(0b11111111_11111111, a); |
728 | let e = _mm512_set_epi32( |
729 | 1 << 14 |
730 | | 1 << 13 |
731 | | 1 << 12 |
732 | | 1 << 11 |
733 | | 1 << 10 |
734 | | 1 << 9 |
735 | | 1 << 8 |
736 | | 1 << 7 |
737 | | 1 << 6 |
738 | | 1 << 5 |
739 | | 1 << 4 |
740 | | 1 << 3 |
741 | | 1 << 2 |
742 | | 1 << 1 |
743 | | 1 << 0, |
744 | 1 << 13 |
745 | | 1 << 12 |
746 | | 1 << 11 |
747 | | 1 << 10 |
748 | | 1 << 9 |
749 | | 1 << 8 |
750 | | 1 << 7 |
751 | | 1 << 6 |
752 | | 1 << 5 |
753 | | 1 << 4 |
754 | | 1 << 3 |
755 | | 1 << 2 |
756 | | 1 << 1 |
757 | | 1 << 0, |
758 | 1 << 12 |
759 | | 1 << 11 |
760 | | 1 << 10 |
761 | | 1 << 9 |
762 | | 1 << 8 |
763 | | 1 << 7 |
764 | | 1 << 6 |
765 | | 1 << 5 |
766 | | 1 << 4 |
767 | | 1 << 3 |
768 | | 1 << 2 |
769 | | 1 << 1 |
770 | | 1 << 0, |
771 | 1 << 11 |
772 | | 1 << 10 |
773 | | 1 << 9 |
774 | | 1 << 8 |
775 | | 1 << 7 |
776 | | 1 << 6 |
777 | | 1 << 5 |
778 | | 1 << 4 |
779 | | 1 << 3 |
780 | | 1 << 2 |
781 | | 1 << 1 |
782 | | 1 << 0, |
783 | 1 << 10 |
784 | | 1 << 9 |
785 | | 1 << 8 |
786 | | 1 << 7 |
787 | | 1 << 6 |
788 | | 1 << 5 |
789 | | 1 << 4 |
790 | | 1 << 3 |
791 | | 1 << 2 |
792 | | 1 << 1 |
793 | | 1 << 0, |
794 | 1 << 9 | 1 << 8 | 1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
795 | 1 << 8 | 1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
796 | 1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
797 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
798 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
799 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
800 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
801 | 1 << 2 | 1 << 1 | 1 << 0, |
802 | 1 << 1 | 1 << 0, |
803 | 1 << 0, |
804 | 0, |
805 | ); |
806 | assert_eq_m512i(r, e); |
807 | } |
808 | |
    #[simd_test(enable = "avx512cd,avx512vl")]
810 | unsafe fn test_mm256_conflict_epi32() { |
811 | let a = _mm256_set1_epi32(1); |
812 | let r = _mm256_conflict_epi32(a); |
813 | let e = _mm256_set_epi32( |
814 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
815 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
816 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
817 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
818 | 1 << 2 | 1 << 1 | 1 << 0, |
819 | 1 << 1 | 1 << 0, |
820 | 1 << 0, |
821 | 0, |
822 | ); |
823 | assert_eq_m256i(r, e); |
824 | } |
825 | |
    #[simd_test(enable = "avx512cd,avx512vl")]
827 | unsafe fn test_mm256_mask_conflict_epi32() { |
828 | let a = _mm256_set1_epi32(1); |
829 | let r = _mm256_mask_conflict_epi32(a, 0, a); |
830 | assert_eq_m256i(r, a); |
831 | let r = _mm256_mask_conflict_epi32(a, 0b11111111, a); |
832 | let e = _mm256_set_epi32( |
833 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
834 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
835 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
836 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
837 | 1 << 2 | 1 << 1 | 1 << 0, |
838 | 1 << 1 | 1 << 0, |
839 | 1 << 0, |
840 | 0, |
841 | ); |
842 | assert_eq_m256i(r, e); |
843 | } |
844 | |
    #[simd_test(enable = "avx512cd,avx512vl")]
846 | unsafe fn test_mm256_maskz_conflict_epi32() { |
847 | let a = _mm256_set1_epi32(1); |
848 | let r = _mm256_maskz_conflict_epi32(0, a); |
849 | assert_eq_m256i(r, _mm256_setzero_si256()); |
850 | let r = _mm256_maskz_conflict_epi32(0b11111111, a); |
851 | let e = _mm256_set_epi32( |
852 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
853 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
854 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
855 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
856 | 1 << 2 | 1 << 1 | 1 << 0, |
857 | 1 << 1 | 1 << 0, |
858 | 1 << 0, |
859 | 0, |
860 | ); |
861 | assert_eq_m256i(r, e); |
862 | } |
863 | |
    #[simd_test(enable = "avx512cd,avx512vl")]
865 | unsafe fn test_mm_conflict_epi32() { |
866 | let a = _mm_set1_epi32(1); |
867 | let r = _mm_conflict_epi32(a); |
868 | let e = _mm_set_epi32(1 << 2 | 1 << 1 | 1 << 0, 1 << 1 | 1 << 0, 1 << 0, 0); |
869 | assert_eq_m128i(r, e); |
870 | } |
871 | |
    #[simd_test(enable = "avx512cd,avx512vl")]
873 | unsafe fn test_mm_mask_conflict_epi32() { |
874 | let a = _mm_set1_epi32(1); |
875 | let r = _mm_mask_conflict_epi32(a, 0, a); |
876 | assert_eq_m128i(r, a); |
877 | let r = _mm_mask_conflict_epi32(a, 0b00001111, a); |
878 | let e = _mm_set_epi32(1 << 2 | 1 << 1 | 1 << 0, 1 << 1 | 1 << 0, 1 << 0, 0); |
879 | assert_eq_m128i(r, e); |
880 | } |
881 | |
    #[simd_test(enable = "avx512cd,avx512vl")]
883 | unsafe fn test_mm_maskz_conflict_epi32() { |
884 | let a = _mm_set1_epi32(1); |
885 | let r = _mm_maskz_conflict_epi32(0, a); |
886 | assert_eq_m128i(r, _mm_setzero_si128()); |
887 | let r = _mm_maskz_conflict_epi32(0b00001111, a); |
888 | let e = _mm_set_epi32(1 << 2 | 1 << 1 | 1 << 0, 1 << 1 | 1 << 0, 1 << 0, 0); |
889 | assert_eq_m128i(r, e); |
890 | } |
891 | |
    #[simd_test(enable = "avx512cd")]
893 | unsafe fn test_mm512_conflict_epi64() { |
894 | let a = _mm512_set1_epi64(1); |
895 | let r = _mm512_conflict_epi64(a); |
896 | let e = _mm512_set_epi64( |
897 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
898 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
899 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
900 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
901 | 1 << 2 | 1 << 1 | 1 << 0, |
902 | 1 << 1 | 1 << 0, |
903 | 1 << 0, |
904 | 0, |
905 | ); |
906 | assert_eq_m512i(r, e); |
907 | } |
908 | |
    #[simd_test(enable = "avx512cd")]
910 | unsafe fn test_mm512_mask_conflict_epi64() { |
911 | let a = _mm512_set1_epi64(1); |
912 | let r = _mm512_mask_conflict_epi64(a, 0, a); |
913 | assert_eq_m512i(r, a); |
914 | let r = _mm512_mask_conflict_epi64(a, 0b11111111, a); |
915 | let e = _mm512_set_epi64( |
916 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
917 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
918 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
919 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
920 | 1 << 2 | 1 << 1 | 1 << 0, |
921 | 1 << 1 | 1 << 0, |
922 | 1 << 0, |
923 | 0, |
924 | ); |
925 | assert_eq_m512i(r, e); |
926 | } |
927 | |
    #[simd_test(enable = "avx512cd")]
929 | unsafe fn test_mm512_maskz_conflict_epi64() { |
930 | let a = _mm512_set1_epi64(1); |
931 | let r = _mm512_maskz_conflict_epi64(0, a); |
932 | assert_eq_m512i(r, _mm512_setzero_si512()); |
933 | let r = _mm512_maskz_conflict_epi64(0b11111111, a); |
934 | let e = _mm512_set_epi64( |
935 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
936 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
937 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
938 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
939 | 1 << 2 | 1 << 1 | 1 << 0, |
940 | 1 << 1 | 1 << 0, |
941 | 1 << 0, |
942 | 0, |
943 | ); |
944 | assert_eq_m512i(r, e); |
945 | } |
946 | |
    #[simd_test(enable = "avx512cd,avx512vl")]
948 | unsafe fn test_mm256_conflict_epi64() { |
949 | let a = _mm256_set1_epi64x(1); |
950 | let r = _mm256_conflict_epi64(a); |
951 | let e = _mm256_set_epi64x(1 << 2 | 1 << 1 | 1 << 0, 1 << 1 | 1 << 0, 1 << 0, 0); |
952 | assert_eq_m256i(r, e); |
953 | } |
954 | |
    #[simd_test(enable = "avx512cd,avx512vl")]
956 | unsafe fn test_mm256_mask_conflict_epi64() { |
957 | let a = _mm256_set1_epi64x(1); |
958 | let r = _mm256_mask_conflict_epi64(a, 0, a); |
959 | assert_eq_m256i(r, a); |
960 | let r = _mm256_mask_conflict_epi64(a, 0b00001111, a); |
961 | let e = _mm256_set_epi64x(1 << 2 | 1 << 1 | 1 << 0, 1 << 1 | 1 << 0, 1 << 0, 0); |
962 | assert_eq_m256i(r, e); |
963 | } |
964 | |
    #[simd_test(enable = "avx512cd,avx512vl")]
966 | unsafe fn test_mm256_maskz_conflict_epi64() { |
967 | let a = _mm256_set1_epi64x(1); |
968 | let r = _mm256_maskz_conflict_epi64(0, a); |
969 | assert_eq_m256i(r, _mm256_setzero_si256()); |
970 | let r = _mm256_maskz_conflict_epi64(0b00001111, a); |
971 | let e = _mm256_set_epi64x(1 << 2 | 1 << 1 | 1 << 0, 1 << 1 | 1 << 0, 1 << 0, 0); |
972 | assert_eq_m256i(r, e); |
973 | } |
974 | |
    #[simd_test(enable = "avx512cd,avx512vl")]
976 | unsafe fn test_mm_conflict_epi64() { |
977 | let a = _mm_set1_epi64x(1); |
978 | let r = _mm_conflict_epi64(a); |
979 | let e = _mm_set_epi64x(1 << 0, 0); |
980 | assert_eq_m128i(r, e); |
981 | } |
982 | |
    #[simd_test(enable = "avx512cd,avx512vl")]
984 | unsafe fn test_mm_mask_conflict_epi64() { |
985 | let a = _mm_set1_epi64x(1); |
986 | let r = _mm_mask_conflict_epi64(a, 0, a); |
987 | assert_eq_m128i(r, a); |
988 | let r = _mm_mask_conflict_epi64(a, 0b00000011, a); |
989 | let e = _mm_set_epi64x(1 << 0, 0); |
990 | assert_eq_m128i(r, e); |
991 | } |
992 | |
    #[simd_test(enable = "avx512cd,avx512vl")]
994 | unsafe fn test_mm_maskz_conflict_epi64() { |
995 | let a = _mm_set1_epi64x(1); |
996 | let r = _mm_maskz_conflict_epi64(0, a); |
997 | assert_eq_m128i(r, _mm_setzero_si128()); |
998 | let r = _mm_maskz_conflict_epi64(0b00000011, a); |
999 | let e = _mm_set_epi64x(1 << 0, 0); |
1000 | assert_eq_m128i(r, e); |
1001 | } |
1002 | |
    #[simd_test(enable = "avx512cd")]
1004 | unsafe fn test_mm512_lzcnt_epi32() { |
1005 | let a = _mm512_set1_epi32(1); |
1006 | let r = _mm512_lzcnt_epi32(a); |
1007 | let e = _mm512_set1_epi32(31); |
1008 | assert_eq_m512i(r, e); |
1009 | } |
1010 | |
    #[simd_test(enable = "avx512cd")]
1012 | unsafe fn test_mm512_mask_lzcnt_epi32() { |
1013 | let a = _mm512_set1_epi32(1); |
1014 | let r = _mm512_mask_lzcnt_epi32(a, 0, a); |
1015 | assert_eq_m512i(r, a); |
1016 | let r = _mm512_mask_lzcnt_epi32(a, 0b11111111_11111111, a); |
1017 | let e = _mm512_set1_epi32(31); |
1018 | assert_eq_m512i(r, e); |
1019 | } |
1020 | |
    #[simd_test(enable = "avx512cd")]
1022 | unsafe fn test_mm512_maskz_lzcnt_epi32() { |
1023 | let a = _mm512_set1_epi32(2); |
1024 | let r = _mm512_maskz_lzcnt_epi32(0, a); |
1025 | assert_eq_m512i(r, _mm512_setzero_si512()); |
1026 | let r = _mm512_maskz_lzcnt_epi32(0b11111111_11111111, a); |
1027 | let e = _mm512_set1_epi32(30); |
1028 | assert_eq_m512i(r, e); |
1029 | } |
1030 | |
    #[simd_test(enable = "avx512cd,avx512vl")]
1032 | unsafe fn test_mm256_lzcnt_epi32() { |
1033 | let a = _mm256_set1_epi32(1); |
1034 | let r = _mm256_lzcnt_epi32(a); |
1035 | let e = _mm256_set1_epi32(31); |
1036 | assert_eq_m256i(r, e); |
1037 | } |
1038 | |
    #[simd_test(enable = "avx512cd,avx512vl")]
1040 | unsafe fn test_mm256_mask_lzcnt_epi32() { |
1041 | let a = _mm256_set1_epi32(1); |
1042 | let r = _mm256_mask_lzcnt_epi32(a, 0, a); |
1043 | assert_eq_m256i(r, a); |
1044 | let r = _mm256_mask_lzcnt_epi32(a, 0b11111111, a); |
1045 | let e = _mm256_set1_epi32(31); |
1046 | assert_eq_m256i(r, e); |
1047 | } |
1048 | |
    #[simd_test(enable = "avx512cd,avx512vl")]
1050 | unsafe fn test_mm256_maskz_lzcnt_epi32() { |
1051 | let a = _mm256_set1_epi32(1); |
1052 | let r = _mm256_maskz_lzcnt_epi32(0, a); |
1053 | assert_eq_m256i(r, _mm256_setzero_si256()); |
1054 | let r = _mm256_maskz_lzcnt_epi32(0b11111111, a); |
1055 | let e = _mm256_set1_epi32(31); |
1056 | assert_eq_m256i(r, e); |
1057 | } |
1058 | |
    #[simd_test(enable = "avx512cd,avx512vl")]
1060 | unsafe fn test_mm_lzcnt_epi32() { |
1061 | let a = _mm_set1_epi32(1); |
1062 | let r = _mm_lzcnt_epi32(a); |
1063 | let e = _mm_set1_epi32(31); |
1064 | assert_eq_m128i(r, e); |
1065 | } |
1066 | |
    #[simd_test(enable = "avx512cd,avx512vl")]
1068 | unsafe fn test_mm_mask_lzcnt_epi32() { |
1069 | let a = _mm_set1_epi32(1); |
1070 | let r = _mm_mask_lzcnt_epi32(a, 0, a); |
1071 | assert_eq_m128i(r, a); |
1072 | let r = _mm_mask_lzcnt_epi32(a, 0b00001111, a); |
1073 | let e = _mm_set1_epi32(31); |
1074 | assert_eq_m128i(r, e); |
1075 | } |
1076 | |
    #[simd_test(enable = "avx512cd,avx512vl")]
1078 | unsafe fn test_mm_maskz_lzcnt_epi32() { |
1079 | let a = _mm_set1_epi32(1); |
1080 | let r = _mm_maskz_lzcnt_epi32(0, a); |
1081 | assert_eq_m128i(r, _mm_setzero_si128()); |
1082 | let r = _mm_maskz_lzcnt_epi32(0b00001111, a); |
1083 | let e = _mm_set1_epi32(31); |
1084 | assert_eq_m128i(r, e); |
1085 | } |
1086 | |
    #[simd_test(enable = "avx512cd")]
1088 | unsafe fn test_mm512_lzcnt_epi64() { |
1089 | let a = _mm512_set1_epi64(1); |
1090 | let r = _mm512_lzcnt_epi64(a); |
1091 | let e = _mm512_set1_epi64(63); |
1092 | assert_eq_m512i(r, e); |
1093 | } |
1094 | |
    #[simd_test(enable = "avx512cd")]
1096 | unsafe fn test_mm512_mask_lzcnt_epi64() { |
1097 | let a = _mm512_set1_epi64(1); |
1098 | let r = _mm512_mask_lzcnt_epi64(a, 0, a); |
1099 | assert_eq_m512i(r, a); |
1100 | let r = _mm512_mask_lzcnt_epi64(a, 0b11111111, a); |
1101 | let e = _mm512_set1_epi64(63); |
1102 | assert_eq_m512i(r, e); |
1103 | } |
1104 | |
    #[simd_test(enable = "avx512cd")]
1106 | unsafe fn test_mm512_maskz_lzcnt_epi64() { |
1107 | let a = _mm512_set1_epi64(2); |
1108 | let r = _mm512_maskz_lzcnt_epi64(0, a); |
1109 | assert_eq_m512i(r, _mm512_setzero_si512()); |
1110 | let r = _mm512_maskz_lzcnt_epi64(0b11111111, a); |
1111 | let e = _mm512_set1_epi64(62); |
1112 | assert_eq_m512i(r, e); |
1113 | } |
1114 | |
    #[simd_test(enable = "avx512cd,avx512vl")]
1116 | unsafe fn test_mm256_lzcnt_epi64() { |
1117 | let a = _mm256_set1_epi64x(1); |
1118 | let r = _mm256_lzcnt_epi64(a); |
1119 | let e = _mm256_set1_epi64x(63); |
1120 | assert_eq_m256i(r, e); |
1121 | } |
1122 | |
    #[simd_test(enable = "avx512cd,avx512vl")]
1124 | unsafe fn test_mm256_mask_lzcnt_epi64() { |
1125 | let a = _mm256_set1_epi64x(1); |
1126 | let r = _mm256_mask_lzcnt_epi64(a, 0, a); |
1127 | assert_eq_m256i(r, a); |
1128 | let r = _mm256_mask_lzcnt_epi64(a, 0b00001111, a); |
1129 | let e = _mm256_set1_epi64x(63); |
1130 | assert_eq_m256i(r, e); |
1131 | } |
1132 | |
    #[simd_test(enable = "avx512cd,avx512vl")]
1134 | unsafe fn test_mm256_maskz_lzcnt_epi64() { |
1135 | let a = _mm256_set1_epi64x(1); |
1136 | let r = _mm256_maskz_lzcnt_epi64(0, a); |
1137 | assert_eq_m256i(r, _mm256_setzero_si256()); |
1138 | let r = _mm256_maskz_lzcnt_epi64(0b00001111, a); |
1139 | let e = _mm256_set1_epi64x(63); |
1140 | assert_eq_m256i(r, e); |
1141 | } |
1142 | |
    #[simd_test(enable = "avx512cd,avx512vl")]
1144 | unsafe fn test_mm_lzcnt_epi64() { |
1145 | let a = _mm_set1_epi64x(1); |
1146 | let r = _mm_lzcnt_epi64(a); |
1147 | let e = _mm_set1_epi64x(63); |
1148 | assert_eq_m128i(r, e); |
1149 | } |
1150 | |
    #[simd_test(enable = "avx512cd,avx512vl")]
1152 | unsafe fn test_mm_mask_lzcnt_epi64() { |
1153 | let a = _mm_set1_epi64x(1); |
1154 | let r = _mm_mask_lzcnt_epi64(a, 0, a); |
1155 | assert_eq_m128i(r, a); |
1156 | let r = _mm_mask_lzcnt_epi64(a, 0b00001111, a); |
1157 | let e = _mm_set1_epi64x(63); |
1158 | assert_eq_m128i(r, e); |
1159 | } |
1160 | |
    #[simd_test(enable = "avx512cd,avx512vl")]
1162 | unsafe fn test_mm_maskz_lzcnt_epi64() { |
1163 | let a = _mm_set1_epi64x(1); |
1164 | let r = _mm_maskz_lzcnt_epi64(0, a); |
1165 | assert_eq_m128i(r, _mm_setzero_si128()); |
1166 | let r = _mm_maskz_lzcnt_epi64(0b00001111, a); |
1167 | let e = _mm_set1_epi64x(63); |
1168 | assert_eq_m128i(r, e); |
1169 | } |
1170 | } |
1171 | |