use crate::{
    core_arch::{simd::*, x86::*},
    intrinsics::simd::*,
    mem::transmute,
};

// And //

/// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_and_pd&ig_expand=288)
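///
/// # Examples
///
/// A minimal sketch of the writemask behavior (illustrative values; assumes the calling
/// context enables the `avx512dq` and `avx512vl` target features, e.g. via `#[target_feature]`):
///
/// ```ignore
/// let src = _mm_set1_pd(9.0);
/// let a = _mm_set1_pd(-5.0);
/// let b = _mm_set1_pd(5.0);
/// // Mask bit 0 is set, bit 1 is clear: lane 0 receives the bitwise AND of `a` and `b`,
/// // lane 1 is copied unchanged from `src`.
/// let r = _mm_mask_and_pd(src, 0b01, a, b);
/// ```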
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_and_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let and = _mm_and_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, and, src.as_f64x2()))
    }
}

/// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_and_pd&ig_expand=289)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_and_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let and = _mm_and_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, and, f64x2::ZERO))
    }
}

/// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_and_pd&ig_expand=291)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_and_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let and = _mm256_and_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, and, src.as_f64x4()))
    }
}

/// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_and_pd&ig_expand=292)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_and_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let and = _mm256_and_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, and, f64x4::ZERO))
    }
}

/// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_pd&ig_expand=293)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandp))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_and_pd(a: __m512d, b: __m512d) -> __m512d {
    unsafe { transmute(simd_and(transmute::<_, u64x8>(a), transmute::<_, u64x8>(b))) }
}

/// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_and_pd&ig_expand=294)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_and_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let and = _mm512_and_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, and, src.as_f64x8()))
    }
}

/// Compute the bitwise AND of packed double-precision (64-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_and_pd&ig_expand=295)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_and_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let and = _mm512_and_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, and, f64x8::ZERO))
    }
}

/// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_and_ps&ig_expand=297)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_and_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let and = _mm_and_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, and, src.as_f32x4()))
    }
}

/// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_and_ps&ig_expand=298)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_and_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let and = _mm_and_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, and, f32x4::ZERO))
    }
}

/// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_and_ps&ig_expand=300)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_and_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let and = _mm256_and_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, and, src.as_f32x8()))
    }
}

/// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_and_ps&ig_expand=301)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_and_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let and = _mm256_and_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, and, f32x8::ZERO))
    }
}

/// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_ps&ig_expand=303)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_and_ps(a: __m512, b: __m512) -> __m512 {
    unsafe {
        transmute(simd_and(
            transmute::<_, u32x16>(a),
            transmute::<_, u32x16>(b),
        ))
    }
}

/// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_and_ps&ig_expand=304)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_and_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let and = _mm512_and_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, and, src.as_f32x16()))
    }
}

/// Compute the bitwise AND of packed single-precision (32-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_and_ps&ig_expand=305)
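///
/// # Examples
///
/// A minimal zeromask sketch (illustrative values; assumes the calling context enables the
/// `avx512dq` target feature):
///
/// ```ignore
/// let a = _mm512_set1_ps(-4.0);
/// let b = _mm512_set1_ps(4.0);
/// // Only the lower eight lanes have their mask bit set; they receive the bitwise AND
/// // of `a` and `b`, while the upper eight lanes are zeroed.
/// let r = _mm512_maskz_and_ps(0b0000_0000_1111_1111, a, b);
/// ```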
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_and_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let and = _mm512_and_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, and, f32x16::ZERO))
    }
}

// Andnot

/// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then
/// bitwise AND with b and store the results in dst using writemask k (elements are copied from src if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_andnot_pd&ig_expand=326)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandnpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_andnot_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let andnot = _mm_andnot_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, andnot, src.as_f64x2()))
    }
}

/// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then
/// bitwise AND with b and store the results in dst using zeromask k (elements are zeroed out if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_andnot_pd&ig_expand=327)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandnpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_andnot_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let andnot = _mm_andnot_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, andnot, f64x2::ZERO))
    }
}

/// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then
/// bitwise AND with b and store the results in dst using writemask k (elements are copied from src if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_andnot_pd&ig_expand=329)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandnpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_andnot_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let andnot = _mm256_andnot_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, andnot, src.as_f64x4()))
    }
}

/// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then
/// bitwise AND with b and store the results in dst using zeromask k (elements are zeroed out if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_andnot_pd&ig_expand=330)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandnpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_andnot_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let andnot = _mm256_andnot_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, andnot, f64x4::ZERO))
    }
}

/// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then
/// bitwise AND with b and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_pd&ig_expand=331)
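///
/// # Examples
///
/// `_mm512_andnot_pd(a, b)` computes `(!a) & b` on the bit patterns. A common use is clearing
/// the sign bit to take an absolute value (a sketch with illustrative values; assumes the
/// calling context enables the `avx512dq` target feature):
///
/// ```ignore
/// let sign_bit = _mm512_set1_pd(-0.0);
/// let x = _mm512_set1_pd(-3.5);
/// // (!sign_bit) & x clears the sign bit of every lane, giving 3.5 in each lane.
/// let abs = _mm512_andnot_pd(sign_bit, x);
/// ```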
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandnp))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_andnot_pd(a: __m512d, b: __m512d) -> __m512d {
    unsafe { _mm512_and_pd(_mm512_xor_pd(a, transmute(_mm512_set1_epi64(-1))), b) }
}

/// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then
/// bitwise AND with b and store the results in dst using writemask k (elements are copied from src if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_andnot_pd&ig_expand=332)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandnpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_andnot_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let andnot = _mm512_andnot_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, andnot, src.as_f64x8()))
    }
}

/// Compute the bitwise NOT of packed double-precision (64-bit) floating point numbers in a and then
/// bitwise AND with b and store the results in dst using zeromask k (elements are zeroed out if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_andnot_pd&ig_expand=333)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandnpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_andnot_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let andnot = _mm512_andnot_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, andnot, f64x8::ZERO))
    }
}

/// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then
/// bitwise AND with b and store the results in dst using writemask k (elements are copied from src if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_andnot_ps&ig_expand=335)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandnps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_andnot_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let andnot = _mm_andnot_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, andnot, src.as_f32x4()))
    }
}

/// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then
/// bitwise AND with b and store the results in dst using zeromask k (elements are zeroed out if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_andnot_ps&ig_expand=336)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandnps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_andnot_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let andnot = _mm_andnot_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, andnot, f32x4::ZERO))
    }
}

/// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then
/// bitwise AND with b and store the results in dst using writemask k (elements are copied from src if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_andnot_ps&ig_expand=338)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandnps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_andnot_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let andnot = _mm256_andnot_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, andnot, src.as_f32x8()))
    }
}

/// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then
/// bitwise AND with b and store the results in dst using zeromask k (elements are zeroed out if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_andnot_ps&ig_expand=339)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vandnps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_andnot_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let andnot = _mm256_andnot_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, andnot, f32x8::ZERO))
    }
}

/// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then
/// bitwise AND with b and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_ps&ig_expand=340)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandnps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_andnot_ps(a: __m512, b: __m512) -> __m512 {
    unsafe { _mm512_and_ps(_mm512_xor_ps(a, transmute(_mm512_set1_epi32(-1))), b) }
}

/// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then
/// bitwise AND with b and store the results in dst using writemask k (elements are copied from src if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_andnot_ps&ig_expand=341)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandnps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_andnot_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let andnot = _mm512_andnot_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, andnot, src.as_f32x16()))
    }
}

/// Compute the bitwise NOT of packed single-precision (32-bit) floating point numbers in a and then
/// bitwise AND with b and store the results in dst using zeromask k (elements are zeroed out if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_andnot_ps&ig_expand=342)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vandnps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_andnot_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let andnot = _mm512_andnot_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, andnot, f32x16::ZERO))
    }
}

// Or

/// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_or_pd&ig_expand=4824)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vorpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_or_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let or = _mm_or_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, or, src.as_f64x2()))
    }
}

/// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_or_pd&ig_expand=4825)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vorpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_or_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let or = _mm_or_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, or, f64x2::ZERO))
    }
}

/// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_or_pd&ig_expand=4827)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vorpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_or_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let or = _mm256_or_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, or, src.as_f64x4()))
    }
}

/// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_or_pd&ig_expand=4828)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vorpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_or_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let or = _mm256_or_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, or, f64x4::ZERO))
    }
}

/// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_pd&ig_expand=4829)
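///
/// # Examples
///
/// OR-ing with `-0.0` sets the sign bit of every lane, producing `-|x|` (a sketch with
/// illustrative values; assumes the calling context enables the `avx512dq` target feature):
///
/// ```ignore
/// let sign_bit = _mm512_set1_pd(-0.0);
/// let x = _mm512_set1_pd(3.5);
/// // 3.5 | -0.0 keeps the magnitude and sets the sign bit, giving -3.5 in every lane.
/// let neg_abs = _mm512_or_pd(x, sign_bit);
/// ```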
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vorp))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_or_pd(a: __m512d, b: __m512d) -> __m512d {
    unsafe { transmute(simd_or(transmute::<_, u64x8>(a), transmute::<_, u64x8>(b))) }
}

/// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b and
/// store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_or_pd&ig_expand=4830)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vorpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_or_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let or = _mm512_or_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, or, src.as_f64x8()))
    }
}

/// Compute the bitwise OR of packed double-precision (64-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_or_pd&ig_expand=4831)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vorpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_or_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let or = _mm512_or_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, or, f64x8::ZERO))
    }
}

/// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_or_ps&ig_expand=4833)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vorps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_or_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let or = _mm_or_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, or, src.as_f32x4()))
    }
}

/// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_or_ps&ig_expand=4834)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vorps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_or_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let or = _mm_or_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, or, f32x4::ZERO))
    }
}

/// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_or_ps&ig_expand=4836)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vorps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_or_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let or = _mm256_or_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, or, src.as_f32x8()))
    }
}

/// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_or_ps&ig_expand=4837)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vorps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_or_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let or = _mm256_or_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, or, f32x8::ZERO))
    }
}

/// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_ps&ig_expand=4838)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vorps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_or_ps(a: __m512, b: __m512) -> __m512 {
    unsafe {
        transmute(simd_or(
            transmute::<_, u32x16>(a),
            transmute::<_, u32x16>(b),
        ))
    }
}

/// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b and
/// store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_or_ps&ig_expand=4839)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vorps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_or_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let or = _mm512_or_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, or, src.as_f32x16()))
    }
}

/// Compute the bitwise OR of packed single-precision (32-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_or_ps&ig_expand=4840)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vorps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_or_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let or = _mm512_or_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, or, f32x16::ZERO))
    }
}

// Xor

/// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_xor_pd&ig_expand=7094)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vxorpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_xor_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let xor = _mm_xor_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, xor, src.as_f64x2()))
    }
}

/// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_xor_pd&ig_expand=7095)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vxorpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_xor_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let xor = _mm_xor_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, xor, f64x2::ZERO))
    }
}

/// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_xor_pd&ig_expand=7097)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vxorpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_xor_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let xor = _mm256_xor_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, xor, src.as_f64x4()))
    }
}

/// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_xor_pd&ig_expand=7098)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vxorpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_xor_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let xor = _mm256_xor_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, xor, f64x4::ZERO))
    }
}

/// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_pd&ig_expand=7102)
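///
/// # Examples
///
/// XOR-ing with `-0.0` flips the sign bit, negating every lane (a sketch with illustrative
/// values; assumes the calling context enables the `avx512dq` target feature):
///
/// ```ignore
/// let sign_bit = _mm512_set1_pd(-0.0);
/// let x = _mm512_set1_pd(2.0);
/// // 2.0 ^ -0.0 flips only the sign bit, giving -2.0 in every lane.
/// let neg = _mm512_xor_pd(x, sign_bit);
/// ```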
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vxorp))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_xor_pd(a: __m512d, b: __m512d) -> __m512d {
    unsafe { transmute(simd_xor(transmute::<_, u64x8>(a), transmute::<_, u64x8>(b))) }
}

/// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b and
/// store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_xor_pd&ig_expand=7100)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vxorpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_xor_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let xor = _mm512_xor_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, xor, src.as_f64x8()))
    }
}

/// Compute the bitwise XOR of packed double-precision (64-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_xor_pd&ig_expand=7101)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vxorpd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_xor_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let xor = _mm512_xor_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, xor, f64x8::ZERO))
    }
}

/// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_xor_ps&ig_expand=7103)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vxorps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_xor_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let xor = _mm_xor_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, xor, src.as_f32x4()))
    }
}

/// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_xor_ps&ig_expand=7104)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vxorps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_xor_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let xor = _mm_xor_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, xor, f32x4::ZERO))
    }
}

/// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b
/// and store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_xor_ps&ig_expand=7106)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vxorps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_xor_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let xor = _mm256_xor_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, xor, src.as_f32x8()))
    }
}

/// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_xor_ps&ig_expand=7107)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vxorps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_xor_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let xor = _mm256_xor_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, xor, f32x8::ZERO))
    }
}

/// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b
/// and store the results in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_ps&ig_expand=7111)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vxorps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_xor_ps(a: __m512, b: __m512) -> __m512 {
    unsafe {
        transmute(simd_xor(
            transmute::<_, u32x16>(a),
            transmute::<_, u32x16>(b),
        ))
    }
}

/// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b and
/// store the results in dst using writemask k (elements are copied from src if the corresponding
/// bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_xor_ps&ig_expand=7109)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vxorps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_xor_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let xor = _mm512_xor_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, xor, src.as_f32x16()))
    }
}

/// Compute the bitwise XOR of packed single-precision (32-bit) floating point numbers in a and b and
/// store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_xor_ps&ig_expand=7110)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vxorps))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_xor_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let xor = _mm512_xor_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, xor, f32x16::ZERO))
    }
}

// Broadcast

/// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all
/// elements of dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_f32x2&ig_expand=509)
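///
/// # Examples
///
/// A sketch of the resulting lane pattern (illustrative values; assumes the calling context
/// enables the `avx512dq` and `avx512vl` target features):
///
/// ```ignore
/// let a = _mm_set_ps(4.0, 3.0, 2.0, 1.0); // lanes, low to high: [1.0, 2.0, 3.0, 4.0]
/// // The lower two lanes repeat across dst: [1.0, 2.0, 1.0, 2.0, 1.0, 2.0, 1.0, 2.0].
/// let r = _mm256_broadcast_f32x2(a);
/// ```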
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_broadcast_f32x2(a: __m128) -> __m256 {
    unsafe {
        let b: f32x8 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1]);
        transmute(b)
    }
}

/// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all
/// elements of dst using writemask k (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_f32x2&ig_expand=510)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vbroadcastf32x2))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_broadcast_f32x2(src: __m256, k: __mmask8, a: __m128) -> __m256 {
    unsafe {
        let b = _mm256_broadcast_f32x2(a).as_f32x8();
        transmute(simd_select_bitmask(k, b, src.as_f32x8()))
    }
}

/// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all
/// elements of dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_f32x2&ig_expand=511)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vbroadcastf32x2))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_broadcast_f32x2(k: __mmask8, a: __m128) -> __m256 {
    unsafe {
        let b = _mm256_broadcast_f32x2(a).as_f32x8();
        transmute(simd_select_bitmask(k, b, f32x8::ZERO))
    }
}

/// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all
/// elements of dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f32x2&ig_expand=512)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_broadcast_f32x2(a: __m128) -> __m512 {
    unsafe {
        let b: f32x16 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]);
        transmute(b)
    }
}

/// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all
/// elements of dst using writemask k (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f32x2&ig_expand=513)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vbroadcastf32x2))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_broadcast_f32x2(src: __m512, k: __mmask16, a: __m128) -> __m512 {
    unsafe {
        let b = _mm512_broadcast_f32x2(a).as_f32x16();
        transmute(simd_select_bitmask(k, b, src.as_f32x16()))
    }
}

/// Broadcasts the lower 2 packed single-precision (32-bit) floating-point elements from a to all
/// elements of dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f32x2&ig_expand=514)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vbroadcastf32x2))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_broadcast_f32x2(k: __mmask16, a: __m128) -> __m512 {
    unsafe {
        let b = _mm512_broadcast_f32x2(a).as_f32x16();
        transmute(simd_select_bitmask(k, b, f32x16::ZERO))
    }
}

/// Broadcasts the 8 packed single-precision (32-bit) floating-point elements from a to all
/// elements of dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f32x8&ig_expand=521)
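///
/// # Examples
///
/// A sketch of the lane pattern (illustrative values; assumes the calling context enables the
/// `avx512dq` target feature):
///
/// ```ignore
/// let a = _mm256_setr_ps(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
/// // All eight lanes of `a` are repeated twice across the 512-bit result:
/// // [0.0, 1.0, ..., 7.0, 0.0, 1.0, ..., 7.0].
/// let r = _mm512_broadcast_f32x8(a);
/// ```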
1032#[inline]
1033#[target_feature(enable = "avx512dq")]
1034#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1035#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1036pub const fn _mm512_broadcast_f32x8(a: __m256) -> __m512 {
1037 unsafe {
1038 let b: f32x16 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7]);
1039 transmute(src:b)
1040 }
1041}
1042
1043/// Broadcasts the 8 packed single-precision (32-bit) floating-point elements from a to all
1044/// elements of dst using writemask k (elements are copied from src if the corresponding bit is not set).
1045///
1046/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f32x8&ig_expand=522)
1047#[inline]
1048#[target_feature(enable = "avx512dq")]
1049#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1050#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1051pub const fn _mm512_mask_broadcast_f32x8(src: __m512, k: __mmask16, a: __m256) -> __m512 {
1052 unsafe {
1053 let b: Simd = _mm512_broadcast_f32x8(a).as_f32x16();
1054 transmute(src:simd_select_bitmask(m:k, yes:b, no:src.as_f32x16()))
1055 }
1056}
1057
1058/// Broadcasts the 8 packed single-precision (32-bit) floating-point elements from a to all
1059/// elements of dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
1060///
1061/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f32x8&ig_expand=523)
1062#[inline]
1063#[target_feature(enable = "avx512dq")]
1064#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1065#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1066pub const fn _mm512_maskz_broadcast_f32x8(k: __mmask16, a: __m256) -> __m512 {
1067 unsafe {
1068 let b: Simd = _mm512_broadcast_f32x8(a).as_f32x16();
1069 transmute(src:simd_select_bitmask(m:k, yes:b, no:f32x16::ZERO))
1070 }
1071}
1072
1073/// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all
1074/// elements of dst.
1075///
1076/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_f64x2&ig_expand=524)
1077#[inline]
1078#[target_feature(enable = "avx512dq,avx512vl")]
1079#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1080#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
1081pub const fn _mm256_broadcast_f64x2(a: __m128d) -> __m256d {
1082 unsafe {
1083 let b: f64x4 = simd_shuffle!(a, a, [0, 1, 0, 1]);
1084 transmute(src:b)
1085 }
1086}

/// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all
/// elements of dst using writemask k (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_f64x2&ig_expand=525)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_broadcast_f64x2(src: __m256d, k: __mmask8, a: __m128d) -> __m256d {
    unsafe {
        let b = _mm256_broadcast_f64x2(a).as_f64x4();
        transmute(simd_select_bitmask(k, b, src.as_f64x4()))
    }
}

/// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all
/// elements of dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_f64x2&ig_expand=526)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_broadcast_f64x2(k: __mmask8, a: __m128d) -> __m256d {
    unsafe {
        let b = _mm256_broadcast_f64x2(a).as_f64x4();
        transmute(simd_select_bitmask(k, b, f64x4::ZERO))
    }
}

/// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all
/// elements of dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f64x2&ig_expand=527)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_broadcast_f64x2(a: __m128d) -> __m512d {
    unsafe {
        let b: f64x8 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1]);
        transmute(b)
    }
}

/// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all
/// elements of dst using writemask k (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f64x2&ig_expand=528)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_broadcast_f64x2(src: __m512d, k: __mmask8, a: __m128d) -> __m512d {
    unsafe {
        let b = _mm512_broadcast_f64x2(a).as_f64x8();
        transmute(simd_select_bitmask(k, b, src.as_f64x8()))
    }
}

/// Broadcasts the 2 packed double-precision (64-bit) floating-point elements from a to all
/// elements of dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f64x2&ig_expand=529)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_broadcast_f64x2(k: __mmask8, a: __m128d) -> __m512d {
    unsafe {
        let b = _mm512_broadcast_f64x2(a).as_f64x8();
        transmute(simd_select_bitmask(k, b, f64x8::ZERO))
    }
}
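
// Illustrative sketch (an assumed helper for exposition only): with zeromask 0b1010_1010 the
// odd-numbered lanes keep the broadcast values and the even-numbered lanes are zeroed.
#[cfg(test)]
#[target_feature(enable = "avx512f,avx512dq")]
unsafe fn _demo_mm512_maskz_broadcast_f64x2() {
    let a = _mm_set_pd(2.0, 1.0); // lanes: [1.0, 2.0]
    let r = _mm512_maskz_broadcast_f64x2(0b1010_1010, a);
    let mut out = [0.0f64; 8];
    unsafe { _mm512_storeu_pd(out.as_mut_ptr(), r) };
    assert_eq!(out, [0.0, 2.0, 0.0, 2.0, 0.0, 2.0, 0.0, 2.0]);
}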

/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_broadcast_i32x2&ig_expand=533)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_broadcast_i32x2(a: __m128i) -> __m128i {
    unsafe {
        let a = a.as_i32x4();
        let b: i32x4 = simd_shuffle!(a, a, [0, 1, 0, 1]);
        transmute(b)
    }
}
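
// Illustrative sketch (an assumed helper, for exposition only): only the two lowest 32-bit
// lanes of `a` are used; they are repeated across the whole 128-bit result.
#[cfg(test)]
#[target_feature(enable = "avx512dq,avx512vl")]
unsafe fn _demo_mm_broadcast_i32x2() {
    let a = _mm_set_epi32(4, 3, 2, 1); // lanes: [1, 2, 3, 4]
    let r = _mm_broadcast_i32x2(a); // lanes: [1, 2, 1, 2]
    let mut out = [0i32; 4];
    unsafe { _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, r) };
    assert_eq!(out, [1, 2, 1, 2]);
}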

/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using writemask k
/// (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcast_i32x2&ig_expand=534)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vbroadcasti32x2))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_mask_broadcast_i32x2(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let b = _mm_broadcast_i32x2(a).as_i32x4();
        transmute(simd_select_bitmask(k, b, src.as_i32x4()))
    }
}

/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using zeromask k
/// (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcast_i32x2&ig_expand=535)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vbroadcasti32x2))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm_maskz_broadcast_i32x2(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let b = _mm_broadcast_i32x2(a).as_i32x4();
        transmute(simd_select_bitmask(k, b, i32x4::ZERO))
    }
}

/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_i32x2&ig_expand=536)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_broadcast_i32x2(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_i32x4();
        let b: i32x8 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1]);
        transmute(b)
    }
}

/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using writemask k
/// (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_i32x2&ig_expand=537)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vbroadcasti32x2))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_broadcast_i32x2(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let b = _mm256_broadcast_i32x2(a).as_i32x8();
        transmute(simd_select_bitmask(k, b, src.as_i32x8()))
    }
}

/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using zeromask k
/// (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_i32x2&ig_expand=538)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vbroadcasti32x2))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_broadcast_i32x2(k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let b = _mm256_broadcast_i32x2(a).as_i32x8();
        transmute(simd_select_bitmask(k, b, i32x8::ZERO))
    }
}

/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i32x2&ig_expand=539)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_broadcast_i32x2(a: __m128i) -> __m512i {
    unsafe {
        let a = a.as_i32x4();
        let b: i32x16 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1]);
        transmute(b)
    }
}

/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using writemask k
/// (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i32x2&ig_expand=540)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vbroadcasti32x2))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_broadcast_i32x2(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
    unsafe {
        let b = _mm512_broadcast_i32x2(a).as_i32x16();
        transmute(simd_select_bitmask(k, b, src.as_i32x16()))
    }
}

/// Broadcasts the lower 2 packed 32-bit integers from a to all elements of dst using zeromask k
/// (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i32x2&ig_expand=541)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vbroadcasti32x2))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_broadcast_i32x2(k: __mmask16, a: __m128i) -> __m512i {
    unsafe {
        let b = _mm512_broadcast_i32x2(a).as_i32x16();
        transmute(simd_select_bitmask(k, b, i32x16::ZERO))
    }
}

/// Broadcasts the 8 packed 32-bit integers from a to all elements of dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i32x8&ig_expand=548)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_broadcast_i32x8(a: __m256i) -> __m512i {
    unsafe {
        let a = a.as_i32x8();
        let b: i32x16 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7]);
        transmute(b)
    }
}
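
// Illustrative sketch (an assumed helper, for exposition only): the eight 32-bit lanes of `a`
// fill both 256-bit halves of the result; the upper half is read back with the extract intrinsic.
#[cfg(test)]
#[target_feature(enable = "avx512dq")]
unsafe fn _demo_mm512_broadcast_i32x8() {
    let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
    let r = _mm512_broadcast_i32x8(a);
    let hi = _mm512_extracti32x8_epi32::<1>(r); // upper 256 bits of r
    let mut out = [0i32; 8];
    unsafe { _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, hi) };
    assert_eq!(out, [0, 1, 2, 3, 4, 5, 6, 7]);
}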

/// Broadcasts the 8 packed 32-bit integers from a to all elements of dst using writemask k
/// (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i32x8&ig_expand=549)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_broadcast_i32x8(src: __m512i, k: __mmask16, a: __m256i) -> __m512i {
    unsafe {
        let b = _mm512_broadcast_i32x8(a).as_i32x16();
        transmute(simd_select_bitmask(k, b, src.as_i32x16()))
    }
}

/// Broadcasts the 8 packed 32-bit integers from a to all elements of dst using zeromask k
/// (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i32x8&ig_expand=550)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_broadcast_i32x8(k: __mmask16, a: __m256i) -> __m512i {
    unsafe {
        let b = _mm512_broadcast_i32x8(a).as_i32x16();
        transmute(simd_select_bitmask(k, b, i32x16::ZERO))
    }
}

/// Broadcasts the 2 packed 64-bit integers from a to all elements of dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_i64x2&ig_expand=551)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_broadcast_i64x2(a: __m128i) -> __m256i {
    unsafe {
        let a = a.as_i64x2();
        let b: i64x4 = simd_shuffle!(a, a, [0, 1, 0, 1]);
        transmute(b)
    }
}
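
// Illustrative sketch (an assumed helper, for exposition only): the two 64-bit lanes of `a`
// are repeated into both 128-bit halves of the 256-bit result.
#[cfg(test)]
#[target_feature(enable = "avx512dq,avx512vl")]
unsafe fn _demo_mm256_broadcast_i64x2() {
    let a = _mm_set_epi64x(20, 10); // lanes: [10, 20]
    let r = _mm256_broadcast_i64x2(a); // lanes: [10, 20, 10, 20]
    let mut out = [0i64; 4];
    unsafe { _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r) };
    assert_eq!(out, [10, 20, 10, 20]);
}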

/// Broadcasts the 2 packed 64-bit integers from a to all elements of dst using writemask k
/// (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_i64x2&ig_expand=552)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_broadcast_i64x2(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let b = _mm256_broadcast_i64x2(a).as_i64x4();
        transmute(simd_select_bitmask(k, b, src.as_i64x4()))
    }
}

/// Broadcasts the 2 packed 64-bit integers from a to all elements of dst using zeromask k
/// (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_i64x2&ig_expand=553)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_broadcast_i64x2(k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let b = _mm256_broadcast_i64x2(a).as_i64x4();
        transmute(simd_select_bitmask(k, b, i64x4::ZERO))
    }
}

/// Broadcasts the 2 packed 64-bit integers from a to all elements of dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i64x2&ig_expand=554)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_broadcast_i64x2(a: __m128i) -> __m512i {
    unsafe {
        let a = a.as_i64x2();
        let b: i64x8 = simd_shuffle!(a, a, [0, 1, 0, 1, 0, 1, 0, 1]);
        transmute(b)
    }
}

/// Broadcasts the 2 packed 64-bit integers from a to all elements of dst using writemask k
/// (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i64x2&ig_expand=555)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_broadcast_i64x2(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
    unsafe {
        let b = _mm512_broadcast_i64x2(a).as_i64x8();
        transmute(simd_select_bitmask(k, b, src.as_i64x8()))
    }
}

/// Broadcasts the 2 packed 64-bit integers from a to all elements of dst using zeromask k
/// (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i64x2&ig_expand=556)
#[inline]
#[target_feature(enable = "avx512dq")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_broadcast_i64x2(k: __mmask8, a: __m128i) -> __m512i {
    unsafe {
        let b = _mm512_broadcast_i64x2(a).as_i64x8();
        transmute(simd_select_bitmask(k, b, i64x8::ZERO))
    }
}

// Extract

/// Extracts 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from a,
/// selected with IMM8, and stores the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extractf32x8_ps&ig_expand=2946)
#[inline]
#[target_feature(enable = "avx512dq")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_extractf32x8_ps<const IMM8: i32>(a: __m512) -> __m256 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        match IMM8 & 1 {
            0 => simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
            _ => simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
        }
    }
}
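
// Illustrative sketch (an assumed helper, for exposition only): IMM8 selects which 256-bit
// half of `a` is returned; `1` picks the upper eight single-precision lanes.
#[cfg(test)]
#[target_feature(enable = "avx512f,avx512dq")]
unsafe fn _demo_mm512_extractf32x8_ps() {
    let a = _mm512_setr_ps(
        0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
    );
    let hi = _mm512_extractf32x8_ps::<1>(a); // lanes 8..=15
    let mut out = [0.0f32; 8];
    unsafe { _mm256_storeu_ps(out.as_mut_ptr(), hi) };
    assert_eq!(out, [8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0]);
}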

/// Extracts 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from a,
/// selected with IMM8, and stores the result in dst using writemask k (elements are copied from src
/// if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extractf32x8_ps&ig_expand=2947)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vextractf32x8, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_extractf32x8_ps<const IMM8: i32>(
    src: __m256,
    k: __mmask8,
    a: __m512,
) -> __m256 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let b: __m256 = _mm512_extractf32x8_ps::<IMM8>(a);
        transmute(simd_select_bitmask(k, b.as_f32x8(), src.as_f32x8()))
    }
}

/// Extracts 256 bits (composed of 8 packed single-precision (32-bit) floating-point elements) from a,
/// selected with IMM8, and stores the result in dst using zeromask k (elements are zeroed out if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extractf32x8_ps&ig_expand=2948)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vextractf32x8, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_extractf32x8_ps<const IMM8: i32>(k: __mmask8, a: __m512) -> __m256 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let b: __m256 = _mm512_extractf32x8_ps::<IMM8>(a);
        transmute(simd_select_bitmask(k, b.as_f32x8(), f32x8::ZERO))
    }
}

/// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a,
/// selected with IMM8, and stores the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extractf64x2_pd&ig_expand=2949)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_extractf64x2_pd<const IMM8: i32>(a: __m256d) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        match IMM8 & 1 {
            0 => simd_shuffle!(a, a, [0, 1]),
            _ => simd_shuffle!(a, a, [2, 3]),
        }
    }
}

/// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a,
/// selected with IMM8, and stores the result in dst using writemask k (elements are copied from src
/// if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_extractf64x2_pd&ig_expand=2950)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vextractf64x2, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_extractf64x2_pd<const IMM8: i32>(
    src: __m128d,
    k: __mmask8,
    a: __m256d,
) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let b: __m128d = _mm256_extractf64x2_pd::<IMM8>(a);
        transmute(simd_select_bitmask(k, b.as_f64x2(), src.as_f64x2()))
    }
}

/// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a,
/// selected with IMM8, and stores the result in dst using zeromask k (elements are zeroed out if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_extractf64x2_pd&ig_expand=2951)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vextractf64x2, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_extractf64x2_pd<const IMM8: i32>(k: __mmask8, a: __m256d) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let b: __m128d = _mm256_extractf64x2_pd::<IMM8>(a);
        transmute(simd_select_bitmask(k, b.as_f64x2(), f64x2::ZERO))
    }
}

/// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a,
/// selected with IMM8, and stores the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extractf64x2_pd&ig_expand=2952)
#[inline]
#[target_feature(enable = "avx512dq")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_extractf64x2_pd<const IMM8: i32>(a: __m512d) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        match IMM8 & 3 {
            0 => simd_shuffle!(a, a, [0, 1]),
            1 => simd_shuffle!(a, a, [2, 3]),
            2 => simd_shuffle!(a, a, [4, 5]),
            _ => simd_shuffle!(a, a, [6, 7]),
        }
    }
}
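
// Illustrative sketch (an assumed helper, for exposition only): IMM8 selects one of the four
// 128-bit chunks of `a`; `2` returns lanes 4 and 5.
#[cfg(test)]
#[target_feature(enable = "avx512f,avx512dq")]
unsafe fn _demo_mm512_extractf64x2_pd() {
    let a = _mm512_setr_pd(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
    let r = _mm512_extractf64x2_pd::<2>(a); // lanes: [4.0, 5.0]
    let mut out = [0.0f64; 2];
    unsafe { _mm_storeu_pd(out.as_mut_ptr(), r) };
    assert_eq!(out, [4.0, 5.0]);
}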

/// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a,
/// selected with IMM8, and stores the result in dst using writemask k (elements are copied from src
/// if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extractf64x2_pd&ig_expand=2953)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vextractf64x2, IMM8 = 3))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_extractf64x2_pd<const IMM8: i32>(
    src: __m128d,
    k: __mmask8,
    a: __m512d,
) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        let b = _mm512_extractf64x2_pd::<IMM8>(a).as_f64x2();
        transmute(simd_select_bitmask(k, b, src.as_f64x2()))
    }
}

/// Extracts 128 bits (composed of 2 packed double-precision (64-bit) floating-point elements) from a,
/// selected with IMM8, and stores the result in dst using zeromask k (elements are zeroed out if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extractf64x2_pd&ig_expand=2954)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vextractf64x2, IMM8 = 3))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_extractf64x2_pd<const IMM8: i32>(k: __mmask8, a: __m512d) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        let b = _mm512_extractf64x2_pd::<IMM8>(a).as_f64x2();
        transmute(simd_select_bitmask(k, b, f64x2::ZERO))
    }
}

/// Extracts 256 bits (composed of 8 packed 32-bit integers) from a, selected with IMM8, and stores
/// the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extracti32x8_epi32&ig_expand=2965)
#[inline]
#[target_feature(enable = "avx512dq")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_extracti32x8_epi32<const IMM8: i32>(a: __m512i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let a = a.as_i32x16();
        let b: i32x8 = match IMM8 & 1 {
            0 => simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
            _ => simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
        };
        transmute(b)
    }
}

/// Extracts 256 bits (composed of 8 packed 32-bit integers) from a, selected with IMM8, and stores
/// the result in dst using writemask k (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extracti32x8_epi32&ig_expand=2966)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vextracti32x8, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_extracti32x8_epi32<const IMM8: i32>(
    src: __m256i,
    k: __mmask8,
    a: __m512i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let b = _mm512_extracti32x8_epi32::<IMM8>(a).as_i32x8();
        transmute(simd_select_bitmask(k, b, src.as_i32x8()))
    }
}

/// Extracts 256 bits (composed of 8 packed 32-bit integers) from a, selected with IMM8, and stores
/// the result in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extracti32x8_epi32&ig_expand=2967)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vextracti32x8, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_extracti32x8_epi32<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let b = _mm512_extracti32x8_epi32::<IMM8>(a).as_i32x8();
        transmute(simd_select_bitmask(k, b, i32x8::ZERO))
    }
}

/// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores
/// the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extracti64x2_epi64&ig_expand=2968)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_extracti64x2_epi64<const IMM8: i32>(a: __m256i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let a = a.as_i64x4();
        match IMM8 & 1 {
            0 => simd_shuffle!(a, a, [0, 1]),
            _ => simd_shuffle!(a, a, [2, 3]),
        }
    }
}
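
// Illustrative sketch (an assumed helper, for exposition only): IMM8 = 1 returns the upper
// 128-bit half, i.e. the third and fourth 64-bit lanes of `a`.
#[cfg(test)]
#[target_feature(enable = "avx512dq,avx512vl")]
unsafe fn _demo_mm256_extracti64x2_epi64() {
    let a = _mm256_setr_epi64x(10, 20, 30, 40);
    let r = _mm256_extracti64x2_epi64::<1>(a); // lanes: [30, 40]
    let mut out = [0i64; 2];
    unsafe { _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, r) };
    assert_eq!(out, [30, 40]);
}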

/// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores
/// the result in dst using writemask k (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_extracti64x2_epi64&ig_expand=2969)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vextracti64x2, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_extracti64x2_epi64<const IMM8: i32>(
    src: __m128i,
    k: __mmask8,
    a: __m256i,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let b = _mm256_extracti64x2_epi64::<IMM8>(a).as_i64x2();
        transmute(simd_select_bitmask(k, b, src.as_i64x2()))
    }
}

/// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores
/// the result in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_extracti64x2_epi64&ig_expand=2970)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vextracti64x2, IMM8 = 1))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_extracti64x2_epi64<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let b = _mm256_extracti64x2_epi64::<IMM8>(a).as_i64x2();
        transmute(simd_select_bitmask(k, b, i64x2::ZERO))
    }
}

/// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores
/// the result in dst.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extracti64x2_epi64&ig_expand=2971)
#[inline]
#[target_feature(enable = "avx512dq")]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_extracti64x2_epi64<const IMM8: i32>(a: __m512i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        let a = a.as_i64x8();
        match IMM8 & 3 {
            0 => simd_shuffle!(a, a, [0, 1]),
            1 => simd_shuffle!(a, a, [2, 3]),
            2 => simd_shuffle!(a, a, [4, 5]),
            _ => simd_shuffle!(a, a, [6, 7]),
        }
    }
}

/// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores
/// the result in dst using writemask k (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extracti64x2_epi64&ig_expand=2972)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vextracti64x2, IMM8 = 3))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_extracti64x2_epi64<const IMM8: i32>(
    src: __m128i,
    k: __mmask8,
    a: __m512i,
) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        let b = _mm512_extracti64x2_epi64::<IMM8>(a).as_i64x2();
        transmute(simd_select_bitmask(k, b, src.as_i64x2()))
    }
}

/// Extracts 128 bits (composed of 2 packed 64-bit integers) from a, selected with IMM8, and stores
/// the result in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extracti64x2_epi64&ig_expand=2973)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vextracti64x2, IMM8 = 3))]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_extracti64x2_epi64<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m128i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        let b = _mm512_extracti64x2_epi64::<IMM8>(a).as_i64x2();
        transmute(simd_select_bitmask(k, b, i64x2::ZERO))
    }
}

// Insert

/// Copy a to dst, then insert 256 bits (composed of 8 packed single-precision (32-bit) floating-point
/// elements) from b into dst at the location specified by IMM8.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_insertf32x8&ig_expand=3850)
#[inline]
#[target_feature(enable = "avx512dq")]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_insertf32x8<const IMM8: i32>(a: __m512, b: __m256) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let b: __m512 = _mm512_castps256_ps512(b);
        match IMM8 & 1 {
            0 => {
                simd_shuffle!(
                    a,
                    b,
                    [16, 17, 18, 19, 20, 21, 22, 23, 8, 9, 10, 11, 12, 13, 14, 15]
                )
            }
            _ => {
                simd_shuffle!(
                    a,
                    b,
                    [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23]
                )
            }
        }
    }
}
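
// Illustrative sketch (an assumed helper, for exposition only): IMM8 = 1 overwrites the upper
// 256 bits of `a` with `b` while the lower 256 bits are copied through unchanged.
#[cfg(test)]
#[target_feature(enable = "avx512f,avx512dq")]
unsafe fn _demo_mm512_insertf32x8() {
    let a = _mm512_setzero_ps();
    let b = _mm256_set1_ps(1.0);
    let r = _mm512_insertf32x8::<1>(a, b);
    let mut out = [0.0f32; 16];
    unsafe { _mm512_storeu_ps(out.as_mut_ptr(), r) };
    assert_eq!(out[..8], [0.0f32; 8]);
    assert_eq!(out[8..], [1.0f32; 8]);
}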

/// Copy a to tmp, then insert 256 bits (composed of 8 packed single-precision (32-bit) floating-point
/// elements) from b into tmp at the location specified by IMM8, and copy tmp to dst using writemask k
/// (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_insertf32x8&ig_expand=3851)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vinsertf32x8, IMM8 = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_insertf32x8<const IMM8: i32>(
    src: __m512,
    k: __mmask16,
    a: __m512,
    b: __m256,
) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let c: __m512 = _mm512_insertf32x8::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, c.as_f32x16(), src.as_f32x16()))
    }
}

/// Copy a to tmp, then insert 256 bits (composed of 8 packed single-precision (32-bit) floating-point
/// elements) from b into tmp at the location specified by IMM8, and copy tmp to dst using zeromask k
/// (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_insertf32x8&ig_expand=3852)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vinsertf32x8, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_insertf32x8<const IMM8: i32>(
    k: __mmask16,
    a: __m512,
    b: __m256,
) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let c = _mm512_insertf32x8::<IMM8>(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, c, f32x16::ZERO))
    }
}

/// Copy a to dst, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point
/// elements) from b into dst at the location specified by IMM8.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_insertf64x2&ig_expand=3853)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_insertf64x2<const IMM8: i32>(a: __m256d, b: __m128d) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let b: __m256d = _mm256_castpd128_pd256(b);
        match IMM8 & 1 {
            0 => simd_shuffle!(a, b, [4, 5, 2, 3]),
            _ => simd_shuffle!(a, b, [0, 1, 4, 5]),
        }
    }
}
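
// Illustrative sketch (an assumed helper, for exposition only): IMM8 = 0 replaces the lower
// 128 bits of `a` with `b`; the upper two lanes of `a` are preserved.
#[cfg(test)]
#[target_feature(enable = "avx512dq,avx512vl")]
unsafe fn _demo_mm256_insertf64x2() {
    let a = _mm256_set1_pd(7.0);
    let b = _mm_set_pd(2.0, 1.0); // lanes: [1.0, 2.0]
    let r = _mm256_insertf64x2::<0>(a, b); // lanes: [1.0, 2.0, 7.0, 7.0]
    let mut out = [0.0f64; 4];
    unsafe { _mm256_storeu_pd(out.as_mut_ptr(), r) };
    assert_eq!(out, [1.0, 2.0, 7.0, 7.0]);
}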

/// Copy a to tmp, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point
/// elements) from b into tmp at the location specified by IMM8, and copy tmp to dst using writemask k
/// (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_insertf64x2&ig_expand=3854)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vinsertf64x2, IMM8 = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_insertf64x2<const IMM8: i32>(
    src: __m256d,
    k: __mmask8,
    a: __m256d,
    b: __m128d,
) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let c: __m256d = _mm256_insertf64x2::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, c.as_f64x4(), src.as_f64x4()))
    }
}

/// Copy a to tmp, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point
/// elements) from b into tmp at the location specified by IMM8, and copy tmp to dst using zeromask k
/// (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_insertf64x2&ig_expand=3855)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vinsertf64x2, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_insertf64x2<const IMM8: i32>(
    k: __mmask8,
    a: __m256d,
    b: __m128d,
) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let c = _mm256_insertf64x2::<IMM8>(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, c, f64x4::ZERO))
    }
}

/// Copy a to dst, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point
/// elements) from b into dst at the location specified by IMM8.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_insertf64x2&ig_expand=3856)
#[inline]
#[target_feature(enable = "avx512dq")]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_insertf64x2<const IMM8: i32>(a: __m512d, b: __m128d) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        let b: __m512d = _mm512_castpd128_pd512(b);
        match IMM8 & 3 {
            0 => simd_shuffle!(a, b, [8, 9, 2, 3, 4, 5, 6, 7]),
            1 => simd_shuffle!(a, b, [0, 1, 8, 9, 4, 5, 6, 7]),
            2 => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 6, 7]),
            _ => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8, 9]),
        }
    }
}

/// Copy a to tmp, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point
/// elements) from b into tmp at the location specified by IMM8, and copy tmp to dst using writemask k
/// (elements are copied from src if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_insertf64x2&ig_expand=3857)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vinsertf64x2, IMM8 = 3))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_insertf64x2<const IMM8: i32>(
    src: __m512d,
    k: __mmask8,
    a: __m512d,
    b: __m128d,
) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        let c: __m512d = _mm512_insertf64x2::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, c.as_f64x8(), src.as_f64x8()))
    }
}

/// Copy a to tmp, then insert 128 bits (composed of 2 packed double-precision (64-bit) floating-point
/// elements) from b into tmp at the location specified by IMM8, and copy tmp to dst using zeromask k
/// (elements are zeroed out if the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_insertf64x2&ig_expand=3858)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vinsertf64x2, IMM8 = 3))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_insertf64x2<const IMM8: i32>(
    k: __mmask8,
    a: __m512d,
    b: __m128d,
) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        let c = _mm512_insertf64x2::<IMM8>(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, c, f64x8::ZERO))
    }
}

/// Copy a to dst, then insert 256 bits (composed of 8 packed 32-bit integers) from b into dst at the
/// location specified by IMM8.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_inserti32x8&ig_expand=3869)
#[inline]
#[target_feature(enable = "avx512dq")]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_inserti32x8<const IMM8: i32>(a: __m512i, b: __m256i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let a = a.as_i32x16();
        let b = _mm512_castsi256_si512(b).as_i32x16();
        let r: i32x16 = match IMM8 & 1 {
            0 => {
                simd_shuffle!(
                    a,
                    b,
                    [16, 17, 18, 19, 20, 21, 22, 23, 8, 9, 10, 11, 12, 13, 14, 15]
                )
            }
            _ => {
                simd_shuffle!(
                    a,
                    b,
                    [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23]
                )
            }
        };
        transmute(r)
    }
}

/// Copy a to tmp, then insert 256 bits (composed of 8 packed 32-bit integers) from b into tmp at the
/// location specified by IMM8, and copy tmp to dst using writemask k (elements are copied from src if
/// the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_inserti32x8&ig_expand=3870)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vinserti32x8, IMM8 = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_inserti32x8<const IMM8: i32>(
    src: __m512i,
    k: __mmask16,
    a: __m512i,
    b: __m256i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let c: __m512i = _mm512_inserti32x8::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, c.as_i32x16(), src.as_i32x16()))
    }
}

/// Copy a to tmp, then insert 256 bits (composed of 8 packed 32-bit integers) from b into tmp at the
/// location specified by IMM8, and copy tmp to dst using zeromask k (elements are zeroed out if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_inserti32x8&ig_expand=3871)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vinserti32x8, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_inserti32x8<const IMM8: i32>(
    k: __mmask16,
    a: __m512i,
    b: __m256i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let c = _mm512_inserti32x8::<IMM8>(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, c, i32x16::ZERO))
    }
}

/// Copy a to dst, then insert 128 bits (composed of 2 packed 64-bit integers) from b into dst at the
/// location specified by IMM8.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_inserti64x2&ig_expand=3872)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_inserti64x2<const IMM8: i32>(a: __m256i, b: __m128i) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let a = a.as_i64x4();
        let b = _mm256_castsi128_si256(b).as_i64x4();
        match IMM8 & 1 {
            0 => simd_shuffle!(a, b, [4, 5, 2, 3]),
            _ => simd_shuffle!(a, b, [0, 1, 4, 5]),
        }
    }
}

/// Copy a to tmp, then insert 128 bits (composed of 2 packed 64-bit integers) from b into tmp at the
/// location specified by IMM8, and copy tmp to dst using writemask k (elements are copied from src if
/// the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_inserti64x2&ig_expand=3873)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vinserti64x2, IMM8 = 1))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_mask_inserti64x2<const IMM8: i32>(
    src: __m256i,
    k: __mmask8,
    a: __m256i,
    b: __m128i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let c: __m256i = _mm256_inserti64x2::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, c.as_i64x4(), src.as_i64x4()))
    }
}

/// Copy a to tmp, then insert 128 bits (composed of 2 packed 64-bit integers) from b into tmp at the
/// location specified by IMM8, and copy tmp to dst using zeromask k (elements are zeroed out if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_inserti64x2&ig_expand=3874)
#[inline]
#[target_feature(enable = "avx512dq,avx512vl")]
#[cfg_attr(test, assert_instr(vinserti64x2, IMM8 = 1))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm256_maskz_inserti64x2<const IMM8: i32>(
    k: __mmask8,
    a: __m256i,
    b: __m128i,
) -> __m256i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 1);
        let c = _mm256_inserti64x2::<IMM8>(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, c, i64x4::ZERO))
    }
}

/// Copy a to dst, then insert 128 bits (composed of 2 packed 64-bit integers) from b into dst at the
/// location specified by IMM8.
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_inserti64x2&ig_expand=3875)
#[inline]
#[target_feature(enable = "avx512dq")]
#[rustc_legacy_const_generics(2)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_inserti64x2<const IMM8: i32>(a: __m512i, b: __m128i) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        let a = a.as_i64x8();
        let b = _mm512_castsi128_si512(b).as_i64x8();
        match IMM8 & 3 {
            0 => simd_shuffle!(a, b, [8, 9, 2, 3, 4, 5, 6, 7]),
            1 => simd_shuffle!(a, b, [0, 1, 8, 9, 4, 5, 6, 7]),
            2 => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 6, 7]),
            _ => simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 8, 9]),
        }
    }
}
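
// Illustrative sketch (an assumed helper, for exposition only): IMM8 = 3 places `b` in the
// top 128-bit chunk (lanes 6 and 7); the extract intrinsic reads the same chunk back.
#[cfg(test)]
#[target_feature(enable = "avx512f,avx512dq")]
unsafe fn _demo_mm512_inserti64x2() {
    let a = _mm512_set1_epi64(0);
    let b = _mm_set_epi64x(2, 1); // lanes: [1, 2]
    let r = _mm512_inserti64x2::<3>(a, b);
    let hi = _mm512_extracti64x2_epi64::<3>(r);
    let mut out = [0i64; 2];
    unsafe { _mm_storeu_si128(out.as_mut_ptr() as *mut __m128i, hi) };
    assert_eq!(out, [1, 2]);
}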

/// Copy a to tmp, then insert 128 bits (composed of 2 packed 64-bit integers) from b into tmp at the
/// location specified by IMM8, and copy tmp to dst using writemask k (elements are copied from src if
/// the corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_inserti64x2&ig_expand=3876)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vinserti64x2, IMM8 = 3))]
#[rustc_legacy_const_generics(4)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_mask_inserti64x2<const IMM8: i32>(
    src: __m512i,
    k: __mmask8,
    a: __m512i,
    b: __m128i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        let c: __m512i = _mm512_inserti64x2::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, c.as_i64x8(), src.as_i64x8()))
    }
}

/// Copy a to tmp, then insert 128 bits (composed of 2 packed 64-bit integers) from b into tmp at the
/// location specified by IMM8, and copy tmp to dst using zeromask k (elements are zeroed out if the
/// corresponding bit is not set).
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_inserti64x2&ig_expand=3877)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vinserti64x2, IMM8 = 3))]
#[rustc_legacy_const_generics(3)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
pub const fn _mm512_maskz_inserti64x2<const IMM8: i32>(
    k: __mmask8,
    a: __m512i,
    b: __m128i,
) -> __m512i {
    unsafe {
        static_assert_uimm_bits!(IMM8, 2);
        let c = _mm512_inserti64x2::<IMM8>(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, c, i64x8::ZERO))
    }
}

// Convert

/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
/// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepi64_pd&ig_expand=1437)
#[inline]
#[target_feature(enable = "avx512dq")]
#[cfg_attr(test, assert_instr(vcvtqq2pd, ROUNDING = 8))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvt_roundepi64_pd<const ROUNDING: i32>(a: __m512i) -> __m512d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        transmute(vcvtqq2pd_512(a.as_i64x8(), ROUNDING))
    }
}
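
// Illustrative sketch (an assumed helper, for exposition only): with round-to-nearest and
// exceptions suppressed, each signed 64-bit lane becomes the corresponding double.
#[cfg(test)]
#[target_feature(enable = "avx512f,avx512dq")]
unsafe fn _demo_mm512_cvt_roundepi64_pd() {
    let a = _mm512_setr_epi64(0, 1, 2, 3, -1, -2, -3, 4);
    let r = _mm512_cvt_roundepi64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
    let mut out = [0.0f64; 8];
    unsafe { _mm512_storeu_pd(out.as_mut_ptr(), r) };
    assert_eq!(out, [0.0, 1.0, 2.0, 3.0, -1.0, -2.0, -3.0, 4.0]);
}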
2268
2269/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2270/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
2271/// not set). Rounding is done according to the ROUNDING parameter, which can be one of:
2272///
2273/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2274/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2275/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2276/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2277/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2278///
2279/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepi64_pd&ig_expand=1438)
2280#[inline]
2281#[target_feature(enable = "avx512dq")]
2282#[cfg_attr(test, assert_instr(vcvtqq2pd, ROUNDING = 8))]
2283#[rustc_legacy_const_generics(3)]
2284#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2285pub fn _mm512_mask_cvt_roundepi64_pd<const ROUNDING: i32>(
2286 src: __m512d,
2287 k: __mmask8,
2288 a: __m512i,
2289) -> __m512d {
2290 unsafe {
2291 static_assert_rounding!(ROUNDING);
2292 let b: Simd = _mm512_cvt_roundepi64_pd::<ROUNDING>(a).as_f64x8();
2293 transmute(src:simd_select_bitmask(m:k, yes:b, no:src.as_f64x8()))
2294 }
2295}
2296
2297/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2298/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
2299/// Rounding is done according to the ROUNDING parameter, which can be one of:
2300///
2301/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2302/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2303/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2304/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2305/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2306///
2307/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepi64_pd&ig_expand=1439)
2308#[inline]
2309#[target_feature(enable = "avx512dq")]
2310#[cfg_attr(test, assert_instr(vcvtqq2pd, ROUNDING = 8))]
2311#[rustc_legacy_const_generics(2)]
2312#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2313pub fn _mm512_maskz_cvt_roundepi64_pd<const ROUNDING: i32>(k: __mmask8, a: __m512i) -> __m512d {
2314 unsafe {
2315 static_assert_rounding!(ROUNDING);
2316 let b: Simd = _mm512_cvt_roundepi64_pd::<ROUNDING>(a).as_f64x8();
2317 transmute(src:simd_select_bitmask(m:k, yes:b, no:f64x8::ZERO))
2318 }
2319}
2320
2321/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2322/// and store the results in dst.
2323///
2324/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_pd&ig_expand=1705)
2325#[inline]
2326#[target_feature(enable = "avx512dq,avx512vl")]
2327#[cfg_attr(test, assert_instr(vcvtqq2pd))]
2328#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2329pub fn _mm_cvtepi64_pd(a: __m128i) -> __m128d {
    unsafe { transmute(vcvtqq2pd_128(a.as_i64x2(), _MM_FROUND_CUR_DIRECTION)) }
2331}
2332
2333/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2334/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
2335/// not set).
2336///
2337/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_pd&ig_expand=1706)
2338#[inline]
2339#[target_feature(enable = "avx512dq,avx512vl")]
2340#[cfg_attr(test, assert_instr(vcvtqq2pd))]
2341#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2342pub fn _mm_mask_cvtepi64_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d {
2343 unsafe {
        let b = _mm_cvtepi64_pd(a).as_f64x2();
        transmute(simd_select_bitmask(k, b, src.as_f64x2()))
2346 }
2347}
2348
2349/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2350/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
2351///
2352/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_pd&ig_expand=1707)
2353#[inline]
2354#[target_feature(enable = "avx512dq,avx512vl")]
2355#[cfg_attr(test, assert_instr(vcvtqq2pd))]
2356#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2357pub fn _mm_maskz_cvtepi64_pd(k: __mmask8, a: __m128i) -> __m128d {
2358 unsafe {
        let b = _mm_cvtepi64_pd(a).as_f64x2();
        transmute(simd_select_bitmask(k, b, f64x2::ZERO))
2361 }
2362}
2363
2364/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2365/// and store the results in dst.
2366///
2367/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_pd&ig_expand=1708)
2368#[inline]
2369#[target_feature(enable = "avx512dq,avx512vl")]
2370#[cfg_attr(test, assert_instr(vcvtqq2pd))]
2371#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2372pub fn _mm256_cvtepi64_pd(a: __m256i) -> __m256d {
    unsafe { transmute(vcvtqq2pd_256(a.as_i64x4(), _MM_FROUND_CUR_DIRECTION)) }
2374}
2375
2376/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2377/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
2378/// not set).
2379///
2380/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_pd&ig_expand=1709)
2381#[inline]
2382#[target_feature(enable = "avx512dq,avx512vl")]
2383#[cfg_attr(test, assert_instr(vcvtqq2pd))]
2384#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2385pub fn _mm256_mask_cvtepi64_pd(src: __m256d, k: __mmask8, a: __m256i) -> __m256d {
2386 unsafe {
        let b = _mm256_cvtepi64_pd(a).as_f64x4();
        transmute(simd_select_bitmask(k, b, src.as_f64x4()))
2389 }
2390}
2391
2392/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2393/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
2394///
2395/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_pd&ig_expand=1710)
2396#[inline]
2397#[target_feature(enable = "avx512dq,avx512vl")]
2398#[cfg_attr(test, assert_instr(vcvtqq2pd))]
2399#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2400pub fn _mm256_maskz_cvtepi64_pd(k: __mmask8, a: __m256i) -> __m256d {
2401 unsafe {
        let b = _mm256_cvtepi64_pd(a).as_f64x4();
        transmute(simd_select_bitmask(k, b, f64x4::ZERO))
2404 }
2405}
2406
2407/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2408/// and store the results in dst.
2409///
2410/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_pd&ig_expand=1711)
2411#[inline]
2412#[target_feature(enable = "avx512dq")]
2413#[cfg_attr(test, assert_instr(vcvtqq2pd))]
2414#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2415pub fn _mm512_cvtepi64_pd(a: __m512i) -> __m512d {
    unsafe { transmute(vcvtqq2pd_512(a.as_i64x8(), _MM_FROUND_CUR_DIRECTION)) }
2417}
2418
2419/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2420/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
2421/// not set).
2422///
2423/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_pd&ig_expand=1712)
2424#[inline]
2425#[target_feature(enable = "avx512dq")]
2426#[cfg_attr(test, assert_instr(vcvtqq2pd))]
2427#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2428pub fn _mm512_mask_cvtepi64_pd(src: __m512d, k: __mmask8, a: __m512i) -> __m512d {
2429 unsafe {
        let b = _mm512_cvtepi64_pd(a).as_f64x8();
        transmute(simd_select_bitmask(k, b, src.as_f64x8()))
2432 }
2433}
2434
2435/// Convert packed signed 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2436/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
2437///
2438/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_pd&ig_expand=1713)
2439#[inline]
2440#[target_feature(enable = "avx512dq")]
2441#[cfg_attr(test, assert_instr(vcvtqq2pd))]
2442#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2443pub fn _mm512_maskz_cvtepi64_pd(k: __mmask8, a: __m512i) -> __m512d {
2444 unsafe {
        let b = _mm512_cvtepi64_pd(a).as_f64x8();
        transmute(simd_select_bitmask(k, b, f64x8::ZERO))
2447 }
2448}
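
// Illustrative sketch (hypothetical helper, arbitrary values): contrasts the
// write-masked and zero-masked forms of the signed 64-bit to f64 conversion.
// Lanes whose mask bit is clear either keep `src` or become 0.0.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512dq")]
fn example_cvtepi64_pd_masking() {
    unsafe {
        let a = _mm512_set_epi64(80, 70, 60, 50, 40, 30, 20, 10);
        let src = _mm512_set1_pd(-1.0);
        let k = 0b0000_1111;
        // Write-masked: lanes 4..8 keep the corresponding lanes of `src`.
        let masked: [f64; 8] = transmute(_mm512_mask_cvtepi64_pd(src, k, a));
        assert_eq!(masked, [10.0, 20.0, 30.0, 40.0, -1.0, -1.0, -1.0, -1.0]);
        // Zero-masked: lanes 4..8 are zeroed instead.
        let zeroed: [f64; 8] = transmute(_mm512_maskz_cvtepi64_pd(k, a));
        assert_eq!(zeroed, [10.0, 20.0, 30.0, 40.0, 0.0, 0.0, 0.0, 0.0]);
    }
}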
2449
2450/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2451/// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of:
2452///
2453/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2454/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2455/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2456/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2457/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2458///
2459/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepi64_ps&ig_expand=1443)
2460#[inline]
2461#[target_feature(enable = "avx512dq")]
2462#[cfg_attr(test, assert_instr(vcvtqq2ps, ROUNDING = 8))]
2463#[rustc_legacy_const_generics(1)]
2464#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2465pub fn _mm512_cvt_roundepi64_ps<const ROUNDING: i32>(a: __m512i) -> __m256 {
2466 unsafe {
2467 static_assert_rounding!(ROUNDING);
        transmute(vcvtqq2ps_512(a.as_i64x8(), ROUNDING))
2469 }
2470}
2471
2472/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2473/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
2474/// not set). Rounding is done according to the ROUNDING parameter, which can be one of:
2475///
2476/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2477/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2478/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2479/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2480/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2481///
2482/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepi64_ps&ig_expand=1444)
2483#[inline]
2484#[target_feature(enable = "avx512dq")]
2485#[cfg_attr(test, assert_instr(vcvtqq2ps, ROUNDING = 8))]
2486#[rustc_legacy_const_generics(3)]
2487#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2488pub fn _mm512_mask_cvt_roundepi64_ps<const ROUNDING: i32>(
2489 src: __m256,
2490 k: __mmask8,
2491 a: __m512i,
2492) -> __m256 {
2493 unsafe {
2494 static_assert_rounding!(ROUNDING);
        let b = _mm512_cvt_roundepi64_ps::<ROUNDING>(a).as_f32x8();
        transmute(simd_select_bitmask(k, b, src.as_f32x8()))
2497 }
2498}
2499
2500/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2501/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
2502/// Rounding is done according to the ROUNDING parameter, which can be one of:
2503///
2504/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2505/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2506/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2507/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2508/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2509///
2510/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepi64_ps&ig_expand=1445)
2511#[inline]
2512#[target_feature(enable = "avx512dq")]
2513#[cfg_attr(test, assert_instr(vcvtqq2ps, ROUNDING = 8))]
2514#[rustc_legacy_const_generics(2)]
2515#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2516pub fn _mm512_maskz_cvt_roundepi64_ps<const ROUNDING: i32>(k: __mmask8, a: __m512i) -> __m256 {
2517 unsafe {
2518 static_assert_rounding!(ROUNDING);
        let b = _mm512_cvt_roundepi64_ps::<ROUNDING>(a).as_f32x8();
        transmute(simd_select_bitmask(k, b, f32x8::ZERO))
2521 }
2522}
2523
2524/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2525/// and store the results in dst.
2526///
2527/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_ps&ig_expand=1723)
2528#[inline]
2529#[target_feature(enable = "avx512dq,avx512vl")]
2530#[cfg_attr(test, assert_instr(vcvtqq2ps))]
2531#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2532pub fn _mm_cvtepi64_ps(a: __m128i) -> __m128 {
    _mm_mask_cvtepi64_ps(_mm_undefined_ps(), 0xff, a)
2534}
2535
2536/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2537/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
2538/// not set).
2539///
2540/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_ps&ig_expand=1724)
2541#[inline]
2542#[target_feature(enable = "avx512dq,avx512vl")]
2543#[cfg_attr(test, assert_instr(vcvtqq2ps))]
2544#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2545pub fn _mm_mask_cvtepi64_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 {
    unsafe { transmute(vcvtqq2ps_128(a.as_i64x2(), src.as_f32x4(), k)) }
2547}
2548
2549/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2550/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
2551///
2552/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_ps&ig_expand=1725)
2553#[inline]
2554#[target_feature(enable = "avx512dq,avx512vl")]
2555#[cfg_attr(test, assert_instr(vcvtqq2ps))]
2556#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2557pub fn _mm_maskz_cvtepi64_ps(k: __mmask8, a: __m128i) -> __m128 {
    _mm_mask_cvtepi64_ps(_mm_setzero_ps(), k, a)
2559}
2560
2561/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2562/// and store the results in dst.
2563///
2564/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_ps&ig_expand=1726)
2565#[inline]
2566#[target_feature(enable = "avx512dq,avx512vl")]
2567#[cfg_attr(test, assert_instr(vcvtqq2ps))]
2568#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2569pub fn _mm256_cvtepi64_ps(a: __m256i) -> __m128 {
    unsafe { transmute(vcvtqq2ps_256(a.as_i64x4(), _MM_FROUND_CUR_DIRECTION)) }
2571}
2572
2573/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2574/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
2575/// not set).
2576///
2577/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_ps&ig_expand=1727)
2578#[inline]
2579#[target_feature(enable = "avx512dq,avx512vl")]
2580#[cfg_attr(test, assert_instr(vcvtqq2ps))]
2581#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2582pub fn _mm256_mask_cvtepi64_ps(src: __m128, k: __mmask8, a: __m256i) -> __m128 {
2583 unsafe {
        let b = _mm256_cvtepi64_ps(a).as_f32x4();
        transmute(simd_select_bitmask(k, b, src.as_f32x4()))
2586 }
2587}
2588
2589/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2590/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
2591///
2592/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_ps&ig_expand=1728)
2593#[inline]
2594#[target_feature(enable = "avx512dq,avx512vl")]
2595#[cfg_attr(test, assert_instr(vcvtqq2ps))]
2596#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2597pub fn _mm256_maskz_cvtepi64_ps(k: __mmask8, a: __m256i) -> __m128 {
2598 unsafe {
        let b = _mm256_cvtepi64_ps(a).as_f32x4();
        transmute(simd_select_bitmask(k, b, f32x4::ZERO))
2601 }
2602}
2603
2604/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2605/// and store the results in dst.
2606///
2607/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_ps&ig_expand=1729)
2608#[inline]
2609#[target_feature(enable = "avx512dq")]
2610#[cfg_attr(test, assert_instr(vcvtqq2ps))]
2611#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2612pub fn _mm512_cvtepi64_ps(a: __m512i) -> __m256 {
    unsafe { transmute(vcvtqq2ps_512(a.as_i64x8(), _MM_FROUND_CUR_DIRECTION)) }
2614}
2615
2616/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2617/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
2618/// not set).
2619///
2620/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_ps&ig_expand=1730)
2621#[inline]
2622#[target_feature(enable = "avx512dq")]
2623#[cfg_attr(test, assert_instr(vcvtqq2ps))]
2624#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2625pub fn _mm512_mask_cvtepi64_ps(src: __m256, k: __mmask8, a: __m512i) -> __m256 {
2626 unsafe {
        let b = _mm512_cvtepi64_ps(a).as_f32x8();
        transmute(simd_select_bitmask(k, b, src.as_f32x8()))
2629 }
2630}
2631
2632/// Convert packed signed 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2633/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
2634///
2635/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_ps&ig_expand=1731)
2636#[inline]
2637#[target_feature(enable = "avx512dq")]
2638#[cfg_attr(test, assert_instr(vcvtqq2ps))]
2639#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2640pub fn _mm512_maskz_cvtepi64_ps(k: __mmask8, a: __m512i) -> __m256 {
2641 unsafe {
        let b = _mm512_cvtepi64_ps(a).as_f32x8();
        transmute(simd_select_bitmask(k, b, f32x8::ZERO))
2644 }
2645}
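
// Illustrative sketch (hypothetical helper, arbitrary values): eight signed
// 64-bit lanes narrow to eight f32 lanes, so a 512-bit integer input produces
// a 256-bit float result. The values are chosen so every conversion is exact.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512dq")]
fn example_cvtepi64_ps() {
    unsafe {
        let a = _mm512_set_epi64(-4, -3, -2, -1, 3, 2, 1, 0);
        let r: [f32; 8] = transmute(_mm512_cvtepi64_ps(a));
        assert_eq!(r, [0.0, 1.0, 2.0, 3.0, -1.0, -2.0, -3.0, -4.0]);
    }
}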
2646
2647/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2648/// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of:
2649///
2650/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2651/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2652/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2653/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2654/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2655///
2656/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepu64_pd&ig_expand=1455)
2657#[inline]
2658#[target_feature(enable = "avx512dq")]
2659#[cfg_attr(test, assert_instr(vcvtuqq2pd, ROUNDING = 8))]
2660#[rustc_legacy_const_generics(1)]
2661#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2662pub fn _mm512_cvt_roundepu64_pd<const ROUNDING: i32>(a: __m512i) -> __m512d {
2663 unsafe {
2664 static_assert_rounding!(ROUNDING);
        transmute(vcvtuqq2pd_512(a.as_u64x8(), ROUNDING))
2666 }
2667}
2668
2669/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2670/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
2671/// not set). Rounding is done according to the ROUNDING parameter, which can be one of:
2672///
2673/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2674/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2675/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2676/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2677/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2678///
2679/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepu64_pd&ig_expand=1456)
2680#[inline]
2681#[target_feature(enable = "avx512dq")]
2682#[cfg_attr(test, assert_instr(vcvtuqq2pd, ROUNDING = 8))]
2683#[rustc_legacy_const_generics(3)]
2684#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2685pub fn _mm512_mask_cvt_roundepu64_pd<const ROUNDING: i32>(
2686 src: __m512d,
2687 k: __mmask8,
2688 a: __m512i,
2689) -> __m512d {
2690 unsafe {
2691 static_assert_rounding!(ROUNDING);
        let b = _mm512_cvt_roundepu64_pd::<ROUNDING>(a).as_f64x8();
        transmute(simd_select_bitmask(k, b, src.as_f64x8()))
2694 }
2695}
2696
2697/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2698/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
2699/// Rounding is done according to the ROUNDING parameter, which can be one of:
2700///
2701/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2702/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2703/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2704/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2705/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2706///
2707/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepu64_pd&ig_expand=1457)
2708#[inline]
2709#[target_feature(enable = "avx512dq")]
2710#[cfg_attr(test, assert_instr(vcvtuqq2pd, ROUNDING = 8))]
2711#[rustc_legacy_const_generics(2)]
2712#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2713pub fn _mm512_maskz_cvt_roundepu64_pd<const ROUNDING: i32>(k: __mmask8, a: __m512i) -> __m512d {
2714 unsafe {
2715 static_assert_rounding!(ROUNDING);
        let b = _mm512_cvt_roundepu64_pd::<ROUNDING>(a).as_f64x8();
        transmute(simd_select_bitmask(k, b, f64x8::ZERO))
2718 }
2719}
2720
2721/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2722/// and store the results in dst.
2723///
2724/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu64_pd&ig_expand=1827)
2725#[inline]
2726#[target_feature(enable = "avx512dq,avx512vl")]
2727#[cfg_attr(test, assert_instr(vcvtuqq2pd))]
2728#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2729pub fn _mm_cvtepu64_pd(a: __m128i) -> __m128d {
    unsafe { transmute(vcvtuqq2pd_128(a.as_u64x2(), _MM_FROUND_CUR_DIRECTION)) }
2731}
2732
2733/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2734/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
2735/// not set).
2736///
2737/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu64_pd&ig_expand=1828)
2738#[inline]
2739#[target_feature(enable = "avx512dq,avx512vl")]
2740#[cfg_attr(test, assert_instr(vcvtuqq2pd))]
2741#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2742pub fn _mm_mask_cvtepu64_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d {
2743 unsafe {
        let b = _mm_cvtepu64_pd(a).as_f64x2();
        transmute(simd_select_bitmask(k, b, src.as_f64x2()))
2746 }
2747}
2748
2749/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2750/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
2751///
2752/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu64_pd&ig_expand=1829)
2753#[inline]
2754#[target_feature(enable = "avx512dq,avx512vl")]
2755#[cfg_attr(test, assert_instr(vcvtuqq2pd))]
2756#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2757pub fn _mm_maskz_cvtepu64_pd(k: __mmask8, a: __m128i) -> __m128d {
2758 unsafe {
        let b = _mm_cvtepu64_pd(a).as_f64x2();
        transmute(simd_select_bitmask(k, b, f64x2::ZERO))
2761 }
2762}
2763
2764/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2765/// and store the results in dst.
2766///
2767/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu64_pd&ig_expand=1830)
2768#[inline]
2769#[target_feature(enable = "avx512dq,avx512vl")]
2770#[cfg_attr(test, assert_instr(vcvtuqq2pd))]
2771#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2772pub fn _mm256_cvtepu64_pd(a: __m256i) -> __m256d {
    unsafe { transmute(vcvtuqq2pd_256(a.as_u64x4(), _MM_FROUND_CUR_DIRECTION)) }
2774}
2775
2776/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2777/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
2778/// not set).
2779///
2780/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu64_pd&ig_expand=1831)
2781#[inline]
2782#[target_feature(enable = "avx512dq,avx512vl")]
2783#[cfg_attr(test, assert_instr(vcvtuqq2pd))]
2784#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2785pub fn _mm256_mask_cvtepu64_pd(src: __m256d, k: __mmask8, a: __m256i) -> __m256d {
2786 unsafe {
        let b = _mm256_cvtepu64_pd(a).as_f64x4();
        transmute(simd_select_bitmask(k, b, src.as_f64x4()))
2789 }
2790}
2791
2792/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2793/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
2794///
2795/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu64_pd&ig_expand=1832)
2796#[inline]
2797#[target_feature(enable = "avx512dq,avx512vl")]
2798#[cfg_attr(test, assert_instr(vcvtuqq2pd))]
2799#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2800pub fn _mm256_maskz_cvtepu64_pd(k: __mmask8, a: __m256i) -> __m256d {
2801 unsafe {
        let b = _mm256_cvtepu64_pd(a).as_f64x4();
        transmute(simd_select_bitmask(k, b, f64x4::ZERO))
2804 }
2805}
2806
2807/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2808/// and store the results in dst.
2809///
2810/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu64_pd&ig_expand=1833)
2811#[inline]
2812#[target_feature(enable = "avx512dq")]
2813#[cfg_attr(test, assert_instr(vcvtuqq2pd))]
2814#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2815pub fn _mm512_cvtepu64_pd(a: __m512i) -> __m512d {
    unsafe { transmute(vcvtuqq2pd_512(a.as_u64x8(), _MM_FROUND_CUR_DIRECTION)) }
2817}
2818
2819/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2820/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
2821/// not set).
2822///
2823/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu64_pd&ig_expand=1834)
2824#[inline]
2825#[target_feature(enable = "avx512dq")]
2826#[cfg_attr(test, assert_instr(vcvtuqq2pd))]
2827#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2828pub fn _mm512_mask_cvtepu64_pd(src: __m512d, k: __mmask8, a: __m512i) -> __m512d {
2829 unsafe {
        let b = _mm512_cvtepu64_pd(a).as_f64x8();
        transmute(simd_select_bitmask(k, b, src.as_f64x8()))
2832 }
2833}
2834
2835/// Convert packed unsigned 64-bit integers in a to packed double-precision (64-bit) floating-point elements,
2836/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
2837///
2838/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu64_pd&ig_expand=1835)
2839#[inline]
2840#[target_feature(enable = "avx512dq")]
2841#[cfg_attr(test, assert_instr(vcvtuqq2pd))]
2842#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2843pub fn _mm512_maskz_cvtepu64_pd(k: __mmask8, a: __m512i) -> __m512d {
2844 unsafe {
        let b = _mm512_cvtepu64_pd(a).as_f64x8();
        transmute(simd_select_bitmask(k, b, f64x8::ZERO))
2847 }
2848}
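
// Illustrative sketch (hypothetical helper, arbitrary values): the epu64 form
// treats each lane as unsigned, so a bit pattern that is -1 as i64 converts to
// a large positive double rather than to -1.0. The unsigned result shown
// assumes the default MXCSR rounding mode (round to nearest).
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512dq")]
fn example_cvtepu64_pd_vs_cvtepi64_pd() {
    unsafe {
        // Every lane holds the bit pattern of -1i64, i.e. u64::MAX.
        let a = _mm512_set1_epi64(-1);
        let signed: [f64; 8] = transmute(_mm512_cvtepi64_pd(a));
        let unsigned: [f64; 8] = transmute(_mm512_cvtepu64_pd(a));
        assert_eq!(signed[0], -1.0);
        // u64::MAX is not exactly representable as f64 and rounds to 2^64.
        assert_eq!(unsigned[0], u64::MAX as f64);
    }
}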
2849
2850/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2851/// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of:
2852///
2853/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2854/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2855/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2856/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2857/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2858///
2859/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepu64_ps&ig_expand=1461)
2860#[inline]
2861#[target_feature(enable = "avx512dq")]
2862#[cfg_attr(test, assert_instr(vcvtuqq2ps, ROUNDING = 8))]
2863#[rustc_legacy_const_generics(1)]
2864#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2865pub fn _mm512_cvt_roundepu64_ps<const ROUNDING: i32>(a: __m512i) -> __m256 {
2866 unsafe {
2867 static_assert_rounding!(ROUNDING);
        transmute(vcvtuqq2ps_512(a.as_u64x8(), ROUNDING))
2869 }
2870}
2871
2872/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2873/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
2874/// not set). Rounding is done according to the ROUNDING parameter, which can be one of:
2875///
2876/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2877/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2878/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2879/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2880/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2881///
2882/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepu64_ps&ig_expand=1462)
2883#[inline]
2884#[target_feature(enable = "avx512dq")]
2885#[cfg_attr(test, assert_instr(vcvtuqq2ps, ROUNDING = 8))]
2886#[rustc_legacy_const_generics(3)]
2887#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2888pub fn _mm512_mask_cvt_roundepu64_ps<const ROUNDING: i32>(
2889 src: __m256,
2890 k: __mmask8,
2891 a: __m512i,
2892) -> __m256 {
2893 unsafe {
2894 static_assert_rounding!(ROUNDING);
        let b = _mm512_cvt_roundepu64_ps::<ROUNDING>(a).as_f32x8();
        transmute(simd_select_bitmask(k, b, src.as_f32x8()))
2897 }
2898}
2899
2900/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2901/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
2902/// Rounding is done according to the ROUNDING parameter, which can be one of:
2903///
2904/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
2905/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
2906/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
2907/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
2908/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
2909///
2910/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepu64_ps&ig_expand=1463)
2911#[inline]
2912#[target_feature(enable = "avx512dq")]
2913#[cfg_attr(test, assert_instr(vcvtuqq2ps, ROUNDING = 8))]
2914#[rustc_legacy_const_generics(2)]
2915#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2916pub fn _mm512_maskz_cvt_roundepu64_ps<const ROUNDING: i32>(k: __mmask8, a: __m512i) -> __m256 {
2917 unsafe {
2918 static_assert_rounding!(ROUNDING);
        let b = _mm512_cvt_roundepu64_ps::<ROUNDING>(a).as_f32x8();
        transmute(simd_select_bitmask(k, b, f32x8::ZERO))
2921 }
2922}
2923
2924/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2925/// and store the results in dst.
2926///
2927/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu64_ps&ig_expand=1845)
2928#[inline]
2929#[target_feature(enable = "avx512dq,avx512vl")]
2930#[cfg_attr(test, assert_instr(vcvtuqq2ps))]
2931#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2932pub fn _mm_cvtepu64_ps(a: __m128i) -> __m128 {
    _mm_mask_cvtepu64_ps(_mm_undefined_ps(), 0xff, a)
2934}
2935
2936/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2937/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
2938/// not set).
2939///
2940/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu64_ps&ig_expand=1846)
2941#[inline]
2942#[target_feature(enable = "avx512dq,avx512vl")]
2943#[cfg_attr(test, assert_instr(vcvtuqq2ps))]
2944#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2945pub fn _mm_mask_cvtepu64_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 {
    unsafe { transmute(vcvtuqq2ps_128(a.as_u64x2(), src.as_f32x4(), k)) }
2947}
2948
2949/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2950/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
2951///
2952/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu64_ps&ig_expand=1847)
2953#[inline]
2954#[target_feature(enable = "avx512dq,avx512vl")]
2955#[cfg_attr(test, assert_instr(vcvtuqq2ps))]
2956#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2957pub fn _mm_maskz_cvtepu64_ps(k: __mmask8, a: __m128i) -> __m128 {
    _mm_mask_cvtepu64_ps(_mm_setzero_ps(), k, a)
2959}
2960
2961/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2962/// and store the results in dst.
2963///
2964/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu64_ps&ig_expand=1848)
2965#[inline]
2966#[target_feature(enable = "avx512dq,avx512vl")]
2967#[cfg_attr(test, assert_instr(vcvtuqq2ps))]
2968#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2969pub fn _mm256_cvtepu64_ps(a: __m256i) -> __m128 {
    unsafe { transmute(vcvtuqq2ps_256(a.as_u64x4(), _MM_FROUND_CUR_DIRECTION)) }
2971}
2972
2973/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2974/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
2975/// not set).
2976///
2977/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu64_ps&ig_expand=1849)
2978#[inline]
2979#[target_feature(enable = "avx512dq,avx512vl")]
2980#[cfg_attr(test, assert_instr(vcvtuqq2ps))]
2981#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2982pub fn _mm256_mask_cvtepu64_ps(src: __m128, k: __mmask8, a: __m256i) -> __m128 {
2983 unsafe {
        let b = _mm256_cvtepu64_ps(a).as_f32x4();
        transmute(simd_select_bitmask(k, b, src.as_f32x4()))
2986 }
2987}
2988
2989/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
2990/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
2991///
2992/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu64_ps&ig_expand=1850)
2993#[inline]
2994#[target_feature(enable = "avx512dq,avx512vl")]
2995#[cfg_attr(test, assert_instr(vcvtuqq2ps))]
2996#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2997pub fn _mm256_maskz_cvtepu64_ps(k: __mmask8, a: __m256i) -> __m128 {
2998 unsafe {
        let b = _mm256_cvtepu64_ps(a).as_f32x4();
        transmute(simd_select_bitmask(k, b, f32x4::ZERO))
3001 }
3002}
3003
3004/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
3005/// and store the results in dst.
3006///
3007/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu64_ps&ig_expand=1851)
3008#[inline]
3009#[target_feature(enable = "avx512dq")]
3010#[cfg_attr(test, assert_instr(vcvtuqq2ps))]
3011#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3012pub fn _mm512_cvtepu64_ps(a: __m512i) -> __m256 {
    unsafe { transmute(vcvtuqq2ps_512(a.as_u64x8(), _MM_FROUND_CUR_DIRECTION)) }
3014}
3015
3016/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
3017/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
3018/// not set).
3019///
3020/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu64_ps&ig_expand=1852)
3021#[inline]
3022#[target_feature(enable = "avx512dq")]
3023#[cfg_attr(test, assert_instr(vcvtuqq2ps))]
3024#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3025pub fn _mm512_mask_cvtepu64_ps(src: __m256, k: __mmask8, a: __m512i) -> __m256 {
3026 unsafe {
        let b = _mm512_cvtepu64_ps(a).as_f32x8();
        transmute(simd_select_bitmask(k, b, src.as_f32x8()))
3029 }
3030}
3031
3032/// Convert packed unsigned 64-bit integers in a to packed single-precision (32-bit) floating-point elements,
3033/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
3034///
3035/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu64_ps&ig_expand=1853)
3036#[inline]
3037#[target_feature(enable = "avx512dq")]
3038#[cfg_attr(test, assert_instr(vcvtuqq2ps))]
3039#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3040pub fn _mm512_maskz_cvtepu64_ps(k: __mmask8, a: __m512i) -> __m256 {
3041 unsafe {
        let b = _mm512_cvtepu64_ps(a).as_f32x8();
        transmute(simd_select_bitmask(k, b, f32x8::ZERO))
3044 }
3045}
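
// Illustrative sketch (hypothetical helper, arbitrary values): unsigned 64-bit
// lanes narrow to f32. The top lane holds 2^63, which would be negative when
// reinterpreted as i64 but converts to a large positive float here.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512dq")]
fn example_cvtepu64_ps() {
    unsafe {
        let a = _mm512_set_epi64(i64::MIN, 6, 5, 4, 3, 2, 1, 0);
        let r: [f32; 8] = transmute(_mm512_cvtepu64_ps(a));
        let expected = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, (1u64 << 63) as f32];
        assert_eq!(r, expected);
    }
}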
3046
3047/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
3048/// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of:
3049///
3050/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3051/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3052/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3053/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3054/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3055///
3056/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_epi64&ig_expand=1472)
3057#[inline]
3058#[target_feature(enable = "avx512dq")]
3059#[cfg_attr(test, assert_instr(vcvtpd2qq, ROUNDING = 8))]
3060#[rustc_legacy_const_generics(1)]
3061#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3062pub fn _mm512_cvt_roundpd_epi64<const ROUNDING: i32>(a: __m512d) -> __m512i {
3063 static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundpd_epi64::<ROUNDING>(_mm512_undefined_epi32(), 0xff, a)
3065}
3066
3067/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
3068/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
3069/// not set). Rounding is done according to the ROUNDING parameter, which can be one of:
3070///
3071/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3072/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3073/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3074/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3075/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3076///
3077/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_epi64&ig_expand=1473)
3078#[inline]
3079#[target_feature(enable = "avx512dq")]
3080#[cfg_attr(test, assert_instr(vcvtpd2qq, ROUNDING = 8))]
3081#[rustc_legacy_const_generics(3)]
3082#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3083pub fn _mm512_mask_cvt_roundpd_epi64<const ROUNDING: i32>(
3084 src: __m512i,
3085 k: __mmask8,
3086 a: __m512d,
3087) -> __m512i {
3088 unsafe {
3089 static_assert_rounding!(ROUNDING);
        transmute(vcvtpd2qq_512(a.as_f64x8(), src.as_i64x8(), k, ROUNDING))
3091 }
3092}
3093
3094/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
3095/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
3096/// Rounding is done according to the ROUNDING parameter, which can be one of:
3097///
3098/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3099/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3100/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3101/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3102/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3103///
3104/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_epi64&ig_expand=1474)
3105#[inline]
3106#[target_feature(enable = "avx512dq")]
3107#[cfg_attr(test, assert_instr(vcvtpd2qq, ROUNDING = 8))]
3108#[rustc_legacy_const_generics(2)]
3109#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3110pub fn _mm512_maskz_cvt_roundpd_epi64<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m512i {
3111 static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundpd_epi64::<ROUNDING>(_mm512_setzero_si512(), k, a)
3113}
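
// Illustrative sketch (hypothetical helper, arbitrary values): the same input
// converted with two different ROUNDING constants. Under round-to-nearest-even
// 2.5 becomes 2, while rounding toward positive infinity yields 3.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512dq")]
fn example_cvt_roundpd_epi64() {
    unsafe {
        const NEAREST: i32 = _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
        const UP: i32 = _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC;
        let a = _mm512_set1_pd(2.5);
        let nearest: [i64; 8] = transmute(_mm512_cvt_roundpd_epi64::<NEAREST>(a));
        let up: [i64; 8] = transmute(_mm512_cvt_roundpd_epi64::<UP>(a));
        assert_eq!(nearest[0], 2);
        assert_eq!(up[0], 3);
    }
}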
3114
3115/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
3116/// and store the results in dst.
3117///
3118/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpd_epi64&ig_expand=1941)
3119#[inline]
3120#[target_feature(enable = "avx512dq,avx512vl")]
3121#[cfg_attr(test, assert_instr(vcvtpd2qq))]
3122#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3123pub fn _mm_cvtpd_epi64(a: __m128d) -> __m128i {
    _mm_mask_cvtpd_epi64(_mm_undefined_si128(), 0xff, a)
3125}
3126
3127/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
3128/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
3129/// not set).
3130///
3131/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_epi64&ig_expand=1942)
3132#[inline]
3133#[target_feature(enable = "avx512dq,avx512vl")]
3134#[cfg_attr(test, assert_instr(vcvtpd2qq))]
3135#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3136pub fn _mm_mask_cvtpd_epi64(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
    unsafe { transmute(vcvtpd2qq_128(a.as_f64x2(), src.as_i64x2(), k)) }
3138}
3139
3140/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
3141/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
3142///
3143/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_epi64&ig_expand=1943)
3144#[inline]
3145#[target_feature(enable = "avx512dq,avx512vl")]
3146#[cfg_attr(test, assert_instr(vcvtpd2qq))]
3147#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3148pub fn _mm_maskz_cvtpd_epi64(k: __mmask8, a: __m128d) -> __m128i {
    _mm_mask_cvtpd_epi64(_mm_setzero_si128(), k, a)
3150}
3151
3152/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
3153/// and store the results in dst.
3154///
3155/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtpd_epi64&ig_expand=1944)
3156#[inline]
3157#[target_feature(enable = "avx512dq,avx512vl")]
3158#[cfg_attr(test, assert_instr(vcvtpd2qq))]
3159#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3160pub fn _mm256_cvtpd_epi64(a: __m256d) -> __m256i {
    _mm256_mask_cvtpd_epi64(_mm256_undefined_si256(), 0xff, a)
3162}
3163
3164/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
3165/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
3166/// not set).
3167///
3168/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_epi64&ig_expand=1945)
3169#[inline]
3170#[target_feature(enable = "avx512dq,avx512vl")]
3171#[cfg_attr(test, assert_instr(vcvtpd2qq))]
3172#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3173pub fn _mm256_mask_cvtpd_epi64(src: __m256i, k: __mmask8, a: __m256d) -> __m256i {
    unsafe { transmute(vcvtpd2qq_256(a.as_f64x4(), src.as_i64x4(), k)) }
3175}
3176
3177/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
3178/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
3179///
3180/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_epi64&ig_expand=1946)
3181#[inline]
3182#[target_feature(enable = "avx512dq,avx512vl")]
3183#[cfg_attr(test, assert_instr(vcvtpd2qq))]
3184#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3185pub fn _mm256_maskz_cvtpd_epi64(k: __mmask8, a: __m256d) -> __m256i {
    _mm256_mask_cvtpd_epi64(_mm256_setzero_si256(), k, a)
3187}
3188
3189/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
3190/// and store the results in dst.
3191///
3192/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_epi64&ig_expand=1947)
3193#[inline]
3194#[target_feature(enable = "avx512dq")]
3195#[cfg_attr(test, assert_instr(vcvtpd2qq))]
3196#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3197pub fn _mm512_cvtpd_epi64(a: __m512d) -> __m512i {
    _mm512_mask_cvtpd_epi64(_mm512_undefined_epi32(), 0xff, a)
3199}
3200
3201/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
3202/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
3203/// not set).
3204///
3205/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_epi64&ig_expand=1948)
3206#[inline]
3207#[target_feature(enable = "avx512dq")]
3208#[cfg_attr(test, assert_instr(vcvtpd2qq))]
3209#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3210pub fn _mm512_mask_cvtpd_epi64(src: __m512i, k: __mmask8, a: __m512d) -> __m512i {
3211 unsafe {
        transmute(vcvtpd2qq_512(
            a.as_f64x8(),
            src.as_i64x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
3218 }
3219}
3220
3221/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers,
3222/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
3223///
3224/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_epi64&ig_expand=1949)
3225#[inline]
3226#[target_feature(enable = "avx512dq")]
3227#[cfg_attr(test, assert_instr(vcvtpd2qq))]
3228#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3229pub fn _mm512_maskz_cvtpd_epi64(k: __mmask8, a: __m512d) -> __m512i {
    _mm512_mask_cvtpd_epi64(_mm512_setzero_si512(), k, a)
3231}
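
// Illustrative sketch (hypothetical helper, arbitrary values): without an
// explicit ROUNDING constant the conversion uses MXCSR.RC, which defaults to
// round-to-nearest-even, so halfway values go to the even integer.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512dq")]
fn example_cvtpd_epi64_default_rounding() {
    unsafe {
        let a = _mm512_set_pd(3.5, 2.5, 1.5, 0.5, -0.5, -1.5, -2.5, -3.5);
        let r: [i64; 8] = transmute(_mm512_cvtpd_epi64(a));
        assert_eq!(r, [-4, -2, -2, 0, 0, 2, 2, 4]);
    }
}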
3232
3233/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
3234/// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of:
3235///
3236/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3237/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3238/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3239/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3240/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3241///
3242/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_epi64&ig_expand=1514)
3243#[inline]
3244#[target_feature(enable = "avx512dq")]
3245#[cfg_attr(test, assert_instr(vcvtps2qq, ROUNDING = 8))]
3246#[rustc_legacy_const_generics(1)]
3247#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3248pub fn _mm512_cvt_roundps_epi64<const ROUNDING: i32>(a: __m256) -> __m512i {
3249 static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundps_epi64::<ROUNDING>(_mm512_undefined_epi32(), 0xff, a)
3251}
3252
3253/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
3254/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
3255/// not set). Rounding is done according to the ROUNDING parameter, which can be one of:
3256///
3257/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3258/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3259/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3260/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3261/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3262///
3263/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_epi64&ig_expand=1515)
3264#[inline]
3265#[target_feature(enable = "avx512dq")]
3266#[cfg_attr(test, assert_instr(vcvtps2qq, ROUNDING = 8))]
3267#[rustc_legacy_const_generics(3)]
3268#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3269pub fn _mm512_mask_cvt_roundps_epi64<const ROUNDING: i32>(
3270 src: __m512i,
3271 k: __mmask8,
3272 a: __m256,
3273) -> __m512i {
3274 unsafe {
3275 static_assert_rounding!(ROUNDING);
        transmute(vcvtps2qq_512(a.as_f32x8(), src.as_i64x8(), k, ROUNDING))
3277 }
3278}
3279
3280/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
3281/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
3282/// Rounding is done according to the ROUNDING parameter, which can be one of:
3283///
3284/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3285/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3286/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3287/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3288/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3289///
3290/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_epi64&ig_expand=1516)
3291#[inline]
3292#[target_feature(enable = "avx512dq")]
3293#[cfg_attr(test, assert_instr(vcvtps2qq, ROUNDING = 8))]
3294#[rustc_legacy_const_generics(2)]
3295#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3296pub fn _mm512_maskz_cvt_roundps_epi64<const ROUNDING: i32>(k: __mmask8, a: __m256) -> __m512i {
3297 static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundps_epi64::<ROUNDING>(_mm512_setzero_si512(), k, a)
3299}
3300
3301/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
3302/// and store the results in dst.
3303///
3304/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_epi64&ig_expand=2075)
3305#[inline]
3306#[target_feature(enable = "avx512dq,avx512vl")]
3307#[cfg_attr(test, assert_instr(vcvtps2qq))]
3308#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3309pub fn _mm_cvtps_epi64(a: __m128) -> __m128i {
    _mm_mask_cvtps_epi64(_mm_undefined_si128(), 0xff, a)
3311}
3312
3313/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
3314/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
3315/// not set).
3316///
3317/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_epi64&ig_expand=2076)
3318#[inline]
3319#[target_feature(enable = "avx512dq,avx512vl")]
3320#[cfg_attr(test, assert_instr(vcvtps2qq))]
3321#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3322pub fn _mm_mask_cvtps_epi64(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
    unsafe { transmute(vcvtps2qq_128(a.as_f32x4(), src.as_i64x2(), k)) }
3324}
3325
3326/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
3327/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
3328///
3329/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_epi64&ig_expand=2077)
3330#[inline]
3331#[target_feature(enable = "avx512dq,avx512vl")]
3332#[cfg_attr(test, assert_instr(vcvtps2qq))]
3333#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3334pub fn _mm_maskz_cvtps_epi64(k: __mmask8, a: __m128) -> __m128i {
    _mm_mask_cvtps_epi64(_mm_setzero_si128(), k, a)
3336}
3337
3338/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
3339/// and store the results in dst.
3340///
3341/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtps_epi64&ig_expand=2078)
3342#[inline]
3343#[target_feature(enable = "avx512dq,avx512vl")]
3344#[cfg_attr(test, assert_instr(vcvtps2qq))]
3345#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3346pub fn _mm256_cvtps_epi64(a: __m128) -> __m256i {
    _mm256_mask_cvtps_epi64(_mm256_undefined_si256(), 0xff, a)
3348}
3349
3350/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
3351/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
3352/// not set).
3353///
3354/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_epi64&ig_expand=2079)
3355#[inline]
3356#[target_feature(enable = "avx512dq,avx512vl")]
3357#[cfg_attr(test, assert_instr(vcvtps2qq))]
3358#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3359pub fn _mm256_mask_cvtps_epi64(src: __m256i, k: __mmask8, a: __m128) -> __m256i {
    unsafe { transmute(vcvtps2qq_256(a.as_f32x4(), src.as_i64x4(), k)) }
3361}
3362
3363/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
3364/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
3365///
3366/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_epi64&ig_expand=2080)
3367#[inline]
3368#[target_feature(enable = "avx512dq,avx512vl")]
3369#[cfg_attr(test, assert_instr(vcvtps2qq))]
3370#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_maskz_cvtps_epi64(k: __mmask8, a: __m128) -> __m256i {
    _mm256_mask_cvtps_epi64(_mm256_setzero_si256(), k, a)
}
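
// NOTE: hedged sketch of the writemask behaviour (illustration only, assumes
// `avx512dq,avx512vl`): lanes whose mask bit is clear come from `src` instead of being zeroed.
//
//     let src = _mm256_set1_epi64x(-1);
//     let a = _mm_set_ps(4.5, 3.5, 2.5, 1.5);          // lanes: 1.5, 2.5, 3.5, 4.5
//     let r = _mm256_mask_cvtps_epi64(src, 0b0101, a); // nearest-even: [2, -1, 4, -1]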
3374
3375/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
3376/// and store the results in dst.
3377///
3378/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_epi64&ig_expand=2081)
3379#[inline]
3380#[target_feature(enable = "avx512dq")]
3381#[cfg_attr(test, assert_instr(vcvtps2qq))]
3382#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvtps_epi64(a: __m256) -> __m512i {
    _mm512_mask_cvtps_epi64(_mm512_undefined_epi32(), 0xff, a)
}
3386
3387/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
3388/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
3389/// not set).
3390///
3391/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_epi64&ig_expand=2082)
3392#[inline]
3393#[target_feature(enable = "avx512dq")]
3394#[cfg_attr(test, assert_instr(vcvtps2qq))]
3395#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvtps_epi64(src: __m512i, k: __mmask8, a: __m256) -> __m512i {
    unsafe {
        transmute(vcvtps2qq_512(
            a.as_f32x8(),
            src.as_i64x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}
3406
3407/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers,
3408/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
3409///
3410/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_epi64&ig_expand=2083)
3411#[inline]
3412#[target_feature(enable = "avx512dq")]
3413#[cfg_attr(test, assert_instr(vcvtps2qq))]
3414#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvtps_epi64(k: __mmask8, a: __m256) -> __m512i {
    _mm512_mask_cvtps_epi64(_mm512_setzero_si512(), k, a)
}
3418
3419/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
3420/// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of:
3421///
3422/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3423/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3424/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3425/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3426/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3427///
3428/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_epu64&ig_expand=1478)
3429#[inline]
3430#[target_feature(enable = "avx512dq")]
3431#[cfg_attr(test, assert_instr(vcvtpd2uqq, ROUNDING = 8))]
3432#[rustc_legacy_const_generics(1)]
3433#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvt_roundpd_epu64<const ROUNDING: i32>(a: __m512d) -> __m512i {
    static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundpd_epu64::<ROUNDING>(_mm512_undefined_epi32(), 0xff, a)
}
3438
3439/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
3440/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
3441/// not set). Rounding is done according to the ROUNDING parameter, which can be one of:
3442///
3443/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3444/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3445/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3446/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3447/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3448///
3449/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_epu64&ig_expand=1479)
3450#[inline]
3451#[target_feature(enable = "avx512dq")]
3452#[cfg_attr(test, assert_instr(vcvtpd2uqq, ROUNDING = 8))]
3453#[rustc_legacy_const_generics(3)]
3454#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvt_roundpd_epu64<const ROUNDING: i32>(
    src: __m512i,
    k: __mmask8,
    a: __m512d,
) -> __m512i {
    unsafe {
        static_assert_rounding!(ROUNDING);
        transmute(vcvtpd2uqq_512(a.as_f64x8(), src.as_u64x8(), k, ROUNDING))
    }
}
3465
3466/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
3467/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
3468/// Rounding is done according to the ROUNDING parameter, which can be one of:
3469///
3470/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3471/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3472/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3473/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3474/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3475///
3476/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_epu64&ig_expand=1480)
3477#[inline]
3478#[target_feature(enable = "avx512dq")]
3479#[cfg_attr(test, assert_instr(vcvtpd2uqq, ROUNDING = 8))]
3480#[rustc_legacy_const_generics(2)]
3481#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvt_roundpd_epu64<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m512i {
    static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundpd_epu64::<ROUNDING>(_mm512_setzero_si512(), k, a)
}
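
// NOTE: hedged sketch of how the ROUNDING parameter changes the result (illustration only,
// assumes `avx512dq`):
//
//     let a = _mm512_set1_pd(2.5);
//     let dn = _mm512_cvt_roundpd_epu64::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a); // all lanes 2
//     let up = _mm512_cvt_roundpd_epu64::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a); // all lanes 3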
3486
3487/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
3488/// and store the results in dst.
3489///
3490/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpd_epu64&ig_expand=1959)
3491#[inline]
3492#[target_feature(enable = "avx512dq,avx512vl")]
3493#[cfg_attr(test, assert_instr(vcvtpd2uqq))]
3494#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_cvtpd_epu64(a: __m128d) -> __m128i {
    _mm_mask_cvtpd_epu64(_mm_undefined_si128(), 0xff, a)
}
3498
3499/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
3500/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
3501/// not set).
3502///
3503/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_epu64&ig_expand=1960)
3504#[inline]
3505#[target_feature(enable = "avx512dq,avx512vl")]
3506#[cfg_attr(test, assert_instr(vcvtpd2uqq))]
3507#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_cvtpd_epu64(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
    unsafe { transmute(vcvtpd2uqq_128(a.as_f64x2(), src.as_u64x2(), k)) }
}
3511
3512/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
3513/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
3514///
3515/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_epu64&ig_expand=1961)
3516#[inline]
3517#[target_feature(enable = "avx512dq,avx512vl")]
3518#[cfg_attr(test, assert_instr(vcvtpd2uqq))]
3519#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_maskz_cvtpd_epu64(k: __mmask8, a: __m128d) -> __m128i {
    _mm_mask_cvtpd_epu64(_mm_setzero_si128(), k, a)
}
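
// NOTE: hedged sketch (illustration only, assumes `avx512dq,avx512vl`); with the default
// MXCSR mode both lanes round to nearest-even:
//
//     let a = _mm_set_pd(7.5, 0.25);          // lanes: 0.25, 7.5
//     let r = _mm_maskz_cvtpd_epu64(0b11, a); // [0, 8]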
3523
3524/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
3525/// and store the results in dst.
3526///
3527/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtpd_epu64&ig_expand=1962)
3528#[inline]
3529#[target_feature(enable = "avx512dq,avx512vl")]
3530#[cfg_attr(test, assert_instr(vcvtpd2uqq))]
3531#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_cvtpd_epu64(a: __m256d) -> __m256i {
    _mm256_mask_cvtpd_epu64(_mm256_undefined_si256(), 0xff, a)
}
3535
3536/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
3537/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
3538/// not set).
3539///
3540/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_epu64&ig_expand=1963)
3541#[inline]
3542#[target_feature(enable = "avx512dq,avx512vl")]
3543#[cfg_attr(test, assert_instr(vcvtpd2uqq))]
3544#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_cvtpd_epu64(src: __m256i, k: __mmask8, a: __m256d) -> __m256i {
    unsafe { transmute(vcvtpd2uqq_256(a.as_f64x4(), src.as_u64x4(), k)) }
}
3548
3549/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
3550/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
3551///
3552/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_epu64&ig_expand=1964)
3553#[inline]
3554#[target_feature(enable = "avx512dq,avx512vl")]
3555#[cfg_attr(test, assert_instr(vcvtpd2uqq))]
3556#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_maskz_cvtpd_epu64(k: __mmask8, a: __m256d) -> __m256i {
    _mm256_mask_cvtpd_epu64(_mm256_setzero_si256(), k, a)
}
3560
3561/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
3562/// and store the results in dst.
3563///
3564/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_epu64&ig_expand=1965)
3565#[inline]
3566#[target_feature(enable = "avx512dq")]
3567#[cfg_attr(test, assert_instr(vcvtpd2uqq))]
3568#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvtpd_epu64(a: __m512d) -> __m512i {
    _mm512_mask_cvtpd_epu64(_mm512_undefined_epi32(), 0xff, a)
}
3572
3573/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
3574/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
3575/// not set).
3576///
3577/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_epu64&ig_expand=1966)
3578#[inline]
3579#[target_feature(enable = "avx512dq")]
3580#[cfg_attr(test, assert_instr(vcvtpd2uqq))]
3581#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvtpd_epu64(src: __m512i, k: __mmask8, a: __m512d) -> __m512i {
    unsafe {
        transmute(vcvtpd2uqq_512(
            a.as_f64x8(),
            src.as_u64x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}
3592
3593/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers,
3594/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
3595///
3596/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_epu64&ig_expand=1967)
3597#[inline]
3598#[target_feature(enable = "avx512dq")]
3599#[cfg_attr(test, assert_instr(vcvtpd2uqq))]
3600#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvtpd_epu64(k: __mmask8, a: __m512d) -> __m512i {
    _mm512_mask_cvtpd_epu64(_mm512_setzero_si512(), k, a)
}
3604
3605/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
3606/// and store the results in dst. Rounding is done according to the ROUNDING parameter, which can be one of:
3607///
3608/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3609/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3610/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3611/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3612/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3613///
3614/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_epu64&ig_expand=1520)
3615#[inline]
3616#[target_feature(enable = "avx512dq")]
3617#[cfg_attr(test, assert_instr(vcvtps2uqq, ROUNDING = 8))]
3618#[rustc_legacy_const_generics(1)]
3619#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvt_roundps_epu64<const ROUNDING: i32>(a: __m256) -> __m512i {
    static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundps_epu64::<ROUNDING>(_mm512_undefined_epi32(), 0xff, a)
}
3624
3625/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
3626/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
3627/// not set). Rounding is done according to the ROUNDING parameter, which can be one of:
3628///
3629/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3630/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3631/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3632/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3633/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3634///
3635/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_epu64&ig_expand=1521)
3636#[inline]
3637#[target_feature(enable = "avx512dq")]
3638#[cfg_attr(test, assert_instr(vcvtps2uqq, ROUNDING = 8))]
3639#[rustc_legacy_const_generics(3)]
3640#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvt_roundps_epu64<const ROUNDING: i32>(
    src: __m512i,
    k: __mmask8,
    a: __m256,
) -> __m512i {
    unsafe {
        static_assert_rounding!(ROUNDING);
        transmute(vcvtps2uqq_512(a.as_f32x8(), src.as_u64x8(), k, ROUNDING))
    }
}
3651
3652/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
3653/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
3654/// Rounding is done according to the ROUNDING parameter, which can be one of:
3655///
3656/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
3657/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
3658/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
3659/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
3660/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
3661///
3662/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_epu64&ig_expand=1522)
3663#[inline]
3664#[target_feature(enable = "avx512dq")]
3665#[cfg_attr(test, assert_instr(vcvtps2uqq, ROUNDING = 8))]
3666#[rustc_legacy_const_generics(2)]
3667#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvt_roundps_epu64<const ROUNDING: i32>(k: __mmask8, a: __m256) -> __m512i {
    static_assert_rounding!(ROUNDING);
    _mm512_mask_cvt_roundps_epu64::<ROUNDING>(_mm512_setzero_si512(), k, a)
}
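
// NOTE: hedged sketch (illustration only, assumes `avx512dq`); _MM_FROUND_TO_ZERO makes
// the conversion truncate:
//
//     let a = _mm256_set1_ps(1.75);
//     let r = _mm512_cvt_roundps_epu64::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a); // all lanes 1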
3672
3673/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
3674/// and store the results in dst.
3675///
3676/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_epu64&ig_expand=2093)
3677#[inline]
3678#[target_feature(enable = "avx512dq,avx512vl")]
3679#[cfg_attr(test, assert_instr(vcvtps2uqq))]
3680#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_cvtps_epu64(a: __m128) -> __m128i {
    _mm_mask_cvtps_epu64(_mm_undefined_si128(), 0xff, a)
}
3684
3685/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
3686/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
3687/// not set).
3688///
3689/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_epu64&ig_expand=2094)
3690#[inline]
3691#[target_feature(enable = "avx512dq,avx512vl")]
3692#[cfg_attr(test, assert_instr(vcvtps2uqq))]
3693#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_cvtps_epu64(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
    unsafe { transmute(vcvtps2uqq_128(a.as_f32x4(), src.as_u64x2(), k)) }
}
3697
3698/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
3699/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
3700///
3701/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_epu64&ig_expand=2095)
3702#[inline]
3703#[target_feature(enable = "avx512dq,avx512vl")]
3704#[cfg_attr(test, assert_instr(vcvtps2uqq))]
3705#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_maskz_cvtps_epu64(k: __mmask8, a: __m128) -> __m128i {
    _mm_mask_cvtps_epu64(_mm_setzero_si128(), k, a)
}
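
// NOTE: hedged sketch (illustration only, assumes `avx512dq,avx512vl`); only the two low
// f32 lanes are consumed:
//
//     let a = _mm_set_ps(0.0, 0.0, 3.5, 0.5); // lanes: 0.5, 3.5, 0.0, 0.0
//     let r = _mm_cvtps_epu64(a);             // nearest-even: [0, 4]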
3709
3710/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
3711/// and store the results in dst.
3712///
3713/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtps_epu64&ig_expand=2096)
3714#[inline]
3715#[target_feature(enable = "avx512dq,avx512vl")]
3716#[cfg_attr(test, assert_instr(vcvtps2uqq))]
3717#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_cvtps_epu64(a: __m128) -> __m256i {
    _mm256_mask_cvtps_epu64(_mm256_undefined_si256(), 0xff, a)
}
3721
3722/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
3723/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
3724/// not set).
3725///
3726/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_epu64&ig_expand=2097)
3727#[inline]
3728#[target_feature(enable = "avx512dq,avx512vl")]
3729#[cfg_attr(test, assert_instr(vcvtps2uqq))]
3730#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_cvtps_epu64(src: __m256i, k: __mmask8, a: __m128) -> __m256i {
    unsafe { transmute(vcvtps2uqq_256(a.as_f32x4(), src.as_u64x4(), k)) }
}
3734
3735/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
3736/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
3737///
3738/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_epu64&ig_expand=2098)
3739#[inline]
3740#[target_feature(enable = "avx512dq,avx512vl")]
3741#[cfg_attr(test, assert_instr(vcvtps2uqq))]
3742#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_maskz_cvtps_epu64(k: __mmask8, a: __m128) -> __m256i {
    _mm256_mask_cvtps_epu64(_mm256_setzero_si256(), k, a)
}
3746
3747/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
3748/// and store the results in dst.
3749///
3750/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_epu64&ig_expand=2099)
3751#[inline]
3752#[target_feature(enable = "avx512dq")]
3753#[cfg_attr(test, assert_instr(vcvtps2uqq))]
3754#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvtps_epu64(a: __m256) -> __m512i {
    _mm512_mask_cvtps_epu64(_mm512_undefined_epi32(), 0xff, a)
}
3758
3759/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
3760/// and store the results in dst using writemask k (elements are copied from src if the corresponding bit is
3761/// not set).
3762///
3763/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_epu64&ig_expand=2100)
3764#[inline]
3765#[target_feature(enable = "avx512dq")]
3766#[cfg_attr(test, assert_instr(vcvtps2uqq))]
3767#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvtps_epu64(src: __m512i, k: __mmask8, a: __m256) -> __m512i {
    unsafe {
        transmute(vcvtps2uqq_512(
            a.as_f32x8(),
            src.as_u64x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}
3778
3779/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers,
3780/// and store the results in dst using zeromask k (elements are zeroed out if the corresponding bit is not set).
3781///
3782/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_epu64&ig_expand=2101)
3783#[inline]
3784#[target_feature(enable = "avx512dq")]
3785#[cfg_attr(test, assert_instr(vcvtps2uqq))]
3786#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvtps_epu64(k: __mmask8, a: __m256) -> __m512i {
    _mm512_mask_cvtps_epu64(_mm512_setzero_si512(), k, a)
}
3790
3791/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
3792/// with truncation, and store the result in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC
3793/// to the sae parameter.
3794///
3795/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundpd_epi64&ig_expand=2264)
3796#[inline]
3797#[target_feature(enable = "avx512dq")]
3798#[cfg_attr(test, assert_instr(vcvttpd2qq, SAE = 8))]
3799#[rustc_legacy_const_generics(1)]
3800#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvtt_roundpd_epi64<const SAE: i32>(a: __m512d) -> __m512i {
    static_assert_sae!(SAE);
    _mm512_mask_cvtt_roundpd_epi64::<SAE>(_mm512_undefined_epi32(), 0xff, a)
}
3805
3806/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
3807/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
3808/// corresponding bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter.
3809///
3810/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundpd_epi64&ig_expand=2265)
3811#[inline]
3812#[target_feature(enable = "avx512dq")]
3813#[cfg_attr(test, assert_instr(vcvttpd2qq, SAE = 8))]
3814#[rustc_legacy_const_generics(3)]
3815#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvtt_roundpd_epi64<const SAE: i32>(
    src: __m512i,
    k: __mmask8,
    a: __m512d,
) -> __m512i {
    unsafe {
        static_assert_sae!(SAE);
        transmute(vcvttpd2qq_512(a.as_f64x8(), src.as_i64x8(), k, SAE))
    }
}
3826
3827/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
3828/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
3829/// bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter.
3830///
3831/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundpd_epi64&ig_expand=2266)
3832#[inline]
3833#[target_feature(enable = "avx512dq")]
3834#[cfg_attr(test, assert_instr(vcvttpd2qq, SAE = 8))]
3835#[rustc_legacy_const_generics(2)]
3836#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvtt_roundpd_epi64<const SAE: i32>(k: __mmask8, a: __m512d) -> __m512i {
    static_assert_sae!(SAE);
    _mm512_mask_cvtt_roundpd_epi64::<SAE>(_mm512_setzero_si512(), k, a)
}
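
// NOTE: hedged sketch (illustration only, assumes `avx512dq`); truncation always rounds
// toward zero, and _MM_FROUND_NO_EXC only suppresses exception flags:
//
//     let a = _mm512_set1_pd(-2.9);
//     let r = _mm512_cvtt_roundpd_epi64::<{ _MM_FROUND_NO_EXC }>(a); // all lanes -2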
3841
3842/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
3843/// with truncation, and store the result in dst.
3844///
3845/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttpd_epi64&ig_expand=2329)
3846#[inline]
3847#[target_feature(enable = "avx512dq,avx512vl")]
3848#[cfg_attr(test, assert_instr(vcvttpd2qq))]
3849#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_cvttpd_epi64(a: __m128d) -> __m128i {
    _mm_mask_cvttpd_epi64(_mm_undefined_si128(), 0xff, a)
}
3853
3854/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
3855/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
3856/// corresponding bit is not set).
3857///
3858/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttpd_epi64&ig_expand=2330)
3859#[inline]
3860#[target_feature(enable = "avx512dq,avx512vl")]
3861#[cfg_attr(test, assert_instr(vcvttpd2qq))]
3862#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_cvttpd_epi64(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
    unsafe { transmute(vcvttpd2qq_128(a.as_f64x2(), src.as_i64x2(), k)) }
}
3866
3867/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
3868/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
3869/// bit is not set).
3870///
3871/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttpd_epi64&ig_expand=2331)
3872#[inline]
3873#[target_feature(enable = "avx512dq,avx512vl")]
3874#[cfg_attr(test, assert_instr(vcvttpd2qq))]
3875#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_maskz_cvttpd_epi64(k: __mmask8, a: __m128d) -> __m128i {
    _mm_mask_cvttpd_epi64(_mm_setzero_si128(), k, a)
}
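
// NOTE: hedged sketch (illustration only, assumes `avx512dq,avx512vl`); values are
// truncated toward zero:
//
//     let a = _mm_set_pd(-1.5, 2.9); // lanes: 2.9, -1.5
//     let r = _mm_cvttpd_epi64(a);   // [2, -1]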
3879
3880/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
3881/// with truncation, and store the result in dst.
3882///
3883/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttpd_epi64&ig_expand=2332)
3884#[inline]
3885#[target_feature(enable = "avx512dq,avx512vl")]
3886#[cfg_attr(test, assert_instr(vcvttpd2qq))]
3887#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_cvttpd_epi64(a: __m256d) -> __m256i {
    _mm256_mask_cvttpd_epi64(_mm256_undefined_si256(), 0xff, a)
}
3891
3892/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
3893/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
3894/// corresponding bit is not set).
3895///
3896/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttpd_epi64&ig_expand=2333)
3897#[inline]
3898#[target_feature(enable = "avx512dq,avx512vl")]
3899#[cfg_attr(test, assert_instr(vcvttpd2qq))]
3900#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_cvttpd_epi64(src: __m256i, k: __mmask8, a: __m256d) -> __m256i {
    unsafe { transmute(vcvttpd2qq_256(a.as_f64x4(), src.as_i64x4(), k)) }
}
3904
3905/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
3906/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
3907/// bit is not set).
3908///
3909/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttpd_epi64&ig_expand=2334)
3910#[inline]
3911#[target_feature(enable = "avx512dq,avx512vl")]
3912#[cfg_attr(test, assert_instr(vcvttpd2qq))]
3913#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_maskz_cvttpd_epi64(k: __mmask8, a: __m256d) -> __m256i {
    _mm256_mask_cvttpd_epi64(_mm256_setzero_si256(), k, a)
}
3917
3918/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
3919/// with truncation, and store the result in dst.
3920///
3921/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttpd_epi64&ig_expand=2335)
3922#[inline]
3923#[target_feature(enable = "avx512dq")]
3924#[cfg_attr(test, assert_instr(vcvttpd2qq))]
3925#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvttpd_epi64(a: __m512d) -> __m512i {
    _mm512_mask_cvttpd_epi64(_mm512_undefined_epi32(), 0xff, a)
}
3929
3930/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
3931/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
3932/// corresponding bit is not set).
3933///
3934/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttpd_epi64&ig_expand=2336)
3935#[inline]
3936#[target_feature(enable = "avx512dq")]
3937#[cfg_attr(test, assert_instr(vcvttpd2qq))]
3938#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvttpd_epi64(src: __m512i, k: __mmask8, a: __m512d) -> __m512i {
    unsafe {
        transmute(vcvttpd2qq_512(
            a.as_f64x8(),
            src.as_i64x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}
3949
3950/// Convert packed double-precision (64-bit) floating-point elements in a to packed signed 64-bit integers
3951/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
3952/// bit is not set).
3953///
3954/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttpd_epi64&ig_expand=2337)
3955#[inline]
3956#[target_feature(enable = "avx512dq")]
3957#[cfg_attr(test, assert_instr(vcvttpd2qq))]
3958#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvttpd_epi64(k: __mmask8, a: __m512d) -> __m512i {
    _mm512_mask_cvttpd_epi64(_mm512_setzero_si512(), k, a)
}
3962
3963/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
3964/// with truncation, and store the result in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC
3965/// to the sae parameter.
3966///
3967/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundps_epi64&ig_expand=2294)
3968#[inline]
3969#[target_feature(enable = "avx512dq")]
3970#[cfg_attr(test, assert_instr(vcvttps2qq, SAE = 8))]
3971#[rustc_legacy_const_generics(1)]
3972#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvtt_roundps_epi64<const SAE: i32>(a: __m256) -> __m512i {
    static_assert_sae!(SAE);
    _mm512_mask_cvtt_roundps_epi64::<SAE>(_mm512_undefined_epi32(), 0xff, a)
}
3977
3978/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
3979/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
3980/// corresponding bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter.
3981///
3982/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundps_epi64&ig_expand=2295)
3983#[inline]
3984#[target_feature(enable = "avx512dq")]
3985#[cfg_attr(test, assert_instr(vcvttps2qq, SAE = 8))]
3986#[rustc_legacy_const_generics(3)]
3987#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvtt_roundps_epi64<const SAE: i32>(
    src: __m512i,
    k: __mmask8,
    a: __m256,
) -> __m512i {
    unsafe {
        static_assert_sae!(SAE);
        transmute(vcvttps2qq_512(a.as_f32x8(), src.as_i64x8(), k, SAE))
    }
}
3998
3999/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
4000/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
4001/// bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter.
4002///
4003/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundps_epi64&ig_expand=2296)
4004#[inline]
4005#[target_feature(enable = "avx512dq")]
4006#[cfg_attr(test, assert_instr(vcvttps2qq, SAE = 8))]
4007#[rustc_legacy_const_generics(2)]
4008#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvtt_roundps_epi64<const SAE: i32>(k: __mmask8, a: __m256) -> __m512i {
    static_assert_sae!(SAE);
    _mm512_mask_cvtt_roundps_epi64::<SAE>(_mm512_setzero_si512(), k, a)
}
4013
4014/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
4015/// with truncation, and store the result in dst.
4016///
4017/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttps_epi64&ig_expand=2420)
4018#[inline]
4019#[target_feature(enable = "avx512dq,avx512vl")]
4020#[cfg_attr(test, assert_instr(vcvttps2qq))]
4021#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_cvttps_epi64(a: __m128) -> __m128i {
    _mm_mask_cvttps_epi64(_mm_undefined_si128(), 0xff, a)
}
4025
4026/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
4027/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
4028/// corresponding bit is not set).
4029///
4030/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttps_epi64&ig_expand=2421)
4031#[inline]
4032#[target_feature(enable = "avx512dq,avx512vl")]
4033#[cfg_attr(test, assert_instr(vcvttps2qq))]
4034#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_cvttps_epi64(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
    unsafe { transmute(vcvttps2qq_128(a.as_f32x4(), src.as_i64x2(), k)) }
}
4038
4039/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
4040/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
4041/// bit is not set).
4042///
4043/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttps_epi64&ig_expand=2422)
4044#[inline]
4045#[target_feature(enable = "avx512dq,avx512vl")]
4046#[cfg_attr(test, assert_instr(vcvttps2qq))]
4047#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_maskz_cvttps_epi64(k: __mmask8, a: __m128) -> __m128i {
    _mm_mask_cvttps_epi64(_mm_setzero_si128(), k, a)
}
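
// NOTE: hedged sketch (illustration only, assumes `avx512dq,avx512vl`):
//
//     let a = _mm_set_ps(0.0, 0.0, -7.9, 5.5); // lanes: 5.5, -7.9, 0.0, 0.0
//     let r = _mm_cvttps_epi64(a);             // truncated: [5, -7]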
4051
4052/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
4053/// with truncation, and store the result in dst.
4054///
4055/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttps_epi64&ig_expand=2423)
4056#[inline]
4057#[target_feature(enable = "avx512dq,avx512vl")]
4058#[cfg_attr(test, assert_instr(vcvttps2qq))]
4059#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_cvttps_epi64(a: __m128) -> __m256i {
    _mm256_mask_cvttps_epi64(_mm256_undefined_si256(), 0xff, a)
}
4063
4064/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
4065/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
4066/// corresponding bit is not set).
4067///
4068/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttps_epi64&ig_expand=2424)
4069#[inline]
4070#[target_feature(enable = "avx512dq,avx512vl")]
4071#[cfg_attr(test, assert_instr(vcvttps2qq))]
4072#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_cvttps_epi64(src: __m256i, k: __mmask8, a: __m128) -> __m256i {
    unsafe { transmute(vcvttps2qq_256(a.as_f32x4(), src.as_i64x4(), k)) }
}
4076
4077/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
4078/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
4079/// bit is not set).
4080///
4081/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttps_epi64&ig_expand=2425)
4082#[inline]
4083#[target_feature(enable = "avx512dq,avx512vl")]
4084#[cfg_attr(test, assert_instr(vcvttps2qq))]
4085#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_maskz_cvttps_epi64(k: __mmask8, a: __m128) -> __m256i {
    _mm256_mask_cvttps_epi64(_mm256_setzero_si256(), k, a)
}
4089
4090/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
4091/// with truncation, and store the result in dst.
4092///
4093/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttps_epi64&ig_expand=2426)
4094#[inline]
4095#[target_feature(enable = "avx512dq")]
4096#[cfg_attr(test, assert_instr(vcvttps2qq))]
4097#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvttps_epi64(a: __m256) -> __m512i {
    _mm512_mask_cvttps_epi64(_mm512_undefined_epi32(), 0xff, a)
}
4101
4102/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
4103/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
4104/// corresponding bit is not set).
4105///
4106/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttps_epi64&ig_expand=2427)
4107#[inline]
4108#[target_feature(enable = "avx512dq")]
4109#[cfg_attr(test, assert_instr(vcvttps2qq))]
4110#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvttps_epi64(src: __m512i, k: __mmask8, a: __m256) -> __m512i {
    unsafe {
        transmute(vcvttps2qq_512(
            a.as_f32x8(),
            src.as_i64x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}
4121
4122/// Convert packed single-precision (32-bit) floating-point elements in a to packed signed 64-bit integers
4123/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
4124/// bit is not set).
4125///
4126/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttps_epi64&ig_expand=2428)
4127#[inline]
4128#[target_feature(enable = "avx512dq")]
4129#[cfg_attr(test, assert_instr(vcvttps2qq))]
4130#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvttps_epi64(k: __mmask8, a: __m256) -> __m512i {
    _mm512_mask_cvttps_epi64(_mm512_setzero_si512(), k, a)
}
4134
4135/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
4136/// with truncation, and store the result in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC
4137/// to the sae parameter.
4138///
4139/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundpd_epu64&ig_expand=1965)
4140#[inline]
4141#[target_feature(enable = "avx512dq")]
4142#[cfg_attr(test, assert_instr(vcvttpd2uqq, SAE = 8))]
4143#[rustc_legacy_const_generics(1)]
4144#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvtt_roundpd_epu64<const SAE: i32>(a: __m512d) -> __m512i {
    static_assert_sae!(SAE);
    _mm512_mask_cvtt_roundpd_epu64::<SAE>(_mm512_undefined_epi32(), 0xff, a)
}
4149
4150/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
4151/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
4152/// corresponding bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter.
4153///
4154/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundpd_epu64&ig_expand=1966)
4155#[inline]
4156#[target_feature(enable = "avx512dq")]
4157#[cfg_attr(test, assert_instr(vcvttpd2uqq, SAE = 8))]
4158#[rustc_legacy_const_generics(3)]
4159#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvtt_roundpd_epu64<const SAE: i32>(
    src: __m512i,
    k: __mmask8,
    a: __m512d,
) -> __m512i {
    unsafe {
        static_assert_sae!(SAE);
        transmute(vcvttpd2uqq_512(a.as_f64x8(), src.as_u64x8(), k, SAE))
    }
}
4170
4171/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
4172/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
4173/// bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter.
4174///
4175/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundpd_epu64&ig_expand=1967)
4176#[inline]
4177#[target_feature(enable = "avx512dq")]
4178#[cfg_attr(test, assert_instr(vcvttpd2uqq, SAE = 8))]
4179#[rustc_legacy_const_generics(2)]
4180#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvtt_roundpd_epu64<const SAE: i32>(k: __mmask8, a: __m512d) -> __m512i {
    static_assert_sae!(SAE);
    _mm512_mask_cvtt_roundpd_epu64::<SAE>(_mm512_setzero_si512(), k, a)
}
4185
4186/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
4187/// with truncation, and store the result in dst.
4188///
4189/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttpd_epu64&ig_expand=2347)
4190#[inline]
4191#[target_feature(enable = "avx512dq,avx512vl")]
4192#[cfg_attr(test, assert_instr(vcvttpd2uqq))]
4193#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_cvttpd_epu64(a: __m128d) -> __m128i {
    _mm_mask_cvttpd_epu64(_mm_undefined_si128(), 0xff, a)
}
4197
4198/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
4199/// with truncation, and store the result in dst using writemask k (elements are copied from src if the corresponding
4200/// bit is not set).
4201///
4202/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttpd_epu64&ig_expand=2348)
4203#[inline]
4204#[target_feature(enable = "avx512dq,avx512vl")]
4205#[cfg_attr(test, assert_instr(vcvttpd2uqq))]
4206#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_cvttpd_epu64(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
    unsafe { transmute(vcvttpd2uqq_128(a.as_f64x2(), src.as_u64x2(), k)) }
}
4210
4211/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
4212/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
4213/// bit is not set).
4214///
4215/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttpd_epu64&ig_expand=2349)
4216#[inline]
4217#[target_feature(enable = "avx512dq,avx512vl")]
4218#[cfg_attr(test, assert_instr(vcvttpd2uqq))]
4219#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_maskz_cvttpd_epu64(k: __mmask8, a: __m128d) -> __m128i {
    _mm_mask_cvttpd_epu64(_mm_setzero_si128(), k, a)
}
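
// NOTE: hedged sketch (illustration only, assumes `avx512dq,avx512vl`); negative inputs
// are out of range for the unsigned destination:
//
//     let a = _mm_set_pd(3.99, 0.75); // lanes: 0.75, 3.99
//     let r = _mm_cvttpd_epu64(a);    // truncated: [0, 3]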
4223
4224/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
4225/// with truncation, and store the result in dst.
4226///
4227/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttpd_epu64&ig_expand=2350)
4228#[inline]
4229#[target_feature(enable = "avx512dq,avx512vl")]
4230#[cfg_attr(test, assert_instr(vcvttpd2uqq))]
4231#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_cvttpd_epu64(a: __m256d) -> __m256i {
    _mm256_mask_cvttpd_epu64(_mm256_undefined_si256(), 0xff, a)
}
4235
4236/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
4237/// with truncation, and store the results in dst using writemask k (elements are copied from src if the corresponding
4238/// bit is not set).
4239///
4240/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttpd_epu64&ig_expand=2351)
4241#[inline]
4242#[target_feature(enable = "avx512dq,avx512vl")]
4243#[cfg_attr(test, assert_instr(vcvttpd2uqq))]
4244#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_mask_cvttpd_epu64(src: __m256i, k: __mmask8, a: __m256d) -> __m256i {
    unsafe { transmute(vcvttpd2uqq_256(a.as_f64x4(), src.as_u64x4(), k)) }
}
4248
4249/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
4250/// with truncation, and store the results in dst using zeromask k (elements are zeroed out if the corresponding
4251/// bit is not set).
4252///
4253/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttpd_epu64&ig_expand=2352)
4254#[inline]
4255#[target_feature(enable = "avx512dq,avx512vl")]
4256#[cfg_attr(test, assert_instr(vcvttpd2uqq))]
4257#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_maskz_cvttpd_epu64(k: __mmask8, a: __m256d) -> __m256i {
    _mm256_mask_cvttpd_epu64(_mm256_setzero_si256(), k, a)
}
4261
4262/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
4263/// with truncation, and store the result in dst.
4264///
4265/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttpd_epu64&ig_expand=2353)
4266#[inline]
4267#[target_feature(enable = "avx512dq")]
4268#[cfg_attr(test, assert_instr(vcvttpd2uqq))]
4269#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvttpd_epu64(a: __m512d) -> __m512i {
    _mm512_mask_cvttpd_epu64(_mm512_undefined_epi32(), 0xff, a)
}
4273
4274/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
4275/// with truncation, and store the result in dst using writemask k (elements are copied from src if the corresponding
4276/// bit is not set).
4277///
4278/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttpd_epu64&ig_expand=2354)
4279#[inline]
4280#[target_feature(enable = "avx512dq")]
4281#[cfg_attr(test, assert_instr(vcvttpd2uqq))]
4282#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvttpd_epu64(src: __m512i, k: __mmask8, a: __m512d) -> __m512i {
    unsafe {
        transmute(vcvttpd2uqq_512(
            a.as_f64x8(),
            src.as_u64x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
}
4293
4294/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 64-bit integers
4295/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
/// bit is not set).
///
4298/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttpd_epu64&ig_expand=2355)
4299#[inline]
4300#[target_feature(enable = "avx512dq")]
4301#[cfg_attr(test, assert_instr(vcvttpd2uqq))]
4302#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvttpd_epu64(k: __mmask8, a: __m512d) -> __m512i {
    _mm512_mask_cvttpd_epu64(_mm512_setzero_si512(), k, a)
}
4306
4307/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
4308/// with truncation, and store the result in dst. Exceptions can be suppressed by passing _MM_FROUND_NO_EXC
4309/// to the sae parameter.
4310///
4311/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundps_epu64&ig_expand=2300)
4312#[inline]
4313#[target_feature(enable = "avx512dq")]
4314#[cfg_attr(test, assert_instr(vcvttps2uqq, SAE = 8))]
4315#[rustc_legacy_const_generics(1)]
4316#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_cvtt_roundps_epu64<const SAE: i32>(a: __m256) -> __m512i {
    static_assert_sae!(SAE);
    _mm512_mask_cvtt_roundps_epu64::<SAE>(_mm512_undefined_epi32(), 0xff, a)
}
4321
4322/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
4323/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
4324/// corresponding bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter.
4325///
4326/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundps_epu64&ig_expand=2301)
4327#[inline]
4328#[target_feature(enable = "avx512dq")]
4329#[cfg_attr(test, assert_instr(vcvttps2uqq, SAE = 8))]
4330#[rustc_legacy_const_generics(3)]
4331#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_cvtt_roundps_epu64<const SAE: i32>(
    src: __m512i,
    k: __mmask8,
    a: __m256,
) -> __m512i {
    unsafe {
        static_assert_sae!(SAE);
        transmute(vcvttps2uqq_512(a.as_f32x8(), src.as_u64x8(), k, SAE))
    }
}
4342
4343/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
4344/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
4345/// bit is not set). Exceptions can be suppressed by passing _MM_FROUND_NO_EXC to the sae parameter.
4346///
4347/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundps_epu64&ig_expand=2302)
4348#[inline]
4349#[target_feature(enable = "avx512dq")]
4350#[cfg_attr(test, assert_instr(vcvttps2uqq, SAE = 8))]
4351#[rustc_legacy_const_generics(2)]
4352#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_maskz_cvtt_roundps_epu64<const SAE: i32>(k: __mmask8, a: __m256) -> __m512i {
    static_assert_sae!(SAE);
    _mm512_mask_cvtt_roundps_epu64::<SAE>(_mm512_setzero_si512(), k, a)
}
4357
4358/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
4359/// with truncation, and store the result in dst.
4360///
4361/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttps_epu64&ig_expand=2438)
4362#[inline]
4363#[target_feature(enable = "avx512dq,avx512vl")]
4364#[cfg_attr(test, assert_instr(vcvttps2uqq))]
4365#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_cvttps_epu64(a: __m128) -> __m128i {
    _mm_mask_cvttps_epu64(_mm_undefined_si128(), 0xff, a)
}
4369
4370/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
4371/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
4372/// corresponding bit is not set).
4373///
4374/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttps_epu64&ig_expand=2439)
4375#[inline]
4376#[target_feature(enable = "avx512dq,avx512vl")]
4377#[cfg_attr(test, assert_instr(vcvttps2uqq))]
4378#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_mask_cvttps_epu64(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
    unsafe { transmute(vcvttps2uqq_128(a.as_f32x4(), src.as_u64x2(), k)) }
}
4382
4383/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
4384/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
4385/// bit is not set).
4386///
4387/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttps_epu64&ig_expand=2440)
4388#[inline]
4389#[target_feature(enable = "avx512dq,avx512vl")]
4390#[cfg_attr(test, assert_instr(vcvttps2uqq))]
4391#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm_maskz_cvttps_epu64(k: __mmask8, a: __m128) -> __m128i {
    _mm_mask_cvttps_epu64(_mm_setzero_si128(), k, a)
}
4395
4396/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
4397/// with truncation, and store the result in dst.
4398///
4399/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttps_epu64&ig_expand=2441)
4400#[inline]
4401#[target_feature(enable = "avx512dq,avx512vl")]
4402#[cfg_attr(test, assert_instr(vcvttps2uqq))]
4403#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm256_cvttps_epu64(a: __m128) -> __m256i {
    _mm256_mask_cvttps_epu64(_mm256_undefined_si256(), 0xff, a)
}
4407
4408/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
4409/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
4410/// corresponding bit is not set).
4411///
4412/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttps_epu64&ig_expand=2442)
4413#[inline]
4414#[target_feature(enable = "avx512dq,avx512vl")]
4415#[cfg_attr(test, assert_instr(vcvttps2uqq))]
4416#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4417pub fn _mm256_mask_cvttps_epu64(src: __m256i, k: __mmask8, a: __m128) -> __m256i {
    unsafe { transmute(vcvttps2uqq_256(a.as_f32x4(), src.as_u64x4(), k)) }
4419}
4420
4421/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
4422/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
4423/// bit is not set).
4424///
4425/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttps_epu64&ig_expand=2443)
4426#[inline]
4427#[target_feature(enable = "avx512dq,avx512vl")]
4428#[cfg_attr(test, assert_instr(vcvttps2uqq))]
4429#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4430pub fn _mm256_maskz_cvttps_epu64(k: __mmask8, a: __m128) -> __m256i {
    _mm256_mask_cvttps_epu64(_mm256_setzero_si256(), k, a)
4432}
4433
4434/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
4435/// with truncation, and store the result in dst.
4436///
4437/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttps_epu64&ig_expand=2444)
4438#[inline]
4439#[target_feature(enable = "avx512dq")]
4440#[cfg_attr(test, assert_instr(vcvttps2uqq))]
4441#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4442pub fn _mm512_cvttps_epu64(a: __m256) -> __m512i {
    _mm512_mask_cvttps_epu64(_mm512_undefined_epi32(), 0xff, a)
4444}
4445
4446/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
4447/// with truncation, and store the result in dst using writemask k (elements are copied from src if the
4448/// corresponding bit is not set).
4449///
4450/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttps_epu64&ig_expand=2445)
4451#[inline]
4452#[target_feature(enable = "avx512dq")]
4453#[cfg_attr(test, assert_instr(vcvttps2uqq))]
4454#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4455pub fn _mm512_mask_cvttps_epu64(src: __m512i, k: __mmask8, a: __m256) -> __m512i {
    unsafe {
        transmute(vcvttps2uqq_512(
            a.as_f32x8(),
            src.as_u64x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
4464}
4465
4466/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 64-bit integers
4467/// with truncation, and store the result in dst using zeromask k (elements are zeroed out if the corresponding
4468/// bit is not set).
4469///
4470/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttps_epu64&ig_expand=2446)
4471#[inline]
4472#[target_feature(enable = "avx512dq")]
4473#[cfg_attr(test, assert_instr(vcvttps2uqq))]
4474#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4475pub fn _mm512_maskz_cvttps_epu64(k: __mmask8, a: __m256) -> __m512i {
    _mm512_mask_cvttps_epu64(_mm512_setzero_si512(), k, a)
4477}
4478
4479// Multiply-Low
4480
4481/// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store
4482/// the low 64 bits of the intermediate integers in `dst`.
4483///
4484/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mullo_epi64&ig_expand=4778)
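///
/// # Examples
///
/// A minimal usage sketch, assuming `avx512dq` and `avx512vl` support is verified at
/// runtime before the call:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///
///     if is_x86_feature_detected!("avx512dq") && is_x86_feature_detected!("avx512vl") {
///         // SAFETY: `avx512dq` and `avx512vl` were detected at runtime.
///         let lanes: [i64; 2] = unsafe {
///             let a = _mm_set_epi64x(3, -4);
///             let b = _mm_set_epi64x(5, 6);
///             core::mem::transmute(_mm_mullo_epi64(a, b))
///         };
///         assert_eq!(lanes, [-24, 15]); // low 64 bits of each product
///     }
/// }
/// ```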
4485#[inline]
4486#[target_feature(enable = "avx512dq,avx512vl")]
4487#[cfg_attr(test, assert_instr(vpmullq))]
4488#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4489#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4490pub const fn _mm_mullo_epi64(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_mul(a.as_i64x2(), b.as_i64x2())) }
4492}
4493
4494/// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store
4495/// the low 64 bits of the intermediate integers in `dst` using writemask `k` (elements are copied from
4496/// `src` if the corresponding bit is not set).
4497///
4498/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mullo_epi64&ig_expand=4776)
4499#[inline]
4500#[target_feature(enable = "avx512dq,avx512vl")]
4501#[cfg_attr(test, assert_instr(vpmullq))]
4502#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4503#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4504pub const fn _mm_mask_mullo_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let b = _mm_mullo_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, b, src.as_i64x2()))
    }
4509}
4510
4511/// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store
4512/// the low 64 bits of the intermediate integers in `dst` using zeromask `k` (elements are zeroed out if
4513/// the corresponding bit is not set).
4514///
4515/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mullo_epi64&ig_expand=4777)
4516#[inline]
4517#[target_feature(enable = "avx512dq,avx512vl")]
4518#[cfg_attr(test, assert_instr(vpmullq))]
4519#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4520#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4521pub const fn _mm_maskz_mullo_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let b = _mm_mullo_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, b, i64x2::ZERO))
    }
4526}
4527
4528/// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store
4529/// the low 64 bits of the intermediate integers in `dst`.
4530///
4531/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mullo_epi64&ig_expand=4781)
4532#[inline]
4533#[target_feature(enable = "avx512dq,avx512vl")]
4534#[cfg_attr(test, assert_instr(vpmullq))]
4535#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4536#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4537pub const fn _mm256_mullo_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_mul(a.as_i64x4(), b.as_i64x4())) }
4539}
4540
4541/// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store
4542/// the low 64 bits of the intermediate integers in `dst` using writemask `k` (elements are copied from
4543/// `src` if the corresponding bit is not set).
4544///
4545/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mullo_epi64&ig_expand=4779)
4546#[inline]
4547#[target_feature(enable = "avx512dq,avx512vl")]
4548#[cfg_attr(test, assert_instr(vpmullq))]
4549#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4550#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4551pub const fn _mm256_mask_mullo_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let b = _mm256_mullo_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, b, src.as_i64x4()))
    }
4556}
4557
4558/// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store
4559/// the low 64 bits of the intermediate integers in `dst` using zeromask `k` (elements are zeroed out if
4560/// the corresponding bit is not set).
4561///
4562/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mullo_epi64&ig_expand=4780)
4563#[inline]
4564#[target_feature(enable = "avx512dq,avx512vl")]
4565#[cfg_attr(test, assert_instr(vpmullq))]
4566#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4567#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4568pub const fn _mm256_maskz_mullo_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let b = _mm256_mullo_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, b, i64x4::ZERO))
    }
4573}
4574
4575/// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store
4576/// the low 64 bits of the intermediate integers in `dst`.
4577///
4578/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mullo_epi64&ig_expand=4784)
4579#[inline]
4580#[target_feature(enable = "avx512dq")]
4581#[cfg_attr(test, assert_instr(vpmullq))]
4582#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4583#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4584pub const fn _mm512_mullo_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_mul(a.as_i64x8(), b.as_i64x8())) }
4586}
4587
4588/// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store
4589/// the low 64 bits of the intermediate integers in `dst` using writemask `k` (elements are copied from
4590/// `src` if the corresponding bit is not set).
4591///
4592/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mullo_epi64&ig_expand=4782)
4593#[inline]
4594#[target_feature(enable = "avx512dq")]
4595#[cfg_attr(test, assert_instr(vpmullq))]
4596#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4597#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4598pub const fn _mm512_mask_mullo_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let b = _mm512_mullo_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, b, src.as_i64x8()))
    }
4603}
4604
4605/// Multiply packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and store
4606/// the low 64 bits of the intermediate integers in `dst` using zeromask `k` (elements are zeroed out if
4607/// the corresponding bit is not set).
4608///
4609/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mullo_epi64&ig_expand=4783)
4610#[inline]
4611#[target_feature(enable = "avx512dq")]
4612#[cfg_attr(test, assert_instr(vpmullq))]
4613#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4614#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4615pub const fn _mm512_maskz_mullo_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let b = _mm512_mullo_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, b, i64x8::ZERO))
    }
4620}
4621
4622// Mask Registers
4623
4624/// Convert 8-bit mask a to a 32-bit integer value and store the result in dst.
4625///
4626/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtmask8_u32&ig_expand=1891)
4627#[inline]
4628#[target_feature(enable = "avx512dq")]
4629#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4630#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4631pub const fn _cvtmask8_u32(a: __mmask8) -> u32 {
4632 a as u32
4633}
4634
4635/// Convert 32-bit integer value a to an 8-bit mask and store the result in dst.
4636///
4637/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtu32_mask8&ig_expand=2467)
4638#[inline]
4639#[target_feature(enable = "avx512dq")]
4640#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4641#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4642pub const fn _cvtu32_mask8(a: u32) -> __mmask8 {
4643 a as __mmask8
4644}
4645
4646/// Add 16-bit masks a and b, and store the result in dst.
4647///
4648/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kadd_mask16&ig_expand=3903)
4649#[inline]
4650#[target_feature(enable = "avx512dq")]
4651#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4652#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4653pub const fn _kadd_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
4654 a.wrapping_add(b)
4655}
4656
4657/// Add 8-bit masks a and b, and store the result in dst.
4658///
4659/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kadd_mask8&ig_expand=3906)
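///
/// # Examples
///
/// A minimal usage sketch, assuming `avx512dq` support is verified at runtime before
/// the call; note that the addition wraps around on overflow:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///
///     if is_x86_feature_detected!("avx512dq") {
///         // SAFETY: `avx512dq` was detected at runtime.
///         let k = unsafe { _kadd_mask8(0b1111_1111, 0b0000_0011) };
///         assert_eq!(k, 0b0000_0010); // 0xFF + 0x03 wraps to 0x02
///     }
/// }
/// ```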
4660#[inline]
4661#[target_feature(enable = "avx512dq")]
4662#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4663#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4664pub const fn _kadd_mask8(a: __mmask8, b: __mmask8) -> __mmask8 {
4665 a.wrapping_add(b)
4666}
4667
4668/// Bitwise AND of 8-bit masks a and b, and store the result in dst.
4669///
4670/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kand_mask8&ig_expand=3911)
4671#[inline]
4672#[target_feature(enable = "avx512dq")]
4673#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4674#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4675pub const fn _kand_mask8(a: __mmask8, b: __mmask8) -> __mmask8 {
4676 a & b
4677}
4678
4679/// Bitwise AND NOT of 8-bit masks a and b, and store the result in dst.
4680///
4681/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kandn_mask8&ig_expand=3916)
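///
/// # Examples
///
/// A minimal usage sketch, assuming `avx512dq` support is verified at runtime before
/// the call; the first operand is the one that gets negated:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///
///     if is_x86_feature_detected!("avx512dq") {
///         // SAFETY: `avx512dq` was detected at runtime.
///         let k = unsafe { _kandn_mask8(0b1100_1100, 0b1010_1010) };
///         assert_eq!(k, 0b0010_0010); // (NOT a) AND b
///     }
/// }
/// ```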
4682#[inline]
4683#[target_feature(enable = "avx512dq")]
4684#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4685#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4686pub const fn _kandn_mask8(a: __mmask8, b: __mmask8) -> __mmask8 {
4687 _knot_mask8(a) & b
4688}
4689
4690/// Bitwise NOT of 8-bit mask a, and store the result in dst.
4691///
4692/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_knot_mask8&ig_expand=3922)
4693#[inline]
4694#[target_feature(enable = "avx512dq")]
4695#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4696#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4697pub const fn _knot_mask8(a: __mmask8) -> __mmask8 {
4698 a ^ 0b11111111
4699}
4700
4701/// Bitwise OR of 8-bit masks a and b, and store the result in dst.
4702///
4703/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kor_mask8&ig_expand=3927)
4704#[inline]
4705#[target_feature(enable = "avx512dq")]
4706#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4707#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4708pub const fn _kor_mask8(a: __mmask8, b: __mmask8) -> __mmask8 {
4709 a | b
4710}
4711
4712/// Bitwise XNOR of 8-bit masks a and b, and store the result in dst.
4713///
4714/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxnor_mask8&ig_expand=3969)
4715#[inline]
4716#[target_feature(enable = "avx512dq")]
4717#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4718#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4719pub const fn _kxnor_mask8(a: __mmask8, b: __mmask8) -> __mmask8 {
4720 _knot_mask8(_kxor_mask8(a, b))
4721}
4722
4723/// Bitwise XOR of 8-bit masks a and b, and store the result in dst.
4724///
4725/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kxor_mask8&ig_expand=3974)
4726#[inline]
4727#[target_feature(enable = "avx512dq")]
4728#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4729#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4730pub const fn _kxor_mask8(a: __mmask8, b: __mmask8) -> __mmask8 {
4731 a ^ b
4732}
4733
4734/// Compute the bitwise OR of 8-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
4735/// store 0 in dst. If the result is all ones, store 1 in all_ones, otherwise store 0 in all_ones.
4736///
4737/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortest_mask8_u8&ig_expand=3931)
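///
/// # Examples
///
/// A minimal usage sketch, assuming `avx512dq` support is verified at runtime before
/// the call; the all-ones flag is returned through the `all_ones` out-pointer:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///
///     if is_x86_feature_detected!("avx512dq") {
///         let mut all_ones = 0u8;
///         // SAFETY: `avx512dq` was detected and `all_ones` is a valid, writable u8.
///         let all_zeros = unsafe { _kortest_mask8_u8(0b1111_0000, 0b0000_1111, &mut all_ones) };
///         assert_eq!(all_zeros, 0); // the OR is not all zeros
///         assert_eq!(all_ones, 1); // the OR is all ones
///     }
/// }
/// ```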
4738#[inline]
4739#[target_feature(enable = "avx512dq")]
4740#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4741#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4742pub const unsafe fn _kortest_mask8_u8(a: __mmask8, b: __mmask8, all_ones: *mut u8) -> u8 {
4743 let tmp: u8 = _kor_mask8(a, b);
4744 *all_ones = (tmp == 0xff) as u8;
4745 (tmp == 0) as u8
4746}
4747
4748/// Compute the bitwise OR of 8-bit masks a and b. If the result is all ones, store 1 in dst, otherwise
4749/// store 0 in dst.
4750///
4751/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestc_mask8_u8&ig_expand=3936)
4752#[inline]
4753#[target_feature(enable = "avx512dq")]
4754#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4755#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4756pub const fn _kortestc_mask8_u8(a: __mmask8, b: __mmask8) -> u8 {
4757 (_kor_mask8(a, b) == 0xff) as u8
4758}
4759
4760/// Compute the bitwise OR of 8-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
4761/// store 0 in dst.
4762///
4763/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestz_mask8_u8&ig_expand=3941)
4764#[inline]
4765#[target_feature(enable = "avx512dq")]
4766#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4767#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4768pub const fn _kortestz_mask8_u8(a: __mmask8, b: __mmask8) -> u8 {
4769 (_kor_mask8(a, b) == 0) as u8
4770}
4771
4772/// Shift 8-bit mask a left by count bits while shifting in zeros, and store the result in dst.
4773///
4774/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftli_mask8&ig_expand=3945)
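///
/// # Examples
///
/// A minimal usage sketch, assuming `avx512dq` support is verified at runtime before
/// the call; counts of 8 or more shift every bit out:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///
///     if is_x86_feature_detected!("avx512dq") {
///         // SAFETY: `avx512dq` was detected at runtime.
///         let k = unsafe { _kshiftli_mask8::<2>(0b0000_1011) };
///         assert_eq!(k, 0b0010_1100);
///         assert_eq!(unsafe { _kshiftli_mask8::<8>(0b0000_1011) }, 0);
///     }
/// }
/// ```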
4775#[inline]
4776#[target_feature(enable = "avx512dq")]
4777#[rustc_legacy_const_generics(1)]
4778#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4779#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4780pub const fn _kshiftli_mask8<const COUNT: u32>(a: __mmask8) -> __mmask8 {
4781 a.unbounded_shl(COUNT)
4782}
4783
4784/// Shift 8-bit mask a right by count bits while shifting in zeros, and store the result in dst.
4785///
4786/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftri_mask8&ig_expand=3949)
4787#[inline]
4788#[target_feature(enable = "avx512dq")]
4789#[rustc_legacy_const_generics(1)]
4790#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4791#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4792pub const fn _kshiftri_mask8<const COUNT: u32>(a: __mmask8) -> __mmask8 {
4793 a.unbounded_shr(COUNT)
4794}
4795
4796/// Compute the bitwise AND of 16-bit masks a and b, and if the result is all zeros, store 1 in dst,
4797/// otherwise store 0 in dst. Compute the bitwise NOT of a and then AND with b, if the result is all
4798/// zeros, store 1 in and_not, otherwise store 0 in and_not.
4799///
4800/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktest_mask16_u8&ig_expand=3950)
4801#[inline]
4802#[target_feature(enable = "avx512dq")]
4803#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4804#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4805pub const unsafe fn _ktest_mask16_u8(a: __mmask16, b: __mmask16, and_not: *mut u8) -> u8 {
4806 *and_not = (_kandn_mask16(a, b) == 0) as u8;
4807 (_kand_mask16(a, b) == 0) as u8
4808}
4809
4810/// Compute the bitwise AND of 8-bit masks a and b, and if the result is all zeros, store 1 in dst,
4811/// otherwise store 0 in dst. Compute the bitwise NOT of a and then AND with b, if the result is all
4812/// zeros, store 1 in and_not, otherwise store 0 in and_not.
4813///
4814/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktest_mask8_u8&ig_expand=3953)
4815#[inline]
4816#[target_feature(enable = "avx512dq")]
4817#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4818#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4819pub const unsafe fn _ktest_mask8_u8(a: __mmask8, b: __mmask8, and_not: *mut u8) -> u8 {
4820 *and_not = (_kandn_mask8(a, b) == 0) as u8;
4821 (_kand_mask8(a, b) == 0) as u8
4822}
4823
4824/// Compute the bitwise NOT of 16-bit mask a and then AND with 16-bit mask b, if the result is all
4825/// zeros, store 1 in dst, otherwise store 0 in dst.
4826///
4827/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestc_mask16_u8&ig_expand=3954)
4828#[inline]
4829#[target_feature(enable = "avx512dq")]
4830#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4831#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4832pub const fn _ktestc_mask16_u8(a: __mmask16, b: __mmask16) -> u8 {
4833 (_kandn_mask16(a, b) == 0) as u8
4834}
4835
4836/// Compute the bitwise NOT of 8-bit mask a and then AND with 8-bit mask b, if the result is all
4837/// zeros, store 1 in dst, otherwise store 0 in dst.
4838///
4839/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestc_mask8_u8&ig_expand=3957)
4840#[inline]
4841#[target_feature(enable = "avx512dq")]
4842#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4843#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4844pub const fn _ktestc_mask8_u8(a: __mmask8, b: __mmask8) -> u8 {
4845 (_kandn_mask8(a, b) == 0) as u8
4846}
4847
4848/// Compute the bitwise AND of 16-bit masks a and b, if the result is all zeros, store 1 in dst, otherwise
4849/// store 0 in dst.
4850///
4851/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestz_mask16_u8&ig_expand=3958)
4852#[inline]
4853#[target_feature(enable = "avx512dq")]
4854#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4855#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4856pub const fn _ktestz_mask16_u8(a: __mmask16, b: __mmask16) -> u8 {
4857 (_kand_mask16(a, b) == 0) as u8
4858}
4859
4860/// Compute the bitwise AND of 8-bit masks a and b, if the result is all zeros, store 1 in dst, otherwise
4861/// store 0 in dst.
4862///
4863/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_ktestz_mask8_u8&ig_expand=3961)
4864#[inline]
4865#[target_feature(enable = "avx512dq")]
4866#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4867#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4868pub const fn _ktestz_mask8_u8(a: __mmask8, b: __mmask8) -> u8 {
4869 (_kand_mask8(a, b) == 0) as u8
4870}
4871
4872/// Load 8-bit mask from memory
4873///
4874/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_load_mask8&ig_expand=3999)
4875#[inline]
4876#[target_feature(enable = "avx512dq")]
4877#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4878#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4879pub const unsafe fn _load_mask8(mem_addr: *const __mmask8) -> __mmask8 {
4880 *mem_addr
4881}
4882
4883/// Store 8-bit mask to memory
4884///
4885/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_store_mask8&ig_expand=6468)
4886#[inline]
4887#[target_feature(enable = "avx512dq")]
4888#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4889#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4890pub const unsafe fn _store_mask8(mem_addr: *mut __mmask8, a: __mmask8) {
4891 *mem_addr = a;
4892}
4893
4894/// Set each bit of mask register k based on the most significant bit of the corresponding packed 32-bit
4895/// integer in a.
4896///
4897/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movepi32_mask&ig_expand=4612)
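///
/// # Examples
///
/// A minimal usage sketch, assuming `avx512dq` and `avx512vl` support is verified at
/// runtime before the call:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///
///     if is_x86_feature_detected!("avx512dq") && is_x86_feature_detected!("avx512vl") {
///         // SAFETY: `avx512dq` and `avx512vl` were detected at runtime.
///         let k = unsafe { _mm_movepi32_mask(_mm_set_epi32(-1, 2, -3, 4)) };
///         assert_eq!(k, 0b1010); // bit i = sign bit of 32-bit element i
///     }
/// }
/// ```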
4898#[inline]
4899#[target_feature(enable = "avx512dq,avx512vl")]
4900#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4901#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4902pub const fn _mm_movepi32_mask(a: __m128i) -> __mmask8 {
4903 let zero: __m128i = _mm_setzero_si128();
    _mm_cmplt_epi32_mask(a, zero)
4905}
4906
4907/// Set each bit of mask register k based on the most significant bit of the corresponding packed 32-bit
4908/// integer in a.
4909///
4910/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movepi32_mask&ig_expand=4613)
4911#[inline]
4912#[target_feature(enable = "avx512dq,avx512vl")]
4913#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4914#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4915pub const fn _mm256_movepi32_mask(a: __m256i) -> __mmask8 {
4916 let zero: __m256i = _mm256_setzero_si256();
    _mm256_cmplt_epi32_mask(a, zero)
4918}
4919
4920/// Set each bit of mask register k based on the most significant bit of the corresponding packed 32-bit
4921/// integer in a.
4922///
4923/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movepi32_mask&ig_expand=4614)
4924#[inline]
4925#[target_feature(enable = "avx512dq")]
4926#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4927#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4928pub const fn _mm512_movepi32_mask(a: __m512i) -> __mmask16 {
4929 let zero: __m512i = _mm512_setzero_si512();
    _mm512_cmplt_epi32_mask(a, zero)
4931}
4932
4933/// Set each bit of mask register k based on the most significant bit of the corresponding packed 64-bit
4934/// integer in a.
4935///
4936/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movepi64_mask&ig_expand=4615)
4937#[inline]
4938#[target_feature(enable = "avx512dq,avx512vl")]
4939#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4940#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4941pub const fn _mm_movepi64_mask(a: __m128i) -> __mmask8 {
4942 let zero: __m128i = _mm_setzero_si128();
    _mm_cmplt_epi64_mask(a, zero)
4944}
4945
4946/// Set each bit of mask register k based on the most significant bit of the corresponding packed 64-bit
4947/// integer in a.
4948///
4949/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movepi64_mask&ig_expand=4616)
4950#[inline]
4951#[target_feature(enable = "avx512dq,avx512vl")]
4952#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4953#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4954pub const fn _mm256_movepi64_mask(a: __m256i) -> __mmask8 {
4955 let zero: __m256i = _mm256_setzero_si256();
    _mm256_cmplt_epi64_mask(a, zero)
4957}
4958
4959/// Set each bit of mask register k based on the most significant bit of the corresponding packed 64-bit
4960/// integer in a.
4961///
4962/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movepi64_mask&ig_expand=4617)
4963#[inline]
4964#[target_feature(enable = "avx512dq")]
4965#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4966#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4967pub const fn _mm512_movepi64_mask(a: __m512i) -> __mmask8 {
4968 let zero: __m512i = _mm512_setzero_si512();
    _mm512_cmplt_epi64_mask(a, zero)
4970}
4971
4972/// Set each packed 32-bit integer in dst to all ones or all zeros based on the value of the corresponding
4973/// bit in k.
4974///
4975/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movm_epi32&ig_expand=4625)
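///
/// # Examples
///
/// A minimal usage sketch, assuming `avx512dq` and `avx512vl` support is verified at
/// runtime before the call:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///
///     if is_x86_feature_detected!("avx512dq") && is_x86_feature_detected!("avx512vl") {
///         // SAFETY: `avx512dq` and `avx512vl` were detected at runtime.
///         let lanes: [i32; 4] = unsafe { core::mem::transmute(_mm_movm_epi32(0b0101)) };
///         assert_eq!(lanes, [-1, 0, -1, 0]); // all ones where the mask bit is set
///     }
/// }
/// ```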
4976#[inline]
4977#[target_feature(enable = "avx512dq,avx512vl")]
4978#[cfg_attr(test, assert_instr(vpmovm2d))]
4979#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4980#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4981pub const fn _mm_movm_epi32(k: __mmask8) -> __m128i {
4982 let ones: __m128i = _mm_set1_epi32(-1);
    _mm_maskz_mov_epi32(k, ones)
4984}
4985
4986/// Set each packed 32-bit integer in dst to all ones or all zeros based on the value of the corresponding
4987/// bit in k.
4988///
4989/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movm_epi32&ig_expand=4626)
4990#[inline]
4991#[target_feature(enable = "avx512dq,avx512vl")]
4992#[cfg_attr(test, assert_instr(vpmovm2d))]
4993#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4994#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
4995pub const fn _mm256_movm_epi32(k: __mmask8) -> __m256i {
4996 let ones: __m256i = _mm256_set1_epi32(-1);
    _mm256_maskz_mov_epi32(k, ones)
4998}
4999
5000/// Set each packed 32-bit integer in dst to all ones or all zeros based on the value of the corresponding
5001/// bit in k.
5002///
5003/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movm_epi32&ig_expand=4627)
5004#[inline]
5005#[target_feature(enable = "avx512dq")]
5006#[cfg_attr(test, assert_instr(vpmovm2d))]
5007#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5008#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5009pub const fn _mm512_movm_epi32(k: __mmask16) -> __m512i {
5010 let ones: __m512i = _mm512_set1_epi32(-1);
    _mm512_maskz_mov_epi32(k, ones)
5012}
5013
5014/// Set each packed 64-bit integer in dst to all ones or all zeros based on the value of the corresponding
5015/// bit in k.
5016///
5017/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_movm_epi64&ig_expand=4628)
5018#[inline]
5019#[target_feature(enable = "avx512dq,avx512vl")]
5020#[cfg_attr(test, assert_instr(vpmovm2q))]
5021#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5022#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5023pub const fn _mm_movm_epi64(k: __mmask8) -> __m128i {
5024 let ones: __m128i = _mm_set1_epi64x(-1);
    _mm_maskz_mov_epi64(k, ones)
5026}
5027
5028/// Set each packed 64-bit integer in dst to all ones or all zeros based on the value of the corresponding
5029/// bit in k.
5030///
5031/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_movm_epi64&ig_expand=4629)
5032#[inline]
5033#[target_feature(enable = "avx512dq,avx512vl")]
5034#[cfg_attr(test, assert_instr(vpmovm2q))]
5035#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5036#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5037pub const fn _mm256_movm_epi64(k: __mmask8) -> __m256i {
5038 let ones: __m256i = _mm256_set1_epi64x(-1);
    _mm256_maskz_mov_epi64(k, ones)
5040}
5041
5042/// Set each packed 64-bit integer in dst to all ones or all zeros based on the value of the corresponding
5043/// bit in k.
5044///
5045/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movm_epi64&ig_expand=4630)
5046#[inline]
5047#[target_feature(enable = "avx512dq")]
5048#[cfg_attr(test, assert_instr(vpmovm2q))]
5049#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5050#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
5051pub const fn _mm512_movm_epi64(k: __mmask8) -> __m512i {
5052 let ones: __m512i = _mm512_set1_epi64(-1);
    _mm512_maskz_mov_epi64(k, ones)
5054}
5055
5056// Range
5057
5058/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5059/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
5060/// Lower 2 bits of IMM8 specifies the operation control:
5061/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5062/// Upper 2 bits of IMM8 specifies the sign control:
5063/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5064/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5065///
5066/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_range_round_pd&ig_expand=5210)
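///
/// # Examples
///
/// A minimal usage sketch, assuming `avx512dq` support is verified at runtime before
/// the call; `IMM8 = 0b0101` selects max with the sign taken from the compare result,
/// and `_MM_FROUND_NO_EXC` suppresses exceptions:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///
///     if is_x86_feature_detected!("avx512dq") {
///         // SAFETY: `avx512dq` was detected at runtime.
///         let lanes: [f64; 8] = unsafe {
///             let a = _mm512_set1_pd(-2.0);
///             let b = _mm512_set1_pd(3.0);
///             core::mem::transmute(_mm512_range_round_pd::<0b0101, { _MM_FROUND_NO_EXC }>(a, b))
///         };
///         assert_eq!(lanes, [3.0; 8]);
///     }
/// }
/// ```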
5067#[inline]
5068#[target_feature(enable = "avx512dq")]
5069#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5, SAE = 8))]
5070#[rustc_legacy_const_generics(2, 3)]
5071#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5072pub fn _mm512_range_round_pd<const IMM8: i32, const SAE: i32>(a: __m512d, b: __m512d) -> __m512d {
5073 static_assert_uimm_bits!(IMM8, 4);
5074 static_assert_sae!(SAE);
    _mm512_mask_range_round_pd::<IMM8, SAE>(_mm512_setzero_pd(), 0xff, a, b)
5076}
5077
5078/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5079/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using
5080/// writemask k (elements are copied from src to dst if the corresponding mask bit is not set).
5081/// Lower 2 bits of IMM8 specifies the operation control:
5082/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5083/// Upper 2 bits of IMM8 specifies the sign control:
5084/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5085/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5086///
5087/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_range_round_pd&ig_expand=5208)
5088#[inline]
5089#[target_feature(enable = "avx512dq")]
5090#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5, SAE = 8))]
5091#[rustc_legacy_const_generics(4, 5)]
5092#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5093pub fn _mm512_mask_range_round_pd<const IMM8: i32, const SAE: i32>(
5094 src: __m512d,
5095 k: __mmask8,
5096 a: __m512d,
5097 b: __m512d,
5098) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 4);
        static_assert_sae!(SAE);
        transmute(vrangepd_512(
            a.as_f64x8(),
            b.as_f64x8(),
            IMM8,
            src.as_f64x8(),
            k,
            SAE,
        ))
    }
5111}
5112
5113/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5114/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using
5115/// zeromask k (elements are zeroed out if the corresponding mask bit is not set).
5116/// Lower 2 bits of IMM8 specifies the operation control:
5117/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5118/// Upper 2 bits of IMM8 specifies the sign control:
5119/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5120/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5121///
5122/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_range_round_pd&ig_expand=5209)
5123#[inline]
5124#[target_feature(enable = "avx512dq")]
5125#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5, SAE = 8))]
5126#[rustc_legacy_const_generics(3, 4)]
5127#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5128pub fn _mm512_maskz_range_round_pd<const IMM8: i32, const SAE: i32>(
5129 k: __mmask8,
5130 a: __m512d,
5131 b: __m512d,
5132) -> __m512d {
5133 static_assert_uimm_bits!(IMM8, 4);
5134 static_assert_sae!(SAE);
    _mm512_mask_range_round_pd::<IMM8, SAE>(_mm512_setzero_pd(), k, a, b)
5136}
5137
5138/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5139/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
5140/// Lower 2 bits of IMM8 specifies the operation control:
5141/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5142/// Upper 2 bits of IMM8 specifies the sign control:
5143/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5144///
5145/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_range_pd&ig_expand=5192)
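///
/// # Examples
///
/// A minimal usage sketch, assuming `avx512dq` and `avx512vl` support is verified at
/// runtime before the call; `IMM8 = 0b1011` selects the absolute maximum with the sign
/// bit cleared, i.e. the larger magnitude of each pair as a non-negative value:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///
///     if is_x86_feature_detected!("avx512dq") && is_x86_feature_detected!("avx512vl") {
///         // SAFETY: `avx512dq` and `avx512vl` were detected at runtime.
///         let lanes: [f64; 2] = unsafe {
///             let a = _mm_set_pd(2.0, -3.0);
///             let b = _mm_set_pd(-4.0, 1.0);
///             core::mem::transmute(_mm_range_pd::<0b1011>(a, b))
///         };
///         assert_eq!(lanes, [3.0, 4.0]);
///     }
/// }
/// ```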
5146#[inline]
5147#[target_feature(enable = "avx512dq,avx512vl")]
5148#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))]
5149#[rustc_legacy_const_generics(2)]
5150#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5151pub fn _mm_range_pd<const IMM8: i32>(a: __m128d, b: __m128d) -> __m128d {
5152 static_assert_uimm_bits!(IMM8, 4);
    _mm_mask_range_pd::<IMM8>(_mm_setzero_pd(), 0xff, a, b)
5154}
5155
5156/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5157/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using
5158/// writemask k (elements are copied from src to dst if the corresponding mask bit is not set).
5159/// Lower 2 bits of IMM8 specifies the operation control:
5160/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5161/// Upper 2 bits of IMM8 specifies the sign control:
5162/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5163///
5164/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_pd&ig_expand=5190)
5165#[inline]
5166#[target_feature(enable = "avx512dq,avx512vl")]
5167#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))]
5168#[rustc_legacy_const_generics(4)]
5169#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5170pub fn _mm_mask_range_pd<const IMM8: i32>(
5171 src: __m128d,
5172 k: __mmask8,
5173 a: __m128d,
5174 b: __m128d,
5175) -> __m128d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 4);
        transmute(vrangepd_128(
            a.as_f64x2(),
            b.as_f64x2(),
            IMM8,
            src.as_f64x2(),
            k,
        ))
    }
5186}
5187
5188/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5189/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using
5190/// zeromask k (elements are zeroed out if the corresponding mask bit is not set).
5191/// Lower 2 bits of IMM8 specifies the operation control:
5192/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5193/// Upper 2 bits of IMM8 specifies the sign control:
5194/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5195///
5196/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_pd&ig_expand=5191)
5197#[inline]
5198#[target_feature(enable = "avx512dq,avx512vl")]
5199#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))]
5200#[rustc_legacy_const_generics(3)]
5201#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5202pub fn _mm_maskz_range_pd<const IMM8: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
5203 static_assert_uimm_bits!(IMM8, 4);
    _mm_mask_range_pd::<IMM8>(_mm_setzero_pd(), k, a, b)
5205}
5206
5207/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5208/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
5209/// Lower 2 bits of IMM8 specifies the operation control:
5210/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5211/// Upper 2 bits of IMM8 specifies the sign control:
5212/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5213///
5214/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_range_pd&ig_expand=5195)
5215#[inline]
5216#[target_feature(enable = "avx512dq,avx512vl")]
5217#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))]
5218#[rustc_legacy_const_generics(2)]
5219#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5220pub fn _mm256_range_pd<const IMM8: i32>(a: __m256d, b: __m256d) -> __m256d {
5221 static_assert_uimm_bits!(IMM8, 4);
    _mm256_mask_range_pd::<IMM8>(_mm256_setzero_pd(), 0xff, a, b)
5223}
5224
5225/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5226/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using
5227/// writemask k (elements are copied from src to dst if the corresponding mask bit is not set).
5228/// Lower 2 bits of IMM8 specifies the operation control:
5229/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5230/// Upper 2 bits of IMM8 specifies the sign control:
5231/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5232///
5233/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_range_pd&ig_expand=5193)
5234#[inline]
5235#[target_feature(enable = "avx512dq,avx512vl")]
5236#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))]
5237#[rustc_legacy_const_generics(4)]
5238#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5239pub fn _mm256_mask_range_pd<const IMM8: i32>(
5240 src: __m256d,
5241 k: __mmask8,
5242 a: __m256d,
5243 b: __m256d,
5244) -> __m256d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 4);
        transmute(vrangepd_256(
            a.as_f64x4(),
            b.as_f64x4(),
            IMM8,
            src.as_f64x4(),
            k,
        ))
    }
5255}
5256
5257/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5258/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using
5259/// zeromask k (elements are zeroed out if the corresponding mask bit is not set).
5260/// Lower 2 bits of IMM8 specifies the operation control:
5261/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5262/// Upper 2 bits of IMM8 specifies the sign control:
5263/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5264///
5265/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_range_pd&ig_expand=5194)
5266#[inline]
5267#[target_feature(enable = "avx512dq,avx512vl")]
5268#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))]
5269#[rustc_legacy_const_generics(3)]
5270#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5271pub fn _mm256_maskz_range_pd<const IMM8: i32>(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
5272 static_assert_uimm_bits!(IMM8, 4);
    _mm256_mask_range_pd::<IMM8>(_mm256_setzero_pd(), k, a, b)
5274}
5275
5276/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5277/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
5278/// Lower 2 bits of IMM8 specifies the operation control:
5279/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5280/// Upper 2 bits of IMM8 specifies the sign control:
5281/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5282///
5283/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_range_pd&ig_expand=5198)
5284#[inline]
5285#[target_feature(enable = "avx512dq")]
5286#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))]
5287#[rustc_legacy_const_generics(2)]
5288#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5289pub fn _mm512_range_pd<const IMM8: i32>(a: __m512d, b: __m512d) -> __m512d {
5290 static_assert_uimm_bits!(IMM8, 4);
    _mm512_mask_range_pd::<IMM8>(_mm512_setzero_pd(), 0xff, a, b)
5292}
5293
5294/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5295/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using
5296/// writemask k (elements are copied from src to dst if the corresponding mask bit is not set).
5297/// Lower 2 bits of IMM8 specifies the operation control:
5298/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5299/// Upper 2 bits of IMM8 specifies the sign control:
5300/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5301///
5302/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_range_pd&ig_expand=5196)
5303#[inline]
5304#[target_feature(enable = "avx512dq")]
5305#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))]
5306#[rustc_legacy_const_generics(4)]
5307#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5308pub fn _mm512_mask_range_pd<const IMM8: i32>(
5309 src: __m512d,
5310 k: __mmask8,
5311 a: __m512d,
5312 b: __m512d,
5313) -> __m512d {
    unsafe {
        static_assert_uimm_bits!(IMM8, 4);
        transmute(vrangepd_512(
            a.as_f64x8(),
            b.as_f64x8(),
            IMM8,
            src.as_f64x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
    }
5325}
5326
5327/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5328/// double-precision (64-bit) floating-point elements in a and b, and store the results in dst using
5329/// zeromask k (elements are zeroed out if the corresponding mask bit is not set).
5330/// Lower 2 bits of IMM8 specifies the operation control:
5331/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5332/// Upper 2 bits of IMM8 specifies the sign control:
5333/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5334///
5335/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_range_pd&ig_expand=5197)
5336#[inline]
5337#[target_feature(enable = "avx512dq")]
5338#[cfg_attr(test, assert_instr(vrangepd, IMM8 = 5))]
5339#[rustc_legacy_const_generics(3)]
5340#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5341pub fn _mm512_maskz_range_pd<const IMM8: i32>(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
5342 static_assert_uimm_bits!(IMM8, 4);
    _mm512_mask_range_pd::<IMM8>(_mm512_setzero_pd(), k, a, b)
5344}
5345
5346/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5347/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
5348/// Lower 2 bits of IMM8 specifies the operation control:
5349/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5350/// Upper 2 bits of IMM8 specifies the sign control:
5351/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5352/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5353///
5354/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_range_round_ps&ig_expand=5213)
5355#[inline]
5356#[target_feature(enable = "avx512dq")]
5357#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5, SAE = 8))]
5358#[rustc_legacy_const_generics(2, 3)]
5359#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5360pub fn _mm512_range_round_ps<const IMM8: i32, const SAE: i32>(a: __m512, b: __m512) -> __m512 {
5361 static_assert_uimm_bits!(IMM8, 4);
5362 static_assert_sae!(SAE);
    _mm512_mask_range_round_ps::<IMM8, SAE>(_mm512_setzero_ps(), 0xffff, a, b)
5364}
5365
5366/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5367/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using
5368/// writemask k (elements are copied from src to dst if the corresponding mask bit is not set).
5369/// Lower 2 bits of IMM8 specifies the operation control:
5370/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5371/// Upper 2 bits of IMM8 specifies the sign control:
5372/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5373///
5374/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_range_round_ps&ig_expand=5211)
5375#[inline]
5376#[target_feature(enable = "avx512dq")]
5377#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5, SAE = 8))]
5378#[rustc_legacy_const_generics(4, 5)]
5379#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5380pub fn _mm512_mask_range_round_ps<const IMM8: i32, const SAE: i32>(
5381 src: __m512,
5382 k: __mmask16,
5383 a: __m512,
5384 b: __m512,
5385) -> __m512 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 4);
        static_assert_sae!(SAE);
        transmute(vrangeps_512(
            a.as_f32x16(),
            b.as_f32x16(),
            IMM8,
            src.as_f32x16(),
            k,
            SAE,
        ))
    }
5398}
5399
5400/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5401/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using
5402/// zeromask k (elements are zeroed out if the corresponding mask bit is not set).
5403/// Lower 2 bits of IMM8 specifies the operation control:
5404/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5405/// Upper 2 bits of IMM8 specifies the sign control:
5406/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5407///
5408/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_range_round_ps&ig_expand=5212)
5409#[inline]
5410#[target_feature(enable = "avx512dq")]
5411#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5, SAE = 8))]
5412#[rustc_legacy_const_generics(3, 4)]
5413#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5414pub fn _mm512_maskz_range_round_ps<const IMM8: i32, const SAE: i32>(
5415 k: __mmask16,
5416 a: __m512,
5417 b: __m512,
5418) -> __m512 {
5419 static_assert_uimm_bits!(IMM8, 4);
5420 static_assert_sae!(SAE);
    _mm512_mask_range_round_ps::<IMM8, SAE>(_mm512_setzero_ps(), k, a, b)
5422}
5423
5424/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5425/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
5426/// Lower 2 bits of IMM8 specifies the operation control:
5427/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5428/// Upper 2 bits of IMM8 specifies the sign control:
5429/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5430///
5431/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_range_ps&ig_expand=5201)
5432#[inline]
5433#[target_feature(enable = "avx512dq,avx512vl")]
5434#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))]
5435#[rustc_legacy_const_generics(2)]
5436#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5437pub fn _mm_range_ps<const IMM8: i32>(a: __m128, b: __m128) -> __m128 {
5438 static_assert_uimm_bits!(IMM8, 4);
    _mm_mask_range_ps::<IMM8>(_mm_setzero_ps(), 0xff, a, b)
5440}
5441
5442/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5443/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using
5444/// writemask k (elements are copied from src to dst if the corresponding mask bit is not set).
5445/// Lower 2 bits of IMM8 specifies the operation control:
5446/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5447/// Upper 2 bits of IMM8 specifies the sign control:
5448/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5449///
5450/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_ps&ig_expand=5199)
5451#[inline]
5452#[target_feature(enable = "avx512dq,avx512vl")]
5453#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))]
5454#[rustc_legacy_const_generics(4)]
5455#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5456pub fn _mm_mask_range_ps<const IMM8: i32>(
5457 src: __m128,
5458 k: __mmask8,
5459 a: __m128,
5460 b: __m128,
5461) -> __m128 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 4);
        transmute(vrangeps_128(
            a.as_f32x4(),
            b.as_f32x4(),
            IMM8,
            src.as_f32x4(),
            k,
        ))
    }
5472}
5473
5474/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5475/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using
5476/// zeromask k (elements are zeroed out if the corresponding mask bit is not set).
5477/// Lower 2 bits of IMM8 specifies the operation control:
5478/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5479/// Upper 2 bits of IMM8 specifies the sign control:
5480/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5481///
5482/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_ps&ig_expand=5200)
5483#[inline]
5484#[target_feature(enable = "avx512dq,avx512vl")]
5485#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))]
5486#[rustc_legacy_const_generics(3)]
5487#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5488pub fn _mm_maskz_range_ps<const IMM8: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
5489 static_assert_uimm_bits!(IMM8, 4);
    _mm_mask_range_ps::<IMM8>(_mm_setzero_ps(), k, a, b)
5491}
5492
5493/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5494/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
5495/// Lower 2 bits of IMM8 specifies the operation control:
5496/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5497/// Upper 2 bits of IMM8 specifies the sign control:
5498/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5499///
5500/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_range_ps&ig_expand=5204)
5501#[inline]
5502#[target_feature(enable = "avx512dq,avx512vl")]
5503#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))]
5504#[rustc_legacy_const_generics(2)]
5505#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5506pub fn _mm256_range_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 {
5507 static_assert_uimm_bits!(IMM8, 4);
    _mm256_mask_range_ps::<IMM8>(_mm256_setzero_ps(), 0xff, a, b)
5509}
5510
5511/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5512/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using
5513/// writemask k (elements are copied from src to dst if the corresponding mask bit is not set).
5514/// Lower 2 bits of IMM8 specifies the operation control:
5515/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5516/// Upper 2 bits of IMM8 specifies the sign control:
5517/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5518///
5519/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_range_ps&ig_expand=5202)
5520#[inline]
5521#[target_feature(enable = "avx512dq,avx512vl")]
5522#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))]
5523#[rustc_legacy_const_generics(4)]
5524#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5525pub fn _mm256_mask_range_ps<const IMM8: i32>(
5526 src: __m256,
5527 k: __mmask8,
5528 a: __m256,
5529 b: __m256,
5530) -> __m256 {
    unsafe {
        static_assert_uimm_bits!(IMM8, 4);
        transmute(vrangeps_256(
            a.as_f32x8(),
            b.as_f32x8(),
            IMM8,
            src.as_f32x8(),
            k,
        ))
    }
5541}
5542
5543/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5544/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using
5545/// zeromask k (elements are zeroed out if the corresponding mask bit is not set).
5546/// Lower 2 bits of IMM8 specifies the operation control:
5547/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5548/// Upper 2 bits of IMM8 specifies the sign control:
5549/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5550///
5551/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_range_ps&ig_expand=5203)
5552#[inline]
5553#[target_feature(enable = "avx512dq,avx512vl")]
5554#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))]
5555#[rustc_legacy_const_generics(3)]
5556#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5557pub fn _mm256_maskz_range_ps<const IMM8: i32>(k: __mmask8, a: __m256, b: __m256) -> __m256 {
5558 static_assert_uimm_bits!(IMM8, 4);
    _mm256_mask_range_ps::<IMM8>(_mm256_setzero_ps(), k, a, b)
5560}
5561
5562/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5563/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
5564/// Lower 2 bits of IMM8 specifies the operation control:
5565/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5566/// Upper 2 bits of IMM8 specifies the sign control:
5567/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5568///
5569/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_range_ps&ig_expand=5207)
5570#[inline]
5571#[target_feature(enable = "avx512dq")]
5572#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))]
5573#[rustc_legacy_const_generics(2)]
5574#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5575pub fn _mm512_range_ps<const IMM8: i32>(a: __m512, b: __m512) -> __m512 {
5576 static_assert_uimm_bits!(IMM8, 4);
    _mm512_mask_range_ps::<IMM8>(_mm512_setzero_ps(), 0xffff, a, b)
5578}
5579
5580/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5581/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using
5582/// writemask k (elements are copied from src to dst if the corresponding mask bit is not set).
5583/// Lower 2 bits of IMM8 specifies the operation control:
5584/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5585/// Upper 2 bits of IMM8 specifies the sign control:
5586/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5587///
5588/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_range_ps&ig_expand=5205)
5589#[inline]
5590#[target_feature(enable = "avx512dq")]
5591#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))]
5592#[rustc_legacy_const_generics(4)]
5593#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5594pub fn _mm512_mask_range_ps<const IMM8: i32>(
5595 src: __m512,
5596 k: __mmask16,
5597 a: __m512,
5598 b: __m512,
5599) -> __m512 {
5600 unsafe {
5601 static_assert_uimm_bits!(IMM8, 4);
        transmute(vrangeps_512(
5603 a.as_f32x16(),
5604 b.as_f32x16(),
5605 IMM8,
5606 src.as_f32x16(),
5607 k,
5608 _MM_FROUND_CUR_DIRECTION,
5609 ))
5610 }
5611}
5612
5613/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for packed
5614/// single-precision (32-bit) floating-point elements in a and b, and store the results in dst using
5615/// zeromask k (elements are zeroed out if the corresponding mask bit is not set).
5616/// Lower 2 bits of IMM8 specifies the operation control:
5617/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5618/// Upper 2 bits of IMM8 specifies the sign control:
5619/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5620///
5621/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_range_ps&ig_expand=5206)
5622#[inline]
5623#[target_feature(enable = "avx512dq")]
5624#[cfg_attr(test, assert_instr(vrangeps, IMM8 = 5))]
5625#[rustc_legacy_const_generics(3)]
5626#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5627pub fn _mm512_maskz_range_ps<const IMM8: i32>(k: __mmask16, a: __m512, b: __m512) -> __m512 {
5628 static_assert_uimm_bits!(IMM8, 4);
    _mm512_mask_range_ps::<IMM8>(_mm512_setzero_ps(), k, a, b)
5630}
5631
5632/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
5633/// double-precision (64-bit) floating-point element in a and b, store the result in the lower element
5634/// of dst, and copy the upper element from a to the upper element of dst.
5635/// Lower 2 bits of IMM8 specifies the operation control:
5636/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5637/// Upper 2 bits of IMM8 specifies the sign control:
5638/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5639/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5640///
5641/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_range_round_sd&ig_expand=5216)
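///
/// A minimal usage sketch (not from Intel's documentation), assuming runtime `avx512dq` support;
/// IMM8 = 0b0101 selects the maximum of the lower elements and `_MM_FROUND_NO_EXC` suppresses
/// floating-point exceptions:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512dq") {
///     // SAFETY: the required target feature was verified at runtime.
///     unsafe {
///         let a = _mm_set_pd(10.0, 2.0); // upper = 10.0, lower = 2.0
///         let b = _mm_set_pd(99.0, 3.0); // upper ignored, lower = 3.0
///         let r = _mm_range_round_sd::<0b0101, { _MM_FROUND_NO_EXC }>(a, b);
///         let mut out = [0.0f64; 2];
///         _mm_storeu_pd(out.as_mut_ptr(), r);
///         // lower lane: max(2.0, 3.0); upper lane copied from `a`
///         assert_eq!(out, [3.0, 10.0]);
///     }
/// }
/// ```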
5642#[inline]
5643#[target_feature(enable = "avx512dq")]
5644#[cfg_attr(test, assert_instr(vrangesd, IMM8 = 5, SAE = 8))]
5645#[rustc_legacy_const_generics(2, 3)]
5646#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5647pub fn _mm_range_round_sd<const IMM8: i32, const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
5648 static_assert_uimm_bits!(IMM8, 4);
5649 static_assert_sae!(SAE);
    _mm_mask_range_round_sd::<IMM8, SAE>(_mm_setzero_pd(), 0xff, a, b)
5651}
5652
5653/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
5654/// double-precision (64-bit) floating-point element in a and b, store the result in the lower element
5655/// of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the
5656/// upper element from a to the upper element of dst.
5657/// Lower 2 bits of IMM8 specifies the operation control:
5658/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5659/// Upper 2 bits of IMM8 specifies the sign control:
5660/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5661/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5662///
5663/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_round_sd&ig_expand=5214)
5664#[inline]
5665#[target_feature(enable = "avx512dq")]
5666#[cfg_attr(test, assert_instr(vrangesd, IMM8 = 5, SAE = 8))]
5667#[rustc_legacy_const_generics(4, 5)]
5668#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5669pub fn _mm_mask_range_round_sd<const IMM8: i32, const SAE: i32>(
5670 src: __m128d,
5671 k: __mmask8,
5672 a: __m128d,
5673 b: __m128d,
5674) -> __m128d {
5675 unsafe {
5676 static_assert_uimm_bits!(IMM8, 4);
5677 static_assert_sae!(SAE);
        transmute(vrangesd(
5679 a.as_f64x2(),
5680 b.as_f64x2(),
5681 src.as_f64x2(),
5682 k,
5683 IMM8,
5684 SAE,
5685 ))
5686 }
5687}
5688
5689/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
5690/// double-precision (64-bit) floating-point element in a and b, store the result in the lower element
5691/// of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper
5692/// element from a to the upper element of dst.
5693/// Lower 2 bits of IMM8 specifies the operation control:
5694/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5695/// Upper 2 bits of IMM8 specifies the sign control:
5696/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5697/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5698///
5699/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_round_sd&ig_expand=5215)
5700#[inline]
5701#[target_feature(enable = "avx512dq")]
5702#[cfg_attr(test, assert_instr(vrangesd, IMM8 = 5, SAE = 8))]
5703#[rustc_legacy_const_generics(3, 4)]
5704#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5705pub fn _mm_maskz_range_round_sd<const IMM8: i32, const SAE: i32>(
5706 k: __mmask8,
5707 a: __m128d,
5708 b: __m128d,
5709) -> __m128d {
5710 static_assert_uimm_bits!(IMM8, 4);
5711 static_assert_sae!(SAE);
    _mm_mask_range_round_sd::<IMM8, SAE>(_mm_setzero_pd(), k, a, b)
5713}
5714
5715/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
5716/// double-precision (64-bit) floating-point element in a and b, store the result in the lower element
5717/// of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the
5718/// upper element from a to the upper element of dst.
5719/// Lower 2 bits of IMM8 specifies the operation control:
5720/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5721/// Upper 2 bits of IMM8 specifies the sign control:
5722/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5723///
5724/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_sd&ig_expand=5220)
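///
/// A minimal usage sketch (not from Intel's documentation), assuming runtime `avx512dq` support;
/// with mask bit 0 clear the lower lane is taken from `src` instead of being computed:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512dq") {
///     // SAFETY: the required target feature was verified at runtime.
///     unsafe {
///         let src = _mm_set_pd(0.0, 42.0);
///         let a = _mm_set_pd(7.0, 1.0);
///         let b = _mm_set_pd(9.0, 5.0);
///         // Mask bit 0 is not set, so the lower lane comes from `src`;
///         // the upper lane is always copied from `a`.
///         let r = _mm_mask_range_sd::<0b0101>(src, 0, a, b);
///         let mut out = [0.0f64; 2];
///         _mm_storeu_pd(out.as_mut_ptr(), r);
///         assert_eq!(out, [42.0, 7.0]);
///     }
/// }
/// ```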
5725#[inline]
5726#[target_feature(enable = "avx512dq")]
5727#[cfg_attr(test, assert_instr(vrangesd, IMM8 = 5))]
5728#[rustc_legacy_const_generics(4)]
5729#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5730pub fn _mm_mask_range_sd<const IMM8: i32>(
5731 src: __m128d,
5732 k: __mmask8,
5733 a: __m128d,
5734 b: __m128d,
5735) -> __m128d {
5736 unsafe {
5737 static_assert_uimm_bits!(IMM8, 4);
        transmute(vrangesd(
5739 a.as_f64x2(),
5740 b.as_f64x2(),
5741 src.as_f64x2(),
5742 k,
5743 IMM8,
5744 _MM_FROUND_CUR_DIRECTION,
5745 ))
5746 }
5747}
5748
5749/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
5750/// double-precision (64-bit) floating-point element in a and b, store the result in the lower element
5751/// of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper
5752/// element from a to the upper element of dst.
5753/// Lower 2 bits of IMM8 specifies the operation control:
5754/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5755/// Upper 2 bits of IMM8 specifies the sign control:
5756/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5757///
5758/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_sd&ig_expand=5221)
5759#[inline]
5760#[target_feature(enable = "avx512dq")]
5761#[cfg_attr(test, assert_instr(vrangesd, IMM8 = 5))]
5762#[rustc_legacy_const_generics(3)]
5763#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5764pub fn _mm_maskz_range_sd<const IMM8: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
5765 static_assert_uimm_bits!(IMM8, 4);
    _mm_mask_range_sd::<IMM8>(_mm_setzero_pd(), k, a, b)
5767}
5768
5769/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
5770/// single-precision (32-bit) floating-point element in a and b, store the result in the lower element
5771/// of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
5772/// Lower 2 bits of IMM8 specifies the operation control:
5773/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5774/// Upper 2 bits of IMM8 specifies the sign control:
5775/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5776/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5777///
5778/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_range_round_ss&ig_expand=5219)
5779#[inline]
5780#[target_feature(enable = "avx512dq")]
5781#[cfg_attr(test, assert_instr(vrangess, IMM8 = 5, SAE = 8))]
5782#[rustc_legacy_const_generics(2, 3)]
5783#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5784pub fn _mm_range_round_ss<const IMM8: i32, const SAE: i32>(a: __m128, b: __m128) -> __m128 {
5785 static_assert_uimm_bits!(IMM8, 4);
5786 static_assert_sae!(SAE);
    _mm_mask_range_round_ss::<IMM8, SAE>(_mm_setzero_ps(), 0xff, a, b)
5788}
5789
5790/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
5791/// single-precision (32-bit) floating-point element in a and b, store the result in the lower element
5792/// of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the
5793/// upper 3 packed elements from a to the upper elements of dst.
5794/// Lower 2 bits of IMM8 specifies the operation control:
5795/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5796/// Upper 2 bits of IMM8 specifies the sign control:
5797/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5798/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5799///
5800/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_round_ss&ig_expand=5217)
5801#[inline]
5802#[target_feature(enable = "avx512dq")]
5803#[cfg_attr(test, assert_instr(vrangess, IMM8 = 5, SAE = 8))]
5804#[rustc_legacy_const_generics(4, 5)]
5805#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5806pub fn _mm_mask_range_round_ss<const IMM8: i32, const SAE: i32>(
5807 src: __m128,
5808 k: __mmask8,
5809 a: __m128,
5810 b: __m128,
5811) -> __m128 {
5812 unsafe {
5813 static_assert_uimm_bits!(IMM8, 4);
5814 static_assert_sae!(SAE);
        transmute(vrangess(
5816 a.as_f32x4(),
5817 b.as_f32x4(),
5818 src.as_f32x4(),
5819 k,
5820 IMM8,
5821 SAE,
5822 ))
5823 }
5824}
5825
5826/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
5827/// single-precision (32-bit) floating-point element in a and b, store the result in the lower element
5828/// of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper
5829/// 3 packed elements from a to the upper elements of dst.
5830/// Lower 2 bits of IMM8 specifies the operation control:
5831/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5832/// Upper 2 bits of IMM8 specifies the sign control:
5833/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5834/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5835///
5836/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_round_ss&ig_expand=5218)
5837#[inline]
5838#[target_feature(enable = "avx512dq")]
5839#[cfg_attr(test, assert_instr(vrangess, IMM8 = 5, SAE = 8))]
5840#[rustc_legacy_const_generics(3, 4)]
5841#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5842pub fn _mm_maskz_range_round_ss<const IMM8: i32, const SAE: i32>(
5843 k: __mmask8,
5844 a: __m128,
5845 b: __m128,
5846) -> __m128 {
5847 static_assert_uimm_bits!(IMM8, 4);
5848 static_assert_sae!(SAE);
    _mm_mask_range_round_ss::<IMM8, SAE>(_mm_setzero_ps(), k, a, b)
5850}
5851
5852/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
5853/// single-precision (32-bit) floating-point element in a and b, store the result in the lower element
5854/// of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the
5855/// upper 3 packed elements from a to the upper elements of dst.
5856/// Lower 2 bits of IMM8 specifies the operation control:
5857/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5858/// Upper 2 bits of IMM8 specifies the sign control:
5859/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5860///
5861/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_ss&ig_expand=5222)
5862#[inline]
5863#[target_feature(enable = "avx512dq")]
5864#[cfg_attr(test, assert_instr(vrangess, IMM8 = 5))]
5865#[rustc_legacy_const_generics(4)]
5866#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5867pub fn _mm_mask_range_ss<const IMM8: i32>(
5868 src: __m128,
5869 k: __mmask8,
5870 a: __m128,
5871 b: __m128,
5872) -> __m128 {
5873 unsafe {
5874 static_assert_uimm_bits!(IMM8, 4);
        transmute(vrangess(
5876 a.as_f32x4(),
5877 b.as_f32x4(),
5878 src.as_f32x4(),
5879 k,
5880 IMM8,
5881 _MM_FROUND_CUR_DIRECTION,
5882 ))
5883 }
5884}
5885
5886/// Calculate the max, min, absolute max, or absolute min (depending on control in imm8) for the lower
5887/// single-precision (32-bit) floating-point element in a and b, store the result in the lower element
5888/// of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper
5889/// 3 packed elements from a to the upper elements of dst.
5890/// Lower 2 bits of IMM8 specifies the operation control:
5891/// 00 = min, 01 = max, 10 = absolute min, 11 = absolute max.
5892/// Upper 2 bits of IMM8 specifies the sign control:
5893/// 00 = sign from a, 01 = sign from compare result, 10 = clear sign bit, 11 = set sign bit.
5894///
5895/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_ss&ig_expand=5223)
5896#[inline]
5897#[target_feature(enable = "avx512dq")]
5898#[cfg_attr(test, assert_instr(vrangess, IMM8 = 5))]
5899#[rustc_legacy_const_generics(3)]
5900#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5901pub fn _mm_maskz_range_ss<const IMM8: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
5902 static_assert_uimm_bits!(IMM8, 4);
    _mm_mask_range_ss::<IMM8>(_mm_setzero_ps(), k, a, b)
5904}
5905
5906// Reduce
5907
5908/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
5909/// the number of bits specified by imm8, and store the results in dst.
5910/// Rounding is done according to the imm8 parameter, which can be one of:
5911///
5912/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5913/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5914/// * [`_MM_FROUND_TO_POS_INF`] : round up
5915/// * [`_MM_FROUND_TO_ZERO`] : truncate
5916/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5917///
5918/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5919///
5920/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_round_pd&ig_expand=5438)
5921#[inline]
5922#[target_feature(enable = "avx512dq")]
5923#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0, SAE = 8))]
5924#[rustc_legacy_const_generics(1, 2)]
5925#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5926pub fn _mm512_reduce_round_pd<const IMM8: i32, const SAE: i32>(a: __m512d) -> __m512d {
5927 static_assert_uimm_bits!(IMM8, 8);
5928 static_assert_sae!(SAE);
    _mm512_mask_reduce_round_pd::<IMM8, SAE>(_mm512_undefined_pd(), 0xff, a)
5930}
5931
5932/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
5933/// the number of bits specified by imm8, and store the results in dst using writemask k (elements are
5934/// copied from src to dst if the corresponding mask bit is not set).
5935/// Rounding is done according to the imm8 parameter, which can be one of:
5936///
5937/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5938/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5939/// * [`_MM_FROUND_TO_POS_INF`] : round up
5940/// * [`_MM_FROUND_TO_ZERO`] : truncate
5941/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5942///
5943/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5944///
5945/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_round_pd&ig_expand=5436)
5946#[inline]
5947#[target_feature(enable = "avx512dq")]
5948#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0, SAE = 8))]
5949#[rustc_legacy_const_generics(3, 4)]
5950#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5951pub fn _mm512_mask_reduce_round_pd<const IMM8: i32, const SAE: i32>(
5952 src: __m512d,
5953 k: __mmask8,
5954 a: __m512d,
5955) -> __m512d {
5956 unsafe {
5957 static_assert_uimm_bits!(IMM8, 8);
5958 static_assert_sae!(SAE);
        transmute(vreducepd_512(a.as_f64x8(), IMM8, src.as_f64x8(), k, SAE))
5960 }
5961}
5962
5963/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
5964/// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are
5965/// zeroed out if the corresponding mask bit is not set).
5966/// Rounding is done according to the imm8 parameter, which can be one of:
5967///
5968/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5969/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5970/// * [`_MM_FROUND_TO_POS_INF`] : round up
5971/// * [`_MM_FROUND_TO_ZERO`] : truncate
5972/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5973///
5974/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
5975///
5976/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_reduce_round_pd&ig_expand=5437)
5977#[inline]
5978#[target_feature(enable = "avx512dq")]
5979#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0, SAE = 8))]
5980#[rustc_legacy_const_generics(2, 3)]
5981#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5982pub fn _mm512_maskz_reduce_round_pd<const IMM8: i32, const SAE: i32>(
5983 k: __mmask8,
5984 a: __m512d,
5985) -> __m512d {
5986 static_assert_uimm_bits!(IMM8, 8);
5987 static_assert_sae!(SAE);
    _mm512_mask_reduce_round_pd::<IMM8, SAE>(_mm512_setzero_pd(), k, a)
5989}
5990
5991/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
5992/// the number of bits specified by imm8, and store the results in dst.
5993/// Rounding is done according to the imm8 parameter, which can be one of:
5994///
5995/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5996/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5997/// * [`_MM_FROUND_TO_POS_INF`] : round up
5998/// * [`_MM_FROUND_TO_ZERO`] : truncate
5999/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6000///
6001/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_pd&ig_expand=5411)
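///
/// A minimal usage sketch (not from Intel's documentation), assuming runtime support for
/// `avx512dq` and `avx512vl`; with `_MM_FROUND_TO_ZERO` the reduced argument should be what
/// remains of each element after truncating it toward zero:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512dq") && is_x86_feature_detected!("avx512vl") {
///     // SAFETY: the required target features were verified at runtime.
///     unsafe {
///         let a = _mm_set_pd(-1.5, 2.75); // upper = -1.5, lower = 2.75
///         let r = _mm_reduce_pd::<{ _MM_FROUND_TO_ZERO }>(a);
///         let mut out = [0.0f64; 2];
///         _mm_storeu_pd(out.as_mut_ptr(), r);
///         // 2.75 - trunc(2.75) = 0.75; -1.5 - trunc(-1.5) = -0.5
///         assert_eq!(out, [0.75, -0.5]);
///     }
/// }
/// ```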
6002#[inline]
6003#[target_feature(enable = "avx512dq,avx512vl")]
6004#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))]
6005#[rustc_legacy_const_generics(1)]
6006#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6007pub fn _mm_reduce_pd<const IMM8: i32>(a: __m128d) -> __m128d {
6008 static_assert_uimm_bits!(IMM8, 8);
    _mm_mask_reduce_pd::<IMM8>(_mm_undefined_pd(), 0xff, a)
6010}
6011
6012/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
6013/// the number of bits specified by imm8, and store the results in dst using writemask k (elements are
6014/// copied from src to dst if the corresponding mask bit is not set).
6015/// Rounding is done according to the imm8 parameter, which can be one of:
6016///
6017/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6018/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6019/// * [`_MM_FROUND_TO_POS_INF`] : round up
6020/// * [`_MM_FROUND_TO_ZERO`] : truncate
6021/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6022///
6023/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_pd&ig_expand=5409)
6024#[inline]
6025#[target_feature(enable = "avx512dq,avx512vl")]
6026#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))]
6027#[rustc_legacy_const_generics(3)]
6028#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6029pub fn _mm_mask_reduce_pd<const IMM8: i32>(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
6030 unsafe {
6031 static_assert_uimm_bits!(IMM8, 8);
        transmute(vreducepd_128(a.as_f64x2(), IMM8, src.as_f64x2(), k))
6033 }
6034}
6035
6036/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
6037/// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are
6038/// zeroed out if the corresponding mask bit is not set).
6039/// Rounding is done according to the imm8 parameter, which can be one of:
6040///
6041/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6042/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6043/// * [`_MM_FROUND_TO_POS_INF`] : round up
6044/// * [`_MM_FROUND_TO_ZERO`] : truncate
6045/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6046///
6047/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_pd&ig_expand=5410)
6048#[inline]
6049#[target_feature(enable = "avx512dq,avx512vl")]
6050#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))]
6051#[rustc_legacy_const_generics(2)]
6052#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6053pub fn _mm_maskz_reduce_pd<const IMM8: i32>(k: __mmask8, a: __m128d) -> __m128d {
6054 static_assert_uimm_bits!(IMM8, 8);
    _mm_mask_reduce_pd::<IMM8>(_mm_setzero_pd(), k, a)
6056}
6057
6058/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
6059/// the number of bits specified by imm8, and store the results in dst.
6060/// Rounding is done according to the imm8 parameter, which can be one of:
6061///
6062/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6063/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6064/// * [`_MM_FROUND_TO_POS_INF`] : round up
6065/// * [`_MM_FROUND_TO_ZERO`] : truncate
6066/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6067///
6068/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_pd&ig_expand=5414)
6069#[inline]
6070#[target_feature(enable = "avx512dq,avx512vl")]
6071#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))]
6072#[rustc_legacy_const_generics(1)]
6073#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6074pub fn _mm256_reduce_pd<const IMM8: i32>(a: __m256d) -> __m256d {
6075 static_assert_uimm_bits!(IMM8, 8);
    _mm256_mask_reduce_pd::<IMM8>(_mm256_undefined_pd(), 0xff, a)
6077}
6078
6079/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
6080/// the number of bits specified by imm8, and store the results in dst using writemask k (elements are
6081/// copied from src to dst if the corresponding mask bit is not set).
6082/// Rounding is done according to the imm8 parameter, which can be one of:
6083///
6084/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6085/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6086/// * [`_MM_FROUND_TO_POS_INF`] : round up
6087/// * [`_MM_FROUND_TO_ZERO`] : truncate
6088/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6089///
6090/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_pd&ig_expand=5412)
6091#[inline]
6092#[target_feature(enable = "avx512dq,avx512vl")]
6093#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))]
6094#[rustc_legacy_const_generics(3)]
6095#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6096pub fn _mm256_mask_reduce_pd<const IMM8: i32>(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
6097 unsafe {
6098 static_assert_uimm_bits!(IMM8, 8);
        transmute(vreducepd_256(a.as_f64x4(), IMM8, src.as_f64x4(), k))
6100 }
6101}
6102
6103/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
6104/// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are
6105/// zeroed out if the corresponding mask bit is not set).
6106/// Rounding is done according to the imm8 parameter, which can be one of:
6107///
6108/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6109/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6110/// * [`_MM_FROUND_TO_POS_INF`] : round up
6111/// * [`_MM_FROUND_TO_ZERO`] : truncate
6112/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6113///
6114/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_reduce_pd&ig_expand=5413)
6115#[inline]
6116#[target_feature(enable = "avx512dq,avx512vl")]
6117#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))]
6118#[rustc_legacy_const_generics(2)]
6119#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6120pub fn _mm256_maskz_reduce_pd<const IMM8: i32>(k: __mmask8, a: __m256d) -> __m256d {
6121 static_assert_uimm_bits!(IMM8, 8);
    _mm256_mask_reduce_pd::<IMM8>(_mm256_setzero_pd(), k, a)
6123}
6124
6125/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
6126/// the number of bits specified by imm8, and store the results in dst.
6127/// Rounding is done according to the imm8 parameter, which can be one of:
6128///
6129/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6130/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6131/// * [`_MM_FROUND_TO_POS_INF`] : round up
6132/// * [`_MM_FROUND_TO_ZERO`] : truncate
6133/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6134///
6135/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_pd&ig_expand=5417)
6136#[inline]
6137#[target_feature(enable = "avx512dq")]
6138#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))]
6139#[rustc_legacy_const_generics(1)]
6140#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6141pub fn _mm512_reduce_pd<const IMM8: i32>(a: __m512d) -> __m512d {
6142 static_assert_uimm_bits!(IMM8, 8);
    _mm512_mask_reduce_pd::<IMM8>(_mm512_undefined_pd(), 0xff, a)
6144}
6145
6146/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
6147/// the number of bits specified by imm8, and store the results in dst using writemask k (elements are
6148/// copied from src to dst if the corresponding mask bit is not set).
6149/// Rounding is done according to the imm8 parameter, which can be one of:
6150///
6151/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6152/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6153/// * [`_MM_FROUND_TO_POS_INF`] : round up
6154/// * [`_MM_FROUND_TO_ZERO`] : truncate
6155/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6156///
6157/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_pd&ig_expand=5415)
6158#[inline]
6159#[target_feature(enable = "avx512dq")]
6160#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))]
6161#[rustc_legacy_const_generics(3)]
6162#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6163pub fn _mm512_mask_reduce_pd<const IMM8: i32>(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
6164 unsafe {
6165 static_assert_uimm_bits!(IMM8, 8);
        transmute(vreducepd_512(
6167 a.as_f64x8(),
6168 IMM8,
6169 src.as_f64x8(),
6170 k,
6171 _MM_FROUND_CUR_DIRECTION,
6172 ))
6173 }
6174}
6175
6176/// Extract the reduced argument of packed double-precision (64-bit) floating-point elements in a by
6177/// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are
6178/// zeroed out if the corresponding mask bit is not set).
6179/// Rounding is done according to the imm8 parameter, which can be one of:
6180///
6181/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6182/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6183/// * [`_MM_FROUND_TO_POS_INF`] : round up
6184/// * [`_MM_FROUND_TO_ZERO`] : truncate
6185/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6186///
6187/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_reduce_pd&ig_expand=5416)
6188#[inline]
6189#[target_feature(enable = "avx512dq")]
6190#[cfg_attr(test, assert_instr(vreducepd, IMM8 = 0))]
6191#[rustc_legacy_const_generics(2)]
6192#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6193pub fn _mm512_maskz_reduce_pd<const IMM8: i32>(k: __mmask8, a: __m512d) -> __m512d {
6194 static_assert_uimm_bits!(IMM8, 8);
    _mm512_mask_reduce_pd::<IMM8>(_mm512_setzero_pd(), k, a)
6196}
6197
6198/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
6199/// the number of bits specified by imm8, and store the results in dst.
6200/// Rounding is done according to the imm8 parameter, which can be one of:
6201///
6202/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6203/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6204/// * [`_MM_FROUND_TO_POS_INF`] : round up
6205/// * [`_MM_FROUND_TO_ZERO`] : truncate
6206/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6207///
6208/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
6209///
6210/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_round_ps&ig_expand=5444)
6211#[inline]
6212#[target_feature(enable = "avx512dq")]
6213#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0, SAE = 8))]
6214#[rustc_legacy_const_generics(1, 2)]
6215#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6216pub fn _mm512_reduce_round_ps<const IMM8: i32, const SAE: i32>(a: __m512) -> __m512 {
6217 static_assert_uimm_bits!(IMM8, 8);
6218 static_assert_sae!(SAE);
    _mm512_mask_reduce_round_ps::<IMM8, SAE>(_mm512_undefined_ps(), 0xffff, a)
6220}
6221
6222/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
6223/// the number of bits specified by imm8, and store the results in dst using writemask k (elements are
6224/// copied from src to dst if the corresponding mask bit is not set).
6225/// Rounding is done according to the imm8 parameter, which can be one of:
6226///
6227/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6228/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6229/// * [`_MM_FROUND_TO_POS_INF`] : round up
6230/// * [`_MM_FROUND_TO_ZERO`] : truncate
6231/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6232///
6233/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
6234///
6235/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_round_ps&ig_expand=5442)
6236#[inline]
6237#[target_feature(enable = "avx512dq")]
6238#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0, SAE = 8))]
6239#[rustc_legacy_const_generics(3, 4)]
6240#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6241pub fn _mm512_mask_reduce_round_ps<const IMM8: i32, const SAE: i32>(
6242 src: __m512,
6243 k: __mmask16,
6244 a: __m512,
6245) -> __m512 {
6246 unsafe {
6247 static_assert_uimm_bits!(IMM8, 8);
6248 static_assert_sae!(SAE);
        transmute(vreduceps_512(a.as_f32x16(), IMM8, src.as_f32x16(), k, SAE))
6250 }
6251}
6252
6253/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
6254/// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are
6255/// zeroed out if the corresponding mask bit is not set).
6256/// Rounding is done according to the imm8 parameter, which can be one of:
6257///
6258/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6259/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6260/// * [`_MM_FROUND_TO_POS_INF`] : round up
6261/// * [`_MM_FROUND_TO_ZERO`] : truncate
6262/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6263///
6264/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
6265///
6266/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_reduce_round_ps&ig_expand=5443)
6267#[inline]
6268#[target_feature(enable = "avx512dq")]
6269#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0, SAE = 8))]
6270#[rustc_legacy_const_generics(2, 3)]
6271#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6272pub fn _mm512_maskz_reduce_round_ps<const IMM8: i32, const SAE: i32>(
6273 k: __mmask16,
6274 a: __m512,
6275) -> __m512 {
6276 static_assert_uimm_bits!(IMM8, 8);
6277 static_assert_sae!(SAE);
    _mm512_mask_reduce_round_ps::<IMM8, SAE>(_mm512_setzero_ps(), k, a)
6279}
6280
6281/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
6282/// the number of bits specified by imm8, and store the results in dst.
6283/// Rounding is done according to the imm8 parameter, which can be one of:
6284///
6285/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6286/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6287/// * [`_MM_FROUND_TO_POS_INF`] : round up
6288/// * [`_MM_FROUND_TO_ZERO`] : truncate
6289/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6290///
6291/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_ps&ig_expand=5429)
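///
/// A minimal usage sketch (not from Intel's documentation), assuming runtime support for
/// `avx512dq` and `avx512vl`; with `_MM_FROUND_TO_NEG_INF` the reduced argument should be the
/// non-negative distance of each element above its floor:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512dq") && is_x86_feature_detected!("avx512vl") {
///     // SAFETY: the required target features were verified at runtime.
///     unsafe {
///         let a = _mm_setr_ps(1.25, -1.25, 2.5, -0.75);
///         let r = _mm_reduce_ps::<{ _MM_FROUND_TO_NEG_INF }>(a);
///         let mut out = [0.0f32; 4];
///         _mm_storeu_ps(out.as_mut_ptr(), r);
///         // e.g. -1.25 - floor(-1.25) = -1.25 - (-2.0) = 0.75
///         assert_eq!(out, [0.25, 0.75, 0.5, 0.25]);
///     }
/// }
/// ```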
6292#[inline]
6293#[target_feature(enable = "avx512dq,avx512vl")]
6294#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))]
6295#[rustc_legacy_const_generics(1)]
6296#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6297pub fn _mm_reduce_ps<const IMM8: i32>(a: __m128) -> __m128 {
6298 static_assert_uimm_bits!(IMM8, 8);
    _mm_mask_reduce_ps::<IMM8>(_mm_undefined_ps(), 0xff, a)
6300}
6301
6302/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
6303/// the number of bits specified by imm8, and store the results in dst using writemask k (elements are
6304/// copied from src to dst if the corresponding mask bit is not set).
6305/// Rounding is done according to the imm8 parameter, which can be one of:
6306///
6307/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6308/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6309/// * [`_MM_FROUND_TO_POS_INF`] : round up
6310/// * [`_MM_FROUND_TO_ZERO`] : truncate
6311/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6312///
6313/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_ps&ig_expand=5427)
6314#[inline]
6315#[target_feature(enable = "avx512dq,avx512vl")]
6316#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))]
6317#[rustc_legacy_const_generics(3)]
6318#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6319pub fn _mm_mask_reduce_ps<const IMM8: i32>(src: __m128, k: __mmask8, a: __m128) -> __m128 {
6320 unsafe {
6321 static_assert_uimm_bits!(IMM8, 8);
        transmute(vreduceps_128(a.as_f32x4(), IMM8, src.as_f32x4(), k))
6323 }
6324}
6325
6326/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
6327/// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are
6328/// zeroed out if the corresponding mask bit is not set).
6329/// Rounding is done according to the imm8 parameter, which can be one of:
6330///
6331/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6332/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6333/// * [`_MM_FROUND_TO_POS_INF`] : round up
6334/// * [`_MM_FROUND_TO_ZERO`] : truncate
6335/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6336///
6337/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_ps&ig_expand=5428)
6338#[inline]
6339#[target_feature(enable = "avx512dq,avx512vl")]
6340#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))]
6341#[rustc_legacy_const_generics(2)]
6342#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6343pub fn _mm_maskz_reduce_ps<const IMM8: i32>(k: __mmask8, a: __m128) -> __m128 {
6344 static_assert_uimm_bits!(IMM8, 8);
    _mm_mask_reduce_ps::<IMM8>(_mm_setzero_ps(), k, a)
6346}
6347
6348/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
6349/// the number of bits specified by imm8, and store the results in dst.
6350/// Rounding is done according to the imm8 parameter, which can be one of:
6351///
6352/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6353/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6354/// * [`_MM_FROUND_TO_POS_INF`] : round up
6355/// * [`_MM_FROUND_TO_ZERO`] : truncate
6356/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6357///
6358/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_reduce_ps&ig_expand=5432)
6359#[inline]
6360#[target_feature(enable = "avx512dq,avx512vl")]
6361#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))]
6362#[rustc_legacy_const_generics(1)]
6363#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6364pub fn _mm256_reduce_ps<const IMM8: i32>(a: __m256) -> __m256 {
6365 static_assert_uimm_bits!(IMM8, 8);
    _mm256_mask_reduce_ps::<IMM8>(_mm256_undefined_ps(), 0xff, a)
6367}
6368
6369/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
6370/// the number of bits specified by imm8, and store the results in dst using writemask k (elements are
6371/// copied from src to dst if the corresponding mask bit is not set).
6372/// Rounding is done according to the imm8 parameter, which can be one of:
6373///
6374/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6375/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6376/// * [`_MM_FROUND_TO_POS_INF`] : round up
6377/// * [`_MM_FROUND_TO_ZERO`] : truncate
6378/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6379///
6380/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_reduce_ps&ig_expand=5430)
6381#[inline]
6382#[target_feature(enable = "avx512dq,avx512vl")]
6383#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))]
6384#[rustc_legacy_const_generics(3)]
6385#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6386pub fn _mm256_mask_reduce_ps<const IMM8: i32>(src: __m256, k: __mmask8, a: __m256) -> __m256 {
6387 unsafe {
6388 static_assert_uimm_bits!(IMM8, 8);
        transmute(vreduceps_256(a.as_f32x8(), IMM8, src.as_f32x8(), k))
6390 }
6391}
6392
6393/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
6394/// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are
6395/// zeroed out if the corresponding mask bit is not set).
6396/// Rounding is done according to the imm8 parameter, which can be one of:
6397///
6398/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6399/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6400/// * [`_MM_FROUND_TO_POS_INF`] : round up
6401/// * [`_MM_FROUND_TO_ZERO`] : truncate
6402/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6403///
6404/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_reduce_ps&ig_expand=5431)
6405#[inline]
6406#[target_feature(enable = "avx512dq,avx512vl")]
6407#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))]
6408#[rustc_legacy_const_generics(2)]
6409#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6410pub fn _mm256_maskz_reduce_ps<const IMM8: i32>(k: __mmask8, a: __m256) -> __m256 {
6411 static_assert_uimm_bits!(IMM8, 8);
    _mm256_mask_reduce_ps::<IMM8>(_mm256_setzero_ps(), k, a)
6413}
6414
6415/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
6416/// the number of bits specified by imm8, and store the results in dst.
6417/// Rounding is done according to the imm8 parameter, which can be one of:
6418///
6419/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6420/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6421/// * [`_MM_FROUND_TO_POS_INF`] : round up
6422/// * [`_MM_FROUND_TO_ZERO`] : truncate
6423/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6424///
6425/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_ps&ig_expand=5435)
6426#[inline]
6427#[target_feature(enable = "avx512dq")]
6428#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))]
6429#[rustc_legacy_const_generics(1)]
6430#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6431pub fn _mm512_reduce_ps<const IMM8: i32>(a: __m512) -> __m512 {
6432 static_assert_uimm_bits!(IMM8, 8);
    _mm512_mask_reduce_ps::<IMM8>(_mm512_undefined_ps(), 0xffff, a)
6434}
6435
6436/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
6437/// the number of bits specified by imm8, and store the results in dst using writemask k (elements are
6438/// copied from src to dst if the corresponding mask bit is not set).
6439/// Rounding is done according to the imm8 parameter, which can be one of:
6440///
6441/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6442/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6443/// * [`_MM_FROUND_TO_POS_INF`] : round up
6444/// * [`_MM_FROUND_TO_ZERO`] : truncate
6445/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6446///
6447/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_ps&ig_expand=5433)
6448#[inline]
6449#[target_feature(enable = "avx512dq")]
6450#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))]
6451#[rustc_legacy_const_generics(3)]
6452#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6453pub fn _mm512_mask_reduce_ps<const IMM8: i32>(src: __m512, k: __mmask16, a: __m512) -> __m512 {
6454 unsafe {
6455 static_assert_uimm_bits!(IMM8, 8);
        transmute(vreduceps_512(
6457 a.as_f32x16(),
6458 IMM8,
6459 src.as_f32x16(),
6460 k,
6461 _MM_FROUND_CUR_DIRECTION,
6462 ))
6463 }
6464}
6465
6466/// Extract the reduced argument of packed single-precision (32-bit) floating-point elements in a by
6467/// the number of bits specified by imm8, and store the results in dst using zeromask k (elements are
6468/// zeroed out if the corresponding mask bit is not set).
6469/// Rounding is done according to the imm8 parameter, which can be one of:
6470///
6471/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6472/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6473/// * [`_MM_FROUND_TO_POS_INF`] : round up
6474/// * [`_MM_FROUND_TO_ZERO`] : truncate
6475/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6476///
6477/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_reduce_ps&ig_expand=5434)
6478#[inline]
6479#[target_feature(enable = "avx512dq")]
6480#[cfg_attr(test, assert_instr(vreduceps, IMM8 = 0))]
6481#[rustc_legacy_const_generics(2)]
6482#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6483pub fn _mm512_maskz_reduce_ps<const IMM8: i32>(k: __mmask16, a: __m512) -> __m512 {
6484 static_assert_uimm_bits!(IMM8, 8);
    _mm512_mask_reduce_ps::<IMM8>(_mm512_setzero_ps(), k, a)
6486}
6487
6488/// Extract the reduced argument of the lower double-precision (64-bit) floating-point element in b
6489/// by the number of bits specified by imm8, store the result in the lower element of dst, and copy
6490/// the upper element from a to the upper element of dst.
6491/// Rounding is done according to the imm8 parameter, which can be one of:
6492///
6493/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6494/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6495/// * [`_MM_FROUND_TO_POS_INF`] : round up
6496/// * [`_MM_FROUND_TO_ZERO`] : truncate
6497/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6498///
6499/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
6500///
6501/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_round_sd&ig_expand=5447)
6502#[inline]
6503#[target_feature(enable = "avx512dq")]
6504#[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0, SAE = 8))]
6505#[rustc_legacy_const_generics(2, 3)]
6506#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6507pub fn _mm_reduce_round_sd<const IMM8: i32, const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
6508 static_assert_uimm_bits!(IMM8, 8);
6509 static_assert_sae!(SAE);
    _mm_mask_reduce_round_sd::<IMM8, SAE>(_mm_undefined_pd(), 0xff, a, b)
6511}
6512
6513/// Extract the reduced argument of the lower double-precision (64-bit) floating-point element in b
6514/// by the number of bits specified by imm8, store the result in the lower element of dst using writemask
6515/// k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a
6516/// to the upper element of dst.
6517/// Rounding is done according to the imm8 parameter, which can be one of:
6518///
6519/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6520/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6521/// * [`_MM_FROUND_TO_POS_INF`] : round up
6522/// * [`_MM_FROUND_TO_ZERO`] : truncate
6523/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6524///
6525/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
6526///
6527/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_round_sd&ig_expand=5445)
6528#[inline]
6529#[target_feature(enable = "avx512dq")]
6530#[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0, SAE = 8))]
6531#[rustc_legacy_const_generics(4, 5)]
6532#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6533pub fn _mm_mask_reduce_round_sd<const IMM8: i32, const SAE: i32>(
6534 src: __m128d,
6535 k: __mmask8,
6536 a: __m128d,
6537 b: __m128d,
6538) -> __m128d {
6539 unsafe {
6540 static_assert_uimm_bits!(IMM8, 8);
6541 static_assert_sae!(SAE);
        transmute(vreducesd(
6543 a.as_f64x2(),
6544 b.as_f64x2(),
6545 src.as_f64x2(),
6546 k,
6547 IMM8,
6548 SAE,
6549 ))
6550 }
6551}
6552
6553/// Extract the reduced argument of the lower double-precision (64-bit) floating-point element in b
6554/// by the number of bits specified by imm8, store the result in the lower element of dst using zeromask
6555/// k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a
6556/// to the upper element of dst.
6557/// Rounding is done according to the imm8 parameter, which can be one of:
6558///
6559/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6560/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6561/// * [`_MM_FROUND_TO_POS_INF`] : round up
6562/// * [`_MM_FROUND_TO_ZERO`] : truncate
6563/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6564///
6565/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
6566///
6567/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_round_sd&ig_expand=5446)
6568#[inline]
6569#[target_feature(enable = "avx512dq")]
6570#[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0, SAE = 8))]
6571#[rustc_legacy_const_generics(3, 4)]
6572#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6573pub fn _mm_maskz_reduce_round_sd<const IMM8: i32, const SAE: i32>(
6574 k: __mmask8,
6575 a: __m128d,
6576 b: __m128d,
6577) -> __m128d {
6578 static_assert_uimm_bits!(IMM8, 8);
6579 static_assert_sae!(SAE);
    _mm_mask_reduce_round_sd::<IMM8, SAE>(_mm_setzero_pd(), k, a, b)
6581}
6582
6583/// Extract the reduced argument of the lower double-precision (64-bit) floating-point element in b
6584/// by the number of bits specified by imm8, store the result in the lower element of dst using, and
6585/// copy the upper element from a.
6586/// to the upper element of dst.
6587/// Rounding is done according to the imm8 parameter, which can be one of:
6588///
6589/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6590/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6591/// * [`_MM_FROUND_TO_POS_INF`] : round up
6592/// * [`_MM_FROUND_TO_ZERO`] : truncate
6593/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6594///
6595/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_sd&ig_expand=5456)
6596#[inline]
6597#[target_feature(enable = "avx512dq")]
6598#[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0))]
6599#[rustc_legacy_const_generics(2)]
6600#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6601pub fn _mm_reduce_sd<const IMM8: i32>(a: __m128d, b: __m128d) -> __m128d {
6602 static_assert_uimm_bits!(IMM8, 8);
    _mm_mask_reduce_sd::<IMM8>(_mm_undefined_pd(), 0xff, a, b)
6604}
6605
6606/// Extract the reduced argument of the lower double-precision (64-bit) floating-point element in b
6607/// by the number of bits specified by imm8, store the result in the lower element of dst using writemask
6608/// k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a
6609/// to the upper element of dst.
6610/// Rounding is done according to the imm8 parameter, which can be one of:
6611///
6612/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6613/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6614/// * [`_MM_FROUND_TO_POS_INF`] : round up
6615/// * [`_MM_FROUND_TO_ZERO`] : truncate
6616/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6617///
6618/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_sd&ig_expand=5454)
6619#[inline]
6620#[target_feature(enable = "avx512dq")]
6621#[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0))]
6622#[rustc_legacy_const_generics(4)]
6623#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6624pub fn _mm_mask_reduce_sd<const IMM8: i32>(
6625 src: __m128d,
6626 k: __mmask8,
6627 a: __m128d,
6628 b: __m128d,
6629) -> __m128d {
6630 unsafe {
6631 static_assert_uimm_bits!(IMM8, 8);
        transmute(vreducesd(
6633 a.as_f64x2(),
6634 b.as_f64x2(),
6635 src.as_f64x2(),
6636 k,
6637 IMM8,
6638 _MM_FROUND_CUR_DIRECTION,
6639 ))
6640 }
6641}
6642
6643/// Extract the reduced argument of the lower double-precision (64-bit) floating-point element in b
6644/// by the number of bits specified by imm8, store the result in the lower element of dst using zeromask
6645/// k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a
6646/// to the upper element of dst.
6647/// Rounding is done according to the imm8 parameter, which can be one of:
6648///
6649/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6650/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6651/// * [`_MM_FROUND_TO_POS_INF`] : round up
6652/// * [`_MM_FROUND_TO_ZERO`] : truncate
6653/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6654///
6655/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_sd&ig_expand=5455)
6656#[inline]
6657#[target_feature(enable = "avx512dq")]
6658#[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0))]
6659#[rustc_legacy_const_generics(3)]
6660#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6661pub fn _mm_maskz_reduce_sd<const IMM8: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
6662 static_assert_uimm_bits!(IMM8, 8);
6663    _mm_mask_reduce_sd::<IMM8>(_mm_setzero_pd(), k, a, b)
6664}
6665
6666/// Extract the reduced argument of the lower single-precision (32-bit) floating-point element in b
6667/// by the number of bits specified by imm8, store the result in the lower element of dst, and copy
6668/// the upper element from a
6669/// to the upper element of dst.
6670/// Rounding is done according to the imm8 parameter, which can be one of:
6671///
6672/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6673/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6674/// * [`_MM_FROUND_TO_POS_INF`] : round up
6675/// * [`_MM_FROUND_TO_ZERO`] : truncate
6676/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6677///
6678/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
6679///
6680/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_round_ss&ig_expand=5453)
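///
/// A minimal usage sketch (illustrative values, not from the upstream docs;
/// assumes `avx512dq` is available to the caller). The first const parameter
/// selects the reduction and rounding behaviour, the second the exception
/// behaviour:
///
/// ```ignore
/// let a = _mm_set_ps(4.0, 3.0, 2.0, 9.0);
/// let b = _mm_set_ps(8.0, 7.0, 6.0, 1.75);
/// // Truncate with zero fraction bits and suppress exceptions: the lower lane
/// // becomes 1.75 - 1.0 = 0.75, the three upper lanes are copied from `a`.
/// let r = _mm_reduce_round_ss::<{ _MM_FROUND_TO_ZERO }, { _MM_FROUND_NO_EXC }>(a, b);
/// ```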
6681#[inline]
6682#[target_feature(enable = "avx512dq")]
6683#[cfg_attr(test, assert_instr(vreducess, IMM8 = 0, SAE = 8))]
6684#[rustc_legacy_const_generics(2, 3)]
6685#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6686pub fn _mm_reduce_round_ss<const IMM8: i32, const SAE: i32>(a: __m128, b: __m128) -> __m128 {
6687 static_assert_uimm_bits!(IMM8, 8);
6688 static_assert_sae!(SAE);
6689    _mm_mask_reduce_round_ss::<IMM8, SAE>(_mm_undefined_ps(), 0xff, a, b)
6690}
6691
6692/// Extract the reduced argument of the lower single-precision (32-bit) floating-point element in b
6693/// by the number of bits specified by imm8, store the result in the lower element of dst using writemask
6694/// k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a
6695/// to the upper element of dst.
6696/// Rounding is done according to the imm8 parameter, which can be one of:
6697///
6698/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6699/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6700/// * [`_MM_FROUND_TO_POS_INF`] : round up
6701/// * [`_MM_FROUND_TO_ZERO`] : truncate
6702/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6703///
6704/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
6705///
6706/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_round_ss&ig_expand=5451)
6707#[inline]
6708#[target_feature(enable = "avx512dq")]
6709#[cfg_attr(test, assert_instr(vreducess, IMM8 = 0, SAE = 8))]
6710#[rustc_legacy_const_generics(4, 5)]
6711#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6712pub fn _mm_mask_reduce_round_ss<const IMM8: i32, const SAE: i32>(
6713 src: __m128,
6714 k: __mmask8,
6715 a: __m128,
6716 b: __m128,
6717) -> __m128 {
6718 unsafe {
6719 static_assert_uimm_bits!(IMM8, 8);
6720 static_assert_sae!(SAE);
6721        transmute(vreducess(
6722 a.as_f32x4(),
6723 b.as_f32x4(),
6724 src.as_f32x4(),
6725 k,
6726 IMM8,
6727 SAE,
6728 ))
6729 }
6730}
6731
6732/// Extract the reduced argument of the lower single-precision (32-bit) floating-point element in b
6733/// by the number of bits specified by imm8, store the result in the lower element of dst using zeromask
6734/// k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a
6735/// to the upper element of dst.
6736/// Rounding is done according to the imm8 parameter, which can be one of:
6737///
6738/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6739/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6740/// * [`_MM_FROUND_TO_POS_INF`] : round up
6741/// * [`_MM_FROUND_TO_ZERO`] : truncate
6742/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6743///
6744/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
6745///
6746/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_round_ss&ig_expand=5452)
6747#[inline]
6748#[target_feature(enable = "avx512dq")]
6749#[cfg_attr(test, assert_instr(vreducess, IMM8 = 0, SAE = 8))]
6750#[rustc_legacy_const_generics(3, 4)]
6751#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6752pub fn _mm_maskz_reduce_round_ss<const IMM8: i32, const SAE: i32>(
6753 k: __mmask8,
6754 a: __m128,
6755 b: __m128,
6756) -> __m128 {
6757 static_assert_uimm_bits!(IMM8, 8);
6758 static_assert_sae!(SAE);
6759    _mm_mask_reduce_round_ss::<IMM8, SAE>(_mm_setzero_ps(), k, a, b)
6760}
6761
6762/// Extract the reduced argument of the lower single-precision (32-bit) floating-point element in b
6763/// by the number of bits specified by imm8, store the result in the lower element of dst, and copy
6764/// the upper element from a
6765/// to the upper element of dst.
6766/// Rounding is done according to the imm8 parameter, which can be one of:
6767///
6768/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6769/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6770/// * [`_MM_FROUND_TO_POS_INF`] : round up
6771/// * [`_MM_FROUND_TO_ZERO`] : truncate
6772/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6773///
6774/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_ss&ig_expand=5462)
6775#[inline]
6776#[target_feature(enable = "avx512dq")]
6777#[cfg_attr(test, assert_instr(vreducess, IMM8 = 0))]
6778#[rustc_legacy_const_generics(2)]
6779#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6780pub fn _mm_reduce_ss<const IMM8: i32>(a: __m128, b: __m128) -> __m128 {
6781 static_assert_uimm_bits!(IMM8, 8);
6782    _mm_mask_reduce_ss::<IMM8>(_mm_undefined_ps(), 0xff, a, b)
6783}
6784
6785/// Extract the reduced argument of the lower single-precision (32-bit) floating-point element in b
6786/// by the number of bits specified by imm8, store the result in the lower element of dst using writemask
6787/// k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a
6788/// to the upper element of dst.
6789/// Rounding is done according to the imm8 parameter, which can be one of:
6790///
6791/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6792/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6793/// * [`_MM_FROUND_TO_POS_INF`] : round up
6794/// * [`_MM_FROUND_TO_ZERO`] : truncate
6795/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6796///
6797/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_ss&ig_expand=5460)
6798#[inline]
6799#[target_feature(enable = "avx512dq")]
6800#[cfg_attr(test, assert_instr(vreducess, IMM8 = 0))]
6801#[rustc_legacy_const_generics(4)]
6802#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6803pub fn _mm_mask_reduce_ss<const IMM8: i32>(
6804 src: __m128,
6805 k: __mmask8,
6806 a: __m128,
6807 b: __m128,
6808) -> __m128 {
6809 unsafe {
6810 static_assert_uimm_bits!(IMM8, 8);
6811        transmute(vreducess(
6812 a.as_f32x4(),
6813 b.as_f32x4(),
6814 src.as_f32x4(),
6815 k,
6816 IMM8,
6817 _MM_FROUND_CUR_DIRECTION,
6818 ))
6819 }
6820}
6821
6822/// Extract the reduced argument of the lower single-precision (32-bit) floating-point element in b
6823/// by the number of bits specified by imm8, store the result in the lower element of dst using zeromask
6824/// k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a
6825/// to the upper element of dst.
6826/// Rounding is done according to the imm8 parameter, which can be one of:
6827///
6828/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
6829/// * [`_MM_FROUND_TO_NEG_INF`] : round down
6830/// * [`_MM_FROUND_TO_POS_INF`] : round up
6831/// * [`_MM_FROUND_TO_ZERO`] : truncate
6832/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
6833///
6834/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_ss&ig_expand=5461)
6835#[inline]
6836#[target_feature(enable = "avx512dq")]
6837#[cfg_attr(test, assert_instr(vreducess, IMM8 = 0))]
6838#[rustc_legacy_const_generics(3)]
6839#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6840pub fn _mm_maskz_reduce_ss<const IMM8: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
6841 static_assert_uimm_bits!(IMM8, 8);
6842    _mm_mask_reduce_ss::<IMM8>(_mm_setzero_ps(), k, a, b)
6843}
6844
6845// FP-Class
6846
6847/// Test packed double-precision (64-bit) floating-point elements in a for special categories specified
6848/// by imm8, and store the results in mask vector k.
6849/// imm can be a combination of:
6850///
6851/// - 0x01 // QNaN
6852/// - 0x02 // Positive Zero
6853/// - 0x04 // Negative Zero
6854/// - 0x08 // Positive Infinity
6855/// - 0x10 // Negative Infinity
6856/// - 0x20 // Denormal
6857/// - 0x40 // Negative
6858/// - 0x80 // SNaN
6859///
6860/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fpclass_pd_mask&ig_expand=3493)
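///
/// The category bits may be OR-ed together. A minimal usage sketch
/// (illustrative, not from the upstream docs; assumes `avx512dq,avx512vl`):
///
/// ```ignore
/// let a = _mm_set_pd(f64::NAN, 1.0);
/// // 0x01 | 0x80 matches any NaN (quiet or signaling). Bit 0 of the result
/// // corresponds to the lower element (1.0), bit 1 to the upper element (NaN).
/// let k = _mm_fpclass_pd_mask::<{ 0x01 | 0x80 }>(a);
/// assert_eq!(k, 0b10);
/// ```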
6861#[inline]
6862#[target_feature(enable = "avx512dq,avx512vl")]
6863#[cfg_attr(test, assert_instr(vfpclasspd, IMM8 = 0))]
6864#[rustc_legacy_const_generics(1)]
6865#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6866pub fn _mm_fpclass_pd_mask<const IMM8: i32>(a: __m128d) -> __mmask8 {
6867 static_assert_uimm_bits!(IMM8, 8);
6868    _mm_mask_fpclass_pd_mask::<IMM8>(0xff, a)
6869}
6870
6871/// Test packed double-precision (64-bit) floating-point elements in a for special categories specified
6872/// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the
6873/// corresponding mask bit is not set).
6874/// imm can be a combination of:
6875///
6876/// - 0x01 // QNaN
6877/// - 0x02 // Positive Zero
6878/// - 0x04 // Negative Zero
6879/// - 0x08 // Positive Infinity
6880/// - 0x10 // Negative Infinity
6881/// - 0x20 // Denormal
6882/// - 0x40 // Negative
6883/// - 0x80 // SNaN
6884///
6885/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fpclass_pd_mask&ig_expand=3494)
6886#[inline]
6887#[target_feature(enable = "avx512dq,avx512vl")]
6888#[cfg_attr(test, assert_instr(vfpclasspd, IMM8 = 0))]
6889#[rustc_legacy_const_generics(2)]
6890#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6891pub fn _mm_mask_fpclass_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m128d) -> __mmask8 {
6892 unsafe {
6893 static_assert_uimm_bits!(IMM8, 8);
6894        transmute(vfpclasspd_128(a.as_f64x2(), IMM8, k1))
6895 }
6896}
6897
6898/// Test packed double-precision (64-bit) floating-point elements in a for special categories specified
6899/// by imm8, and store the results in mask vector k.
6900/// imm can be a combination of:
6901///
6902/// - 0x01 // QNaN
6903/// - 0x02 // Positive Zero
6904/// - 0x04 // Negative Zero
6905/// - 0x08 // Positive Infinity
6906/// - 0x10 // Negative Infinity
6907/// - 0x20 // Denormal
6908/// - 0x40 // Negative
6909/// - 0x80 // SNaN
6910///
6911/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fpclass_pd_mask&ig_expand=3495)
6912#[inline]
6913#[target_feature(enable = "avx512dq,avx512vl")]
6914#[cfg_attr(test, assert_instr(vfpclasspd, IMM8 = 0))]
6915#[rustc_legacy_const_generics(1)]
6916#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6917pub fn _mm256_fpclass_pd_mask<const IMM8: i32>(a: __m256d) -> __mmask8 {
6918 static_assert_uimm_bits!(IMM8, 8);
6919    _mm256_mask_fpclass_pd_mask::<IMM8>(0xff, a)
6920}
6921
6922/// Test packed double-precision (64-bit) floating-point elements in a for special categories specified
6923/// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the
6924/// corresponding mask bit is not set).
6925/// imm can be a combination of:
6926///
6927/// - 0x01 // QNaN
6928/// - 0x02 // Positive Zero
6929/// - 0x04 // Negative Zero
6930/// - 0x08 // Positive Infinity
6931/// - 0x10 // Negative Infinity
6932/// - 0x20 // Denormal
6933/// - 0x40 // Negative
6934/// - 0x80 // SNaN
6935///
6936/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fpclass_pd_mask&ig_expand=3496)
6937#[inline]
6938#[target_feature(enable = "avx512dq,avx512vl")]
6939#[cfg_attr(test, assert_instr(vfpclasspd, IMM8 = 0))]
6940#[rustc_legacy_const_generics(2)]
6941#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6942pub fn _mm256_mask_fpclass_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m256d) -> __mmask8 {
6943 unsafe {
6944 static_assert_uimm_bits!(IMM8, 8);
6945        transmute(vfpclasspd_256(a.as_f64x4(), IMM8, k1))
6946 }
6947}
6948
6949/// Test packed double-precision (64-bit) floating-point elements in a for special categories specified
6950/// by imm8, and store the results in mask vector k.
6951/// imm can be a combination of:
6952///
6953/// - 0x01 // QNaN
6954/// - 0x02 // Positive Zero
6955/// - 0x04 // Negative Zero
6956/// - 0x08 // Positive Infinity
6957/// - 0x10 // Negative Infinity
6958/// - 0x20 // Denormal
6959/// - 0x40 // Negative
6960/// - 0x80 // SNaN
6961///
6962/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fpclass_pd_mask&ig_expand=3497)
6963#[inline]
6964#[target_feature(enable = "avx512dq")]
6965#[cfg_attr(test, assert_instr(vfpclasspd, IMM8 = 0))]
6966#[rustc_legacy_const_generics(1)]
6967#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6968pub fn _mm512_fpclass_pd_mask<const IMM8: i32>(a: __m512d) -> __mmask8 {
6969 static_assert_uimm_bits!(IMM8, 8);
6970    _mm512_mask_fpclass_pd_mask::<IMM8>(0xff, a)
6971}
6972
6973/// Test packed double-precision (64-bit) floating-point elements in a for special categories specified
6974/// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the
6975/// corresponding mask bit is not set).
6976/// imm can be a combination of:
6977///
6978/// - 0x01 // QNaN
6979/// - 0x02 // Positive Zero
6980/// - 0x04 // Negative Zero
6981/// - 0x08 // Positive Infinity
6982/// - 0x10 // Negative Infinity
6983/// - 0x20 // Denormal
6984/// - 0x40 // Negative
6985/// - 0x80 // SNaN
6986///
6987/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fpclass_pd_mask&ig_expand=3498)
6988#[inline]
6989#[target_feature(enable = "avx512dq")]
6990#[cfg_attr(test, assert_instr(vfpclasspd, IMM8 = 0))]
6991#[rustc_legacy_const_generics(2)]
6992#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6993pub fn _mm512_mask_fpclass_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m512d) -> __mmask8 {
6994 unsafe {
6995 static_assert_uimm_bits!(IMM8, 8);
6996        transmute(vfpclasspd_512(a.as_f64x8(), IMM8, k1))
6997 }
6998}
6999
7000/// Test packed single-precision (32-bit) floating-point elements in a for special categories specified
7001/// by imm8, and store the results in mask vector k.
7002/// imm can be a combination of:
7003///
7004/// - 0x01 // QNaN
7005/// - 0x02 // Positive Zero
7006/// - 0x04 // Negative Zero
7007/// - 0x08 // Positive Infinity
7008/// - 0x10 // Negative Infinity
7009/// - 0x20 // Denormal
7010/// - 0x40 // Negative
7011/// - 0x80 // SNaN
7012///
7013/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fpclass_ps_mask&ig_expand=3505)
7014#[inline]
7015#[target_feature(enable = "avx512dq,avx512vl")]
7016#[cfg_attr(test, assert_instr(vfpclassps, IMM8 = 0))]
7017#[rustc_legacy_const_generics(1)]
7018#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7019pub fn _mm_fpclass_ps_mask<const IMM8: i32>(a: __m128) -> __mmask8 {
7020 static_assert_uimm_bits!(IMM8, 8);
7021    _mm_mask_fpclass_ps_mask::<IMM8>(0xff, a)
7022}
7023
7024/// Test packed single-precision (32-bit) floating-point elements in a for special categories specified
7025/// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the
7026/// corresponding mask bit is not set).
7027/// imm can be a combination of:
7028///
7029/// - 0x01 // QNaN
7030/// - 0x02 // Positive Zero
7031/// - 0x04 // Negative Zero
7032/// - 0x08 // Positive Infinity
7033/// - 0x10 // Negative Infinity
7034/// - 0x20 // Denormal
7035/// - 0x40 // Negative
7036/// - 0x80 // SNaN
7037///
7038/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fpclass_ps_mask&ig_expand=3506)
7039#[inline]
7040#[target_feature(enable = "avx512dq,avx512vl")]
7041#[cfg_attr(test, assert_instr(vfpclassps, IMM8 = 0))]
7042#[rustc_legacy_const_generics(2)]
7043#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7044pub fn _mm_mask_fpclass_ps_mask<const IMM8: i32>(k1: __mmask8, a: __m128) -> __mmask8 {
7045 unsafe {
7046 static_assert_uimm_bits!(IMM8, 8);
7047        transmute(vfpclassps_128(a.as_f32x4(), IMM8, k1))
7048 }
7049}
7050
7051/// Test packed single-precision (32-bit) floating-point elements in a for special categories specified
7052/// by imm8, and store the results in mask vector k.
7053/// imm can be a combination of:
7054///
7055/// - 0x01 // QNaN
7056/// - 0x02 // Positive Zero
7057/// - 0x04 // Negative Zero
7058/// - 0x08 // Positive Infinity
7059/// - 0x10 // Negative Infinity
7060/// - 0x20 // Denormal
7061/// - 0x40 // Negative
7062/// - 0x80 // SNaN
7063///
7064/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fpclass_ps_mask&ig_expand=3507)
7065#[inline]
7066#[target_feature(enable = "avx512dq,avx512vl")]
7067#[cfg_attr(test, assert_instr(vfpclassps, IMM8 = 0))]
7068#[rustc_legacy_const_generics(1)]
7069#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7070pub fn _mm256_fpclass_ps_mask<const IMM8: i32>(a: __m256) -> __mmask8 {
7071 static_assert_uimm_bits!(IMM8, 8);
7072    _mm256_mask_fpclass_ps_mask::<IMM8>(0xff, a)
7073}
7074
7075/// Test packed single-precision (32-bit) floating-point elements in a for special categories specified
7076/// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the
7077/// corresponding mask bit is not set).
7078/// imm can be a combination of:
7079///
7080/// - 0x01 // QNaN
7081/// - 0x02 // Positive Zero
7082/// - 0x04 // Negative Zero
7083/// - 0x08 // Positive Infinity
7084/// - 0x10 // Negative Infinity
7085/// - 0x20 // Denormal
7086/// - 0x40 // Negative
7087/// - 0x80 // SNaN
7088///
7089/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fpclass_ps_mask&ig_expand=3508)
7090#[inline]
7091#[target_feature(enable = "avx512dq,avx512vl")]
7092#[cfg_attr(test, assert_instr(vfpclassps, IMM8 = 0))]
7093#[rustc_legacy_const_generics(2)]
7094#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7095pub fn _mm256_mask_fpclass_ps_mask<const IMM8: i32>(k1: __mmask8, a: __m256) -> __mmask8 {
7096 unsafe {
7097 static_assert_uimm_bits!(IMM8, 8);
7098        transmute(vfpclassps_256(a.as_f32x8(), IMM8, k1))
7099 }
7100}
7101
7102/// Test packed single-precision (32-bit) floating-point elements in a for special categories specified
7103/// by imm8, and store the results in mask vector k.
7104/// imm can be a combination of:
7105///
7106/// - 0x01 // QNaN
7107/// - 0x02 // Positive Zero
7108/// - 0x04 // Negative Zero
7109/// - 0x08 // Positive Infinity
7110/// - 0x10 // Negative Infinity
7111/// - 0x20 // Denormal
7112/// - 0x40 // Negative
7113/// - 0x80 // SNaN
7114///
7115/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fpclass_ps_mask&ig_expand=3509)
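///
/// A minimal usage sketch (illustrative, not from the upstream docs; assumes
/// `avx512dq`): the returned `__mmask16` holds one bit per lane, so ordinary
/// integer operations can be used on it, e.g. counting matches:
///
/// ```ignore
/// let a = _mm512_set1_ps(0.0);
/// // Category 0x02 is positive zero, so every one of the 16 lanes matches.
/// let k = _mm512_fpclass_ps_mask::<0x02>(a);
/// assert_eq!(k.count_ones(), 16);
/// ```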
7116#[inline]
7117#[target_feature(enable = "avx512dq")]
7118#[cfg_attr(test, assert_instr(vfpclassps, IMM8 = 0))]
7119#[rustc_legacy_const_generics(1)]
7120#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7121pub fn _mm512_fpclass_ps_mask<const IMM8: i32>(a: __m512) -> __mmask16 {
7122 static_assert_uimm_bits!(IMM8, 8);
7123    _mm512_mask_fpclass_ps_mask::<IMM8>(0xffff, a)
7124}
7125
7126/// Test packed single-precision (32-bit) floating-point elements in a for special categories specified
7127/// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the
7128/// corresponding mask bit is not set).
7129/// imm can be a combination of:
7130///
7131/// - 0x01 // QNaN
7132/// - 0x02 // Positive Zero
7133/// - 0x04 // Negative Zero
7134/// - 0x08 // Positive Infinity
7135/// - 0x10 // Negative Infinity
7136/// - 0x20 // Denormal
7137/// - 0x40 // Negative
7138/// - 0x80 // SNaN
7139///
7140/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fpclass_ps_mask&ig_expand=3510)
7141#[inline]
7142#[target_feature(enable = "avx512dq")]
7143#[cfg_attr(test, assert_instr(vfpclassps, IMM8 = 0))]
7144#[rustc_legacy_const_generics(2)]
7145#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7146pub fn _mm512_mask_fpclass_ps_mask<const IMM8: i32>(k1: __mmask16, a: __m512) -> __mmask16 {
7147 unsafe {
7148 static_assert_uimm_bits!(IMM8, 8);
7149        transmute(vfpclassps_512(a.as_f32x16(), IMM8, k1))
7150 }
7151}
7152
7153/// Test the lower double-precision (64-bit) floating-point element in a for special categories specified
7154/// by imm8, and store the results in mask vector k.
7155/// imm can be a combination of:
7156///
7157/// - 0x01 // QNaN
7158/// - 0x02 // Positive Zero
7159/// - 0x04 // Negative Zero
7160/// - 0x08 // Positive Infinity
7161/// - 0x10 // Negative Infinity
7162/// - 0x20 // Denormal
7163/// - 0x40 // Negative
7164/// - 0x80 // SNaN
7165///
7166/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fpclass_sd_mask&ig_expand=3511)
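///
/// A minimal usage sketch (illustrative, not from the upstream docs; assumes
/// `avx512dq`): only bit 0 of the returned mask is meaningful for the scalar
/// form:
///
/// ```ignore
/// let a = _mm_set_sd(f64::INFINITY);
/// // 0x08 | 0x10 matches positive or negative infinity.
/// let k = _mm_fpclass_sd_mask::<{ 0x08 | 0x10 }>(a);
/// assert_eq!(k & 1, 1);
/// ```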
7167#[inline]
7168#[target_feature(enable = "avx512dq")]
7169#[cfg_attr(test, assert_instr(vfpclasssd, IMM8 = 0))]
7170#[rustc_legacy_const_generics(1)]
7171#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7172pub fn _mm_fpclass_sd_mask<const IMM8: i32>(a: __m128d) -> __mmask8 {
7173 static_assert_uimm_bits!(IMM8, 8);
7174    _mm_mask_fpclass_sd_mask::<IMM8>(0xff, a)
7175}
7176
7177/// Test the lower double-precision (64-bit) floating-point element in a for special categories specified
7178/// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the
7179/// corresponding mask bit is not set).
7180/// imm can be a combination of:
7181///
7182/// - 0x01 // QNaN
7183/// - 0x02 // Positive Zero
7184/// - 0x04 // Negative Zero
7185/// - 0x08 // Positive Infinity
7186/// - 0x10 // Negative Infinity
7187/// - 0x20 // Denormal
7188/// - 0x40 // Negative
7189/// - 0x80 // SNaN
7190///
7191/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fpclass_sd_mask&ig_expand=3512)
7192#[inline]
7193#[target_feature(enable = "avx512dq")]
7194#[cfg_attr(test, assert_instr(vfpclasssd, IMM8 = 0))]
7195#[rustc_legacy_const_generics(2)]
7196#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7197pub fn _mm_mask_fpclass_sd_mask<const IMM8: i32>(k1: __mmask8, a: __m128d) -> __mmask8 {
7198 unsafe {
7199 static_assert_uimm_bits!(IMM8, 8);
7200        vfpclasssd(a.as_f64x2(), IMM8, k1)
7201 }
7202}
7203
7204/// Test the lower single-precision (32-bit) floating-point element in a for special categories specified
7205/// by imm8, and store the results in mask vector k.
7206/// imm can be a combination of:
7207///
7208/// - 0x01 // QNaN
7209/// - 0x02 // Positive Zero
7210/// - 0x04 // Negative Zero
7211/// - 0x08 // Positive Infinity
7212/// - 0x10 // Negative Infinity
7213/// - 0x20 // Denormal
7214/// - 0x40 // Negative
7215/// - 0x80 // SNaN
7216///
7217/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fpclass_ss_mask&ig_expand=3515)
7218#[inline]
7219#[target_feature(enable = "avx512dq")]
7220#[cfg_attr(test, assert_instr(vfpclassss, IMM8 = 0))]
7221#[rustc_legacy_const_generics(1)]
7222#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7223pub fn _mm_fpclass_ss_mask<const IMM8: i32>(a: __m128) -> __mmask8 {
7224 static_assert_uimm_bits!(IMM8, 8);
7225    _mm_mask_fpclass_ss_mask::<IMM8>(0xff, a)
7226}
7227
7228/// Test the lower single-precision (32-bit) floating-point element in a for special categories specified
7229/// by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the
7230/// corresponding mask bit is not set).
7231/// imm can be a combination of:
7232///
7233/// - 0x01 // QNaN
7234/// - 0x02 // Positive Zero
7235/// - 0x04 // Negative Zero
7236/// - 0x08 // Positive Infinity
7237/// - 0x10 // Negative Infinity
7238/// - 0x20 // Denormal
7239/// - 0x40 // Negative
7240/// - 0x80 // SNaN
7241///
7242/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fpclass_ss_mask&ig_expand=3516)
7243#[inline]
7244#[target_feature(enable = "avx512dq")]
7245#[cfg_attr(test, assert_instr(vfpclassss, IMM8 = 0))]
7246#[rustc_legacy_const_generics(2)]
7247#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7248pub fn _mm_mask_fpclass_ss_mask<const IMM8: i32>(k1: __mmask8, a: __m128) -> __mmask8 {
7249 unsafe {
7250 static_assert_uimm_bits!(IMM8, 8);
7251        vfpclassss(a.as_f32x4(), IMM8, k1)
7252 }
7253}
7254
7255#[allow(improper_ctypes)]
7256unsafe extern "C" {
7257 #[link_name = "llvm.x86.avx512.sitofp.round.v2f64.v2i64"]
7258    unsafe fn vcvtqq2pd_128(a: i64x2, rounding: i32) -> f64x2;
7259 #[link_name = "llvm.x86.avx512.sitofp.round.v4f64.v4i64"]
7260    unsafe fn vcvtqq2pd_256(a: i64x4, rounding: i32) -> f64x4;
7261 #[link_name = "llvm.x86.avx512.sitofp.round.v8f64.v8i64"]
7262    unsafe fn vcvtqq2pd_512(a: i64x8, rounding: i32) -> f64x8;
7263
7264 #[link_name = "llvm.x86.avx512.mask.cvtqq2ps.128"]
7265    unsafe fn vcvtqq2ps_128(a: i64x2, src: f32x4, k: __mmask8) -> f32x4;
7266 #[link_name = "llvm.x86.avx512.sitofp.round.v4f32.v4i64"]
7267    unsafe fn vcvtqq2ps_256(a: i64x4, rounding: i32) -> f32x4;
7268 #[link_name = "llvm.x86.avx512.sitofp.round.v8f32.v8i64"]
7269    unsafe fn vcvtqq2ps_512(a: i64x8, rounding: i32) -> f32x8;
7270
7271 #[link_name = "llvm.x86.avx512.uitofp.round.v2f64.v2i64"]
7272    unsafe fn vcvtuqq2pd_128(a: u64x2, rounding: i32) -> f64x2;
7273 #[link_name = "llvm.x86.avx512.uitofp.round.v4f64.v4i64"]
7274    unsafe fn vcvtuqq2pd_256(a: u64x4, rounding: i32) -> f64x4;
7275 #[link_name = "llvm.x86.avx512.uitofp.round.v8f64.v8i64"]
7276    unsafe fn vcvtuqq2pd_512(a: u64x8, rounding: i32) -> f64x8;
7277
7278 #[link_name = "llvm.x86.avx512.mask.cvtuqq2ps.128"]
7279    unsafe fn vcvtuqq2ps_128(a: u64x2, src: f32x4, k: __mmask8) -> f32x4;
7280 #[link_name = "llvm.x86.avx512.uitofp.round.v4f32.v4i64"]
7281    unsafe fn vcvtuqq2ps_256(a: u64x4, rounding: i32) -> f32x4;
7282 #[link_name = "llvm.x86.avx512.uitofp.round.v8f32.v8i64"]
7283    unsafe fn vcvtuqq2ps_512(a: u64x8, rounding: i32) -> f32x8;
7284
7285 #[link_name = "llvm.x86.avx512.mask.cvtpd2qq.128"]
7286    unsafe fn vcvtpd2qq_128(a: f64x2, src: i64x2, k: __mmask8) -> i64x2;
7287 #[link_name = "llvm.x86.avx512.mask.cvtpd2qq.256"]
7288    unsafe fn vcvtpd2qq_256(a: f64x4, src: i64x4, k: __mmask8) -> i64x4;
7289 #[link_name = "llvm.x86.avx512.mask.cvtpd2qq.512"]
7290    unsafe fn vcvtpd2qq_512(a: f64x8, src: i64x8, k: __mmask8, rounding: i32) -> i64x8;
7291
7292 #[link_name = "llvm.x86.avx512.mask.cvtps2qq.128"]
7293    unsafe fn vcvtps2qq_128(a: f32x4, src: i64x2, k: __mmask8) -> i64x2;
7294 #[link_name = "llvm.x86.avx512.mask.cvtps2qq.256"]
7295    unsafe fn vcvtps2qq_256(a: f32x4, src: i64x4, k: __mmask8) -> i64x4;
7296 #[link_name = "llvm.x86.avx512.mask.cvtps2qq.512"]
7297    unsafe fn vcvtps2qq_512(a: f32x8, src: i64x8, k: __mmask8, rounding: i32) -> i64x8;
7298
7299 #[link_name = "llvm.x86.avx512.mask.cvtpd2uqq.128"]
7300    unsafe fn vcvtpd2uqq_128(a: f64x2, src: u64x2, k: __mmask8) -> u64x2;
7301 #[link_name = "llvm.x86.avx512.mask.cvtpd2uqq.256"]
7302    unsafe fn vcvtpd2uqq_256(a: f64x4, src: u64x4, k: __mmask8) -> u64x4;
7303 #[link_name = "llvm.x86.avx512.mask.cvtpd2uqq.512"]
7304    unsafe fn vcvtpd2uqq_512(a: f64x8, src: u64x8, k: __mmask8, rounding: i32) -> u64x8;
7305
7306 #[link_name = "llvm.x86.avx512.mask.cvtps2uqq.128"]
7307    unsafe fn vcvtps2uqq_128(a: f32x4, src: u64x2, k: __mmask8) -> u64x2;
7308 #[link_name = "llvm.x86.avx512.mask.cvtps2uqq.256"]
7309    unsafe fn vcvtps2uqq_256(a: f32x4, src: u64x4, k: __mmask8) -> u64x4;
7310 #[link_name = "llvm.x86.avx512.mask.cvtps2uqq.512"]
7311    unsafe fn vcvtps2uqq_512(a: f32x8, src: u64x8, k: __mmask8, rounding: i32) -> u64x8;
7312
7313 #[link_name = "llvm.x86.avx512.mask.cvttpd2qq.128"]
7314    unsafe fn vcvttpd2qq_128(a: f64x2, src: i64x2, k: __mmask8) -> i64x2;
7315 #[link_name = "llvm.x86.avx512.mask.cvttpd2qq.256"]
7316    unsafe fn vcvttpd2qq_256(a: f64x4, src: i64x4, k: __mmask8) -> i64x4;
7317 #[link_name = "llvm.x86.avx512.mask.cvttpd2qq.512"]
7318    unsafe fn vcvttpd2qq_512(a: f64x8, src: i64x8, k: __mmask8, sae: i32) -> i64x8;
7319
7320 #[link_name = "llvm.x86.avx512.mask.cvttps2qq.128"]
7321    unsafe fn vcvttps2qq_128(a: f32x4, src: i64x2, k: __mmask8) -> i64x2;
7322 #[link_name = "llvm.x86.avx512.mask.cvttps2qq.256"]
7323    unsafe fn vcvttps2qq_256(a: f32x4, src: i64x4, k: __mmask8) -> i64x4;
7324 #[link_name = "llvm.x86.avx512.mask.cvttps2qq.512"]
7325    unsafe fn vcvttps2qq_512(a: f32x8, src: i64x8, k: __mmask8, sae: i32) -> i64x8;
7326
7327 #[link_name = "llvm.x86.avx512.mask.cvttpd2uqq.128"]
7328    unsafe fn vcvttpd2uqq_128(a: f64x2, src: u64x2, k: __mmask8) -> u64x2;
7329 #[link_name = "llvm.x86.avx512.mask.cvttpd2uqq.256"]
7330    unsafe fn vcvttpd2uqq_256(a: f64x4, src: u64x4, k: __mmask8) -> u64x4;
7331 #[link_name = "llvm.x86.avx512.mask.cvttpd2uqq.512"]
7332    unsafe fn vcvttpd2uqq_512(a: f64x8, src: u64x8, k: __mmask8, sae: i32) -> u64x8;
7333
7334 #[link_name = "llvm.x86.avx512.mask.cvttps2uqq.128"]
7335    unsafe fn vcvttps2uqq_128(a: f32x4, src: u64x2, k: __mmask8) -> u64x2;
7336 #[link_name = "llvm.x86.avx512.mask.cvttps2uqq.256"]
7337    unsafe fn vcvttps2uqq_256(a: f32x4, src: u64x4, k: __mmask8) -> u64x4;
7338 #[link_name = "llvm.x86.avx512.mask.cvttps2uqq.512"]
7339    unsafe fn vcvttps2uqq_512(a: f32x8, src: u64x8, k: __mmask8, sae: i32) -> u64x8;
7340
7341 #[link_name = "llvm.x86.avx512.mask.range.pd.128"]
7342    unsafe fn vrangepd_128(a: f64x2, b: f64x2, imm8: i32, src: f64x2, k: __mmask8) -> f64x2;
7343 #[link_name = "llvm.x86.avx512.mask.range.pd.256"]
7344    unsafe fn vrangepd_256(a: f64x4, b: f64x4, imm8: i32, src: f64x4, k: __mmask8) -> f64x4;
7345 #[link_name = "llvm.x86.avx512.mask.range.pd.512"]
7346    unsafe fn vrangepd_512(a: f64x8, b: f64x8, imm8: i32, src: f64x8, k: __mmask8, sae: i32) -> f64x8;
7347
7348 #[link_name = "llvm.x86.avx512.mask.range.ps.128"]
7349    unsafe fn vrangeps_128(a: f32x4, b: f32x4, imm8: i32, src: f32x4, k: __mmask8) -> f32x4;
7350 #[link_name = "llvm.x86.avx512.mask.range.ps.256"]
7351    unsafe fn vrangeps_256(a: f32x8, b: f32x8, imm8: i32, src: f32x8, k: __mmask8) -> f32x8;
7352 #[link_name = "llvm.x86.avx512.mask.range.ps.512"]
7353    unsafe fn vrangeps_512(a: f32x16, b: f32x16, imm8: i32, src: f32x16, k: __mmask16, sae: i32)
7354 -> f32x16;
7355
7356 #[link_name = "llvm.x86.avx512.mask.range.sd"]
7357    unsafe fn vrangesd(a: f64x2, b: f64x2, src: f64x2, k: __mmask8, imm8: i32, sae: i32) -> f64x2;
7358 #[link_name = "llvm.x86.avx512.mask.range.ss"]
7359    unsafe fn vrangess(a: f32x4, b: f32x4, src: f32x4, k: __mmask8, imm8: i32, sae: i32) -> f32x4;
7360
7361 #[link_name = "llvm.x86.avx512.mask.reduce.pd.128"]
7362    unsafe fn vreducepd_128(a: f64x2, imm8: i32, src: f64x2, k: __mmask8) -> f64x2;
7363 #[link_name = "llvm.x86.avx512.mask.reduce.pd.256"]
7364    unsafe fn vreducepd_256(a: f64x4, imm8: i32, src: f64x4, k: __mmask8) -> f64x4;
7365 #[link_name = "llvm.x86.avx512.mask.reduce.pd.512"]
7366    unsafe fn vreducepd_512(a: f64x8, imm8: i32, src: f64x8, k: __mmask8, sae: i32) -> f64x8;
7367
7368 #[link_name = "llvm.x86.avx512.mask.reduce.ps.128"]
7369    unsafe fn vreduceps_128(a: f32x4, imm8: i32, src: f32x4, k: __mmask8) -> f32x4;
7370 #[link_name = "llvm.x86.avx512.mask.reduce.ps.256"]
7371    unsafe fn vreduceps_256(a: f32x8, imm8: i32, src: f32x8, k: __mmask8) -> f32x8;
7372 #[link_name = "llvm.x86.avx512.mask.reduce.ps.512"]
7373    unsafe fn vreduceps_512(a: f32x16, imm8: i32, src: f32x16, k: __mmask16, sae: i32) -> f32x16;
7374
7375 #[link_name = "llvm.x86.avx512.mask.reduce.sd"]
7376    unsafe fn vreducesd(a: f64x2, b: f64x2, src: f64x2, k: __mmask8, imm8: i32, sae: i32) -> f64x2;
7377 #[link_name = "llvm.x86.avx512.mask.reduce.ss"]
7378    unsafe fn vreducess(a: f32x4, b: f32x4, src: f32x4, k: __mmask8, imm8: i32, sae: i32) -> f32x4;
7379
7380 #[link_name = "llvm.x86.avx512.mask.fpclass.pd.128"]
7381    unsafe fn vfpclasspd_128(a: f64x2, imm8: i32, k: __mmask8) -> __mmask8;
7382 #[link_name = "llvm.x86.avx512.mask.fpclass.pd.256"]
7383    unsafe fn vfpclasspd_256(a: f64x4, imm8: i32, k: __mmask8) -> __mmask8;
7384 #[link_name = "llvm.x86.avx512.mask.fpclass.pd.512"]
7385    unsafe fn vfpclasspd_512(a: f64x8, imm8: i32, k: __mmask8) -> __mmask8;
7386
7387 #[link_name = "llvm.x86.avx512.mask.fpclass.ps.128"]
7388    unsafe fn vfpclassps_128(a: f32x4, imm8: i32, k: __mmask8) -> __mmask8;
7389 #[link_name = "llvm.x86.avx512.mask.fpclass.ps.256"]
7390    unsafe fn vfpclassps_256(a: f32x8, imm8: i32, k: __mmask8) -> __mmask8;
7391 #[link_name = "llvm.x86.avx512.mask.fpclass.ps.512"]
7392    unsafe fn vfpclassps_512(a: f32x16, imm8: i32, k: __mmask16) -> __mmask16;
7393
7394 #[link_name = "llvm.x86.avx512.mask.fpclass.sd"]
7395    unsafe fn vfpclasssd(a: f64x2, imm8: i32, k: __mmask8) -> __mmask8;
7396 #[link_name = "llvm.x86.avx512.mask.fpclass.ss"]
7397    unsafe fn vfpclassss(a: f32x4, imm8: i32, k: __mmask8) -> __mmask8;
7398}
7399
7400#[cfg(test)]
7401mod tests {
7402 use super::*;
7403 use crate::core_arch::assert_eq_const as assert_eq;
7404 use crate::core_arch::x86::*;
7405
7406 use stdarch_test::simd_test;
7407
7408 const OPRND1_64: f64 = f64::from_bits(0x3333333333333333);
7409 const OPRND2_64: f64 = f64::from_bits(0x5555555555555555);
7410
7411 const AND_64: f64 = f64::from_bits(0x1111111111111111);
7412 const ANDN_64: f64 = f64::from_bits(0x4444444444444444);
7413 const OR_64: f64 = f64::from_bits(0x7777777777777777);
7414 const XOR_64: f64 = f64::from_bits(0x6666666666666666);
7415
7416 const OPRND1_32: f32 = f32::from_bits(0x33333333);
7417 const OPRND2_32: f32 = f32::from_bits(0x55555555);
7418
7419 const AND_32: f32 = f32::from_bits(0x11111111);
7420 const ANDN_32: f32 = f32::from_bits(0x44444444);
7421 const OR_32: f32 = f32::from_bits(0x77777777);
7422 const XOR_32: f32 = f32::from_bits(0x66666666);
7423
7424 #[simd_test(enable = "avx512dq,avx512vl")]
7425 const fn test_mm_mask_and_pd() {
7426 let a = _mm_set1_pd(OPRND1_64);
7427 let b = _mm_set1_pd(OPRND2_64);
7428 let src = _mm_set_pd(1., 2.);
7429 let r = _mm_mask_and_pd(src, 0b01, a, b);
7430 let e = _mm_set_pd(1., AND_64);
7431 assert_eq_m128d(r, e);
7432 }
7433
7434 #[simd_test(enable = "avx512dq,avx512vl")]
7435 const fn test_mm_maskz_and_pd() {
7436 let a = _mm_set1_pd(OPRND1_64);
7437 let b = _mm_set1_pd(OPRND2_64);
7438 let r = _mm_maskz_and_pd(0b01, a, b);
7439 let e = _mm_set_pd(0.0, AND_64);
7440 assert_eq_m128d(r, e);
7441 }
7442
7443 #[simd_test(enable = "avx512dq,avx512vl")]
7444 const fn test_mm256_mask_and_pd() {
7445 let a = _mm256_set1_pd(OPRND1_64);
7446 let b = _mm256_set1_pd(OPRND2_64);
7447 let src = _mm256_set_pd(1., 2., 3., 4.);
7448 let r = _mm256_mask_and_pd(src, 0b0101, a, b);
7449 let e = _mm256_set_pd(1., AND_64, 3., AND_64);
7450 assert_eq_m256d(r, e);
7451 }
7452
7453 #[simd_test(enable = "avx512dq,avx512vl")]
7454 const fn test_mm256_maskz_and_pd() {
7455 let a = _mm256_set1_pd(OPRND1_64);
7456 let b = _mm256_set1_pd(OPRND2_64);
7457 let r = _mm256_maskz_and_pd(0b0101, a, b);
7458 let e = _mm256_set_pd(0.0, AND_64, 0.0, AND_64);
7459 assert_eq_m256d(r, e);
7460 }
7461
7462 #[simd_test(enable = "avx512dq")]
7463 const fn test_mm512_and_pd() {
7464 let a = _mm512_set1_pd(OPRND1_64);
7465 let b = _mm512_set1_pd(OPRND2_64);
7466 let r = _mm512_and_pd(a, b);
7467 let e = _mm512_set1_pd(AND_64);
7468 assert_eq_m512d(r, e);
7469 }
7470
7471 #[simd_test(enable = "avx512dq")]
7472 const fn test_mm512_mask_and_pd() {
7473 let a = _mm512_set1_pd(OPRND1_64);
7474 let b = _mm512_set1_pd(OPRND2_64);
7475 let src = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
7476 let r = _mm512_mask_and_pd(src, 0b01010101, a, b);
7477 let e = _mm512_set_pd(1., AND_64, 3., AND_64, 5., AND_64, 7., AND_64);
7478 assert_eq_m512d(r, e);
7479 }
7480
7481 #[simd_test(enable = "avx512dq")]
7482 const fn test_mm512_maskz_and_pd() {
7483 let a = _mm512_set1_pd(OPRND1_64);
7484 let b = _mm512_set1_pd(OPRND2_64);
7485 let r = _mm512_maskz_and_pd(0b01010101, a, b);
7486 let e = _mm512_set_pd(0.0, AND_64, 0.0, AND_64, 0.0, AND_64, 0.0, AND_64);
7487 assert_eq_m512d(r, e);
7488 }
7489
7490 #[simd_test(enable = "avx512dq,avx512vl")]
7491 const fn test_mm_mask_and_ps() {
7492 let a = _mm_set1_ps(OPRND1_32);
7493 let b = _mm_set1_ps(OPRND2_32);
7494 let src = _mm_set_ps(1., 2., 3., 4.);
7495 let r = _mm_mask_and_ps(src, 0b0101, a, b);
7496 let e = _mm_set_ps(1., AND_32, 3., AND_32);
7497 assert_eq_m128(r, e);
7498 }
7499
7500 #[simd_test(enable = "avx512dq,avx512vl")]
7501 const fn test_mm_maskz_and_ps() {
7502 let a = _mm_set1_ps(OPRND1_32);
7503 let b = _mm_set1_ps(OPRND2_32);
7504 let r = _mm_maskz_and_ps(0b0101, a, b);
7505 let e = _mm_set_ps(0.0, AND_32, 0.0, AND_32);
7506 assert_eq_m128(r, e);
7507 }
7508
7509 #[simd_test(enable = "avx512dq,avx512vl")]
7510 const fn test_mm256_mask_and_ps() {
7511 let a = _mm256_set1_ps(OPRND1_32);
7512 let b = _mm256_set1_ps(OPRND2_32);
7513 let src = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
7514 let r = _mm256_mask_and_ps(src, 0b01010101, a, b);
7515 let e = _mm256_set_ps(1., AND_32, 3., AND_32, 5., AND_32, 7., AND_32);
7516 assert_eq_m256(r, e);
7517 }
7518
7519 #[simd_test(enable = "avx512dq,avx512vl")]
7520 const fn test_mm256_maskz_and_ps() {
7521 let a = _mm256_set1_ps(OPRND1_32);
7522 let b = _mm256_set1_ps(OPRND2_32);
7523 let r = _mm256_maskz_and_ps(0b01010101, a, b);
7524 let e = _mm256_set_ps(0.0, AND_32, 0.0, AND_32, 0.0, AND_32, 0.0, AND_32);
7525 assert_eq_m256(r, e);
7526 }
7527
7528 #[simd_test(enable = "avx512dq")]
7529 const fn test_mm512_and_ps() {
7530 let a = _mm512_set1_ps(OPRND1_32);
7531 let b = _mm512_set1_ps(OPRND2_32);
7532 let r = _mm512_and_ps(a, b);
7533 let e = _mm512_set1_ps(AND_32);
7534 assert_eq_m512(r, e);
7535 }
7536
7537 #[simd_test(enable = "avx512dq")]
7538 const fn test_mm512_mask_and_ps() {
7539 let a = _mm512_set1_ps(OPRND1_32);
7540 let b = _mm512_set1_ps(OPRND2_32);
7541 let src = _mm512_set_ps(
7542 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
7543 );
7544 let r = _mm512_mask_and_ps(src, 0b0101010101010101, a, b);
7545 let e = _mm512_set_ps(
7546 1., AND_32, 3., AND_32, 5., AND_32, 7., AND_32, 9., AND_32, 11., AND_32, 13., AND_32,
7547 15., AND_32,
7548 );
7549 assert_eq_m512(r, e);
7550 }
7551
7552 #[simd_test(enable = "avx512dq")]
7553 const fn test_mm512_maskz_and_ps() {
7554 let a = _mm512_set1_ps(OPRND1_32);
7555 let b = _mm512_set1_ps(OPRND2_32);
7556 let r = _mm512_maskz_and_ps(0b0101010101010101, a, b);
7557 let e = _mm512_set_ps(
7558 0., AND_32, 0., AND_32, 0., AND_32, 0., AND_32, 0., AND_32, 0., AND_32, 0., AND_32, 0.,
7559 AND_32,
7560 );
7561 assert_eq_m512(r, e);
7562 }
7563
7564 #[simd_test(enable = "avx512dq,avx512vl")]
7565 const fn test_mm_mask_andnot_pd() {
7566 let a = _mm_set1_pd(OPRND1_64);
7567 let b = _mm_set1_pd(OPRND2_64);
7568 let src = _mm_set_pd(1., 2.);
7569 let r = _mm_mask_andnot_pd(src, 0b01, a, b);
7570 let e = _mm_set_pd(1., ANDN_64);
7571 assert_eq_m128d(r, e);
7572 }
7573
7574 #[simd_test(enable = "avx512dq,avx512vl")]
7575 const fn test_mm_maskz_andnot_pd() {
7576 let a = _mm_set1_pd(OPRND1_64);
7577 let b = _mm_set1_pd(OPRND2_64);
7578 let r = _mm_maskz_andnot_pd(0b01, a, b);
7579 let e = _mm_set_pd(0.0, ANDN_64);
7580 assert_eq_m128d(r, e);
7581 }
7582
7583 #[simd_test(enable = "avx512dq,avx512vl")]
7584 const fn test_mm256_mask_andnot_pd() {
7585 let a = _mm256_set1_pd(OPRND1_64);
7586 let b = _mm256_set1_pd(OPRND2_64);
7587 let src = _mm256_set_pd(1., 2., 3., 4.);
7588 let r = _mm256_mask_andnot_pd(src, 0b0101, a, b);
7589 let e = _mm256_set_pd(1., ANDN_64, 3., ANDN_64);
7590 assert_eq_m256d(r, e);
7591 }
7592
7593 #[simd_test(enable = "avx512dq,avx512vl")]
7594 const fn test_mm256_maskz_andnot_pd() {
7595 let a = _mm256_set1_pd(OPRND1_64);
7596 let b = _mm256_set1_pd(OPRND2_64);
7597 let r = _mm256_maskz_andnot_pd(0b0101, a, b);
7598 let e = _mm256_set_pd(0.0, ANDN_64, 0.0, ANDN_64);
7599 assert_eq_m256d(r, e);
7600 }
7601
7602 #[simd_test(enable = "avx512dq")]
7603 const fn test_mm512_andnot_pd() {
7604 let a = _mm512_set1_pd(OPRND1_64);
7605 let b = _mm512_set1_pd(OPRND2_64);
7606 let r = _mm512_andnot_pd(a, b);
7607 let e = _mm512_set1_pd(ANDN_64);
7608 assert_eq_m512d(r, e);
7609 }
7610
7611 #[simd_test(enable = "avx512dq")]
7612 const fn test_mm512_mask_andnot_pd() {
7613 let a = _mm512_set1_pd(OPRND1_64);
7614 let b = _mm512_set1_pd(OPRND2_64);
7615 let src = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
7616 let r = _mm512_mask_andnot_pd(src, 0b01010101, a, b);
7617 let e = _mm512_set_pd(1., ANDN_64, 3., ANDN_64, 5., ANDN_64, 7., ANDN_64);
7618 assert_eq_m512d(r, e);
7619 }
7620
7621 #[simd_test(enable = "avx512dq")]
7622 const fn test_mm512_maskz_andnot_pd() {
7623 let a = _mm512_set1_pd(OPRND1_64);
7624 let b = _mm512_set1_pd(OPRND2_64);
7625 let r = _mm512_maskz_andnot_pd(0b01010101, a, b);
7626 let e = _mm512_set_pd(0.0, ANDN_64, 0.0, ANDN_64, 0.0, ANDN_64, 0.0, ANDN_64);
7627 assert_eq_m512d(r, e);
7628 }
7629
7630 #[simd_test(enable = "avx512dq,avx512vl")]
7631 const fn test_mm_mask_andnot_ps() {
7632 let a = _mm_set1_ps(OPRND1_32);
7633 let b = _mm_set1_ps(OPRND2_32);
7634 let src = _mm_set_ps(1., 2., 3., 4.);
7635 let r = _mm_mask_andnot_ps(src, 0b0101, a, b);
7636 let e = _mm_set_ps(1., ANDN_32, 3., ANDN_32);
7637 assert_eq_m128(r, e);
7638 }
7639
7640 #[simd_test(enable = "avx512dq,avx512vl")]
7641 const fn test_mm_maskz_andnot_ps() {
7642 let a = _mm_set1_ps(OPRND1_32);
7643 let b = _mm_set1_ps(OPRND2_32);
7644 let r = _mm_maskz_andnot_ps(0b0101, a, b);
7645 let e = _mm_set_ps(0.0, ANDN_32, 0.0, ANDN_32);
7646 assert_eq_m128(r, e);
7647 }
7648
7649 #[simd_test(enable = "avx512dq,avx512vl")]
7650 const fn test_mm256_mask_andnot_ps() {
7651 let a = _mm256_set1_ps(OPRND1_32);
7652 let b = _mm256_set1_ps(OPRND2_32);
7653 let src = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
7654 let r = _mm256_mask_andnot_ps(src, 0b01010101, a, b);
7655 let e = _mm256_set_ps(1., ANDN_32, 3., ANDN_32, 5., ANDN_32, 7., ANDN_32);
7656 assert_eq_m256(r, e);
7657 }
7658
7659 #[simd_test(enable = "avx512dq,avx512vl")]
7660 const fn test_mm256_maskz_andnot_ps() {
7661 let a = _mm256_set1_ps(OPRND1_32);
7662 let b = _mm256_set1_ps(OPRND2_32);
7663 let r = _mm256_maskz_andnot_ps(0b01010101, a, b);
7664 let e = _mm256_set_ps(0.0, ANDN_32, 0.0, ANDN_32, 0.0, ANDN_32, 0.0, ANDN_32);
7665 assert_eq_m256(r, e);
7666 }
7667
7668 #[simd_test(enable = "avx512dq")]
7669 const fn test_mm512_andnot_ps() {
7670 let a = _mm512_set1_ps(OPRND1_32);
7671 let b = _mm512_set1_ps(OPRND2_32);
7672 let r = _mm512_andnot_ps(a, b);
7673 let e = _mm512_set1_ps(ANDN_32);
7674 assert_eq_m512(r, e);
7675 }
7676
7677 #[simd_test(enable = "avx512dq")]
7678 const fn test_mm512_mask_andnot_ps() {
7679 let a = _mm512_set1_ps(OPRND1_32);
7680 let b = _mm512_set1_ps(OPRND2_32);
7681 let src = _mm512_set_ps(
7682 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
7683 );
7684 let r = _mm512_mask_andnot_ps(src, 0b0101010101010101, a, b);
7685 let e = _mm512_set_ps(
7686 1., ANDN_32, 3., ANDN_32, 5., ANDN_32, 7., ANDN_32, 9., ANDN_32, 11., ANDN_32, 13.,
7687 ANDN_32, 15., ANDN_32,
7688 );
7689 assert_eq_m512(r, e);
7690 }
7691
7692 #[simd_test(enable = "avx512dq")]
7693 const fn test_mm512_maskz_andnot_ps() {
7694 let a = _mm512_set1_ps(OPRND1_32);
7695 let b = _mm512_set1_ps(OPRND2_32);
7696 let r = _mm512_maskz_andnot_ps(0b0101010101010101, a, b);
7697 let e = _mm512_set_ps(
7698 0., ANDN_32, 0., ANDN_32, 0., ANDN_32, 0., ANDN_32, 0., ANDN_32, 0., ANDN_32, 0.,
7699 ANDN_32, 0., ANDN_32,
7700 );
7701 assert_eq_m512(r, e);
7702 }
7703
7704 #[simd_test(enable = "avx512dq,avx512vl")]
7705 const fn test_mm_mask_or_pd() {
7706 let a = _mm_set1_pd(OPRND1_64);
7707 let b = _mm_set1_pd(OPRND2_64);
7708 let src = _mm_set_pd(1., 2.);
7709 let r = _mm_mask_or_pd(src, 0b01, a, b);
7710 let e = _mm_set_pd(1., OR_64);
7711 assert_eq_m128d(r, e);
7712 }
7713
7714 #[simd_test(enable = "avx512dq,avx512vl")]
7715 const fn test_mm_maskz_or_pd() {
7716 let a = _mm_set1_pd(OPRND1_64);
7717 let b = _mm_set1_pd(OPRND2_64);
7718 let r = _mm_maskz_or_pd(0b01, a, b);
7719 let e = _mm_set_pd(0.0, OR_64);
7720 assert_eq_m128d(r, e);
7721 }
7722
7723 #[simd_test(enable = "avx512dq,avx512vl")]
7724 const fn test_mm256_mask_or_pd() {
7725 let a = _mm256_set1_pd(OPRND1_64);
7726 let b = _mm256_set1_pd(OPRND2_64);
7727 let src = _mm256_set_pd(1., 2., 3., 4.);
7728 let r = _mm256_mask_or_pd(src, 0b0101, a, b);
7729 let e = _mm256_set_pd(1., OR_64, 3., OR_64);
7730 assert_eq_m256d(r, e);
7731 }
7732
7733 #[simd_test(enable = "avx512dq,avx512vl")]
7734 const fn test_mm256_maskz_or_pd() {
7735 let a = _mm256_set1_pd(OPRND1_64);
7736 let b = _mm256_set1_pd(OPRND2_64);
7737 let r = _mm256_maskz_or_pd(0b0101, a, b);
7738 let e = _mm256_set_pd(0.0, OR_64, 0.0, OR_64);
7739 assert_eq_m256d(r, e);
7740 }
7741
7742 #[simd_test(enable = "avx512dq")]
7743 const fn test_mm512_or_pd() {
7744 let a = _mm512_set1_pd(OPRND1_64);
7745 let b = _mm512_set1_pd(OPRND2_64);
7746 let r = _mm512_or_pd(a, b);
7747 let e = _mm512_set1_pd(OR_64);
7748 assert_eq_m512d(r, e);
7749 }
7750
7751 #[simd_test(enable = "avx512dq")]
7752 const fn test_mm512_mask_or_pd() {
7753 let a = _mm512_set1_pd(OPRND1_64);
7754 let b = _mm512_set1_pd(OPRND2_64);
7755 let src = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
7756 let r = _mm512_mask_or_pd(src, 0b01010101, a, b);
7757 let e = _mm512_set_pd(1., OR_64, 3., OR_64, 5., OR_64, 7., OR_64);
7758 assert_eq_m512d(r, e);
7759 }
7760
7761 #[simd_test(enable = "avx512dq")]
7762 const fn test_mm512_maskz_or_pd() {
7763 let a = _mm512_set1_pd(OPRND1_64);
7764 let b = _mm512_set1_pd(OPRND2_64);
7765 let r = _mm512_maskz_or_pd(0b01010101, a, b);
7766 let e = _mm512_set_pd(0.0, OR_64, 0.0, OR_64, 0.0, OR_64, 0.0, OR_64);
7767 assert_eq_m512d(r, e);
7768 }
7769
7770 #[simd_test(enable = "avx512dq,avx512vl")]
7771 const fn test_mm_mask_or_ps() {
7772 let a = _mm_set1_ps(OPRND1_32);
7773 let b = _mm_set1_ps(OPRND2_32);
7774 let src = _mm_set_ps(1., 2., 3., 4.);
7775 let r = _mm_mask_or_ps(src, 0b0101, a, b);
7776 let e = _mm_set_ps(1., OR_32, 3., OR_32);
7777 assert_eq_m128(r, e);
7778 }
7779
7780 #[simd_test(enable = "avx512dq,avx512vl")]
7781 const fn test_mm_maskz_or_ps() {
7782 let a = _mm_set1_ps(OPRND1_32);
7783 let b = _mm_set1_ps(OPRND2_32);
7784 let r = _mm_maskz_or_ps(0b0101, a, b);
7785 let e = _mm_set_ps(0.0, OR_32, 0.0, OR_32);
7786 assert_eq_m128(r, e);
7787 }
7788
7789 #[simd_test(enable = "avx512dq,avx512vl")]
7790 const fn test_mm256_mask_or_ps() {
7791 let a = _mm256_set1_ps(OPRND1_32);
7792 let b = _mm256_set1_ps(OPRND2_32);
7793 let src = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
7794 let r = _mm256_mask_or_ps(src, 0b01010101, a, b);
7795 let e = _mm256_set_ps(1., OR_32, 3., OR_32, 5., OR_32, 7., OR_32);
7796 assert_eq_m256(r, e);
7797 }
7798
7799 #[simd_test(enable = "avx512dq,avx512vl")]
7800 const fn test_mm256_maskz_or_ps() {
7801 let a = _mm256_set1_ps(OPRND1_32);
7802 let b = _mm256_set1_ps(OPRND2_32);
7803 let r = _mm256_maskz_or_ps(0b01010101, a, b);
7804 let e = _mm256_set_ps(0.0, OR_32, 0.0, OR_32, 0.0, OR_32, 0.0, OR_32);
7805 assert_eq_m256(r, e);
7806 }
7807
7808 #[simd_test(enable = "avx512dq")]
7809 const fn test_mm512_or_ps() {
7810 let a = _mm512_set1_ps(OPRND1_32);
7811 let b = _mm512_set1_ps(OPRND2_32);
7812 let r = _mm512_or_ps(a, b);
7813 let e = _mm512_set1_ps(OR_32);
7814 assert_eq_m512(r, e);
7815 }
7816
7817 #[simd_test(enable = "avx512dq")]
7818 const fn test_mm512_mask_or_ps() {
7819 let a = _mm512_set1_ps(OPRND1_32);
7820 let b = _mm512_set1_ps(OPRND2_32);
7821 let src = _mm512_set_ps(
7822 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
7823 );
7824 let r = _mm512_mask_or_ps(src, 0b0101010101010101, a, b);
7825 let e = _mm512_set_ps(
7826 1., OR_32, 3., OR_32, 5., OR_32, 7., OR_32, 9., OR_32, 11., OR_32, 13., OR_32, 15.,
7827 OR_32,
7828 );
7829 assert_eq_m512(r, e);
7830 }
7831
7832 #[simd_test(enable = "avx512dq")]
7833 const fn test_mm512_maskz_or_ps() {
7834 let a = _mm512_set1_ps(OPRND1_32);
7835 let b = _mm512_set1_ps(OPRND2_32);
7836 let r = _mm512_maskz_or_ps(0b0101010101010101, a, b);
7837 let e = _mm512_set_ps(
7838 0., OR_32, 0., OR_32, 0., OR_32, 0., OR_32, 0., OR_32, 0., OR_32, 0., OR_32, 0., OR_32,
7839 );
7840 assert_eq_m512(r, e);
7841 }
7842
7843 #[simd_test(enable = "avx512dq,avx512vl")]
7844 const fn test_mm_mask_xor_pd() {
7845 let a = _mm_set1_pd(OPRND1_64);
7846 let b = _mm_set1_pd(OPRND2_64);
7847 let src = _mm_set_pd(1., 2.);
7848 let r = _mm_mask_xor_pd(src, 0b01, a, b);
7849 let e = _mm_set_pd(1., XOR_64);
7850 assert_eq_m128d(r, e);
7851 }
7852
7853 #[simd_test(enable = "avx512dq,avx512vl")]
7854 const fn test_mm_maskz_xor_pd() {
7855 let a = _mm_set1_pd(OPRND1_64);
7856 let b = _mm_set1_pd(OPRND2_64);
7857 let r = _mm_maskz_xor_pd(0b01, a, b);
7858 let e = _mm_set_pd(0.0, XOR_64);
7859 assert_eq_m128d(r, e);
7860 }
7861
7862 #[simd_test(enable = "avx512dq,avx512vl")]
7863 const fn test_mm256_mask_xor_pd() {
7864 let a = _mm256_set1_pd(OPRND1_64);
7865 let b = _mm256_set1_pd(OPRND2_64);
7866 let src = _mm256_set_pd(1., 2., 3., 4.);
7867 let r = _mm256_mask_xor_pd(src, 0b0101, a, b);
7868 let e = _mm256_set_pd(1., XOR_64, 3., XOR_64);
7869 assert_eq_m256d(r, e);
7870 }
7871
7872 #[simd_test(enable = "avx512dq,avx512vl")]
7873 const fn test_mm256_maskz_xor_pd() {
7874 let a = _mm256_set1_pd(OPRND1_64);
7875 let b = _mm256_set1_pd(OPRND2_64);
7876 let r = _mm256_maskz_xor_pd(0b0101, a, b);
7877 let e = _mm256_set_pd(0.0, XOR_64, 0.0, XOR_64);
7878 assert_eq_m256d(r, e);
7879 }
7880
7881 #[simd_test(enable = "avx512dq")]
7882 const fn test_mm512_xor_pd() {
7883 let a = _mm512_set1_pd(OPRND1_64);
7884 let b = _mm512_set1_pd(OPRND2_64);
7885 let r = _mm512_xor_pd(a, b);
7886 let e = _mm512_set1_pd(XOR_64);
7887 assert_eq_m512d(r, e);
7888 }
7889
7890 #[simd_test(enable = "avx512dq")]
7891 const fn test_mm512_mask_xor_pd() {
7892 let a = _mm512_set1_pd(OPRND1_64);
7893 let b = _mm512_set1_pd(OPRND2_64);
7894 let src = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
7895 let r = _mm512_mask_xor_pd(src, 0b01010101, a, b);
7896 let e = _mm512_set_pd(1., XOR_64, 3., XOR_64, 5., XOR_64, 7., XOR_64);
7897 assert_eq_m512d(r, e);
7898 }
7899
7900 #[simd_test(enable = "avx512dq")]
7901 const fn test_mm512_maskz_xor_pd() {
7902 let a = _mm512_set1_pd(OPRND1_64);
7903 let b = _mm512_set1_pd(OPRND2_64);
7904 let r = _mm512_maskz_xor_pd(0b01010101, a, b);
7905 let e = _mm512_set_pd(0.0, XOR_64, 0.0, XOR_64, 0.0, XOR_64, 0.0, XOR_64);
7906 assert_eq_m512d(r, e);
7907 }
7908
7909 #[simd_test(enable = "avx512dq,avx512vl")]
7910 const fn test_mm_mask_xor_ps() {
7911 let a = _mm_set1_ps(OPRND1_32);
7912 let b = _mm_set1_ps(OPRND2_32);
7913 let src = _mm_set_ps(1., 2., 3., 4.);
7914 let r = _mm_mask_xor_ps(src, 0b0101, a, b);
7915 let e = _mm_set_ps(1., XOR_32, 3., XOR_32);
7916 assert_eq_m128(r, e);
7917 }
7918
7919 #[simd_test(enable = "avx512dq,avx512vl")]
7920 const fn test_mm_maskz_xor_ps() {
7921 let a = _mm_set1_ps(OPRND1_32);
7922 let b = _mm_set1_ps(OPRND2_32);
7923 let r = _mm_maskz_xor_ps(0b0101, a, b);
7924 let e = _mm_set_ps(0.0, XOR_32, 0.0, XOR_32);
7925 assert_eq_m128(r, e);
7926 }
7927
7928 #[simd_test(enable = "avx512dq,avx512vl")]
7929 const fn test_mm256_mask_xor_ps() {
7930 let a = _mm256_set1_ps(OPRND1_32);
7931 let b = _mm256_set1_ps(OPRND2_32);
7932 let src = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
7933 let r = _mm256_mask_xor_ps(src, 0b01010101, a, b);
7934 let e = _mm256_set_ps(1., XOR_32, 3., XOR_32, 5., XOR_32, 7., XOR_32);
7935 assert_eq_m256(r, e);
7936 }
7937
7938 #[simd_test(enable = "avx512dq,avx512vl")]
7939 const fn test_mm256_maskz_xor_ps() {
7940 let a = _mm256_set1_ps(OPRND1_32);
7941 let b = _mm256_set1_ps(OPRND2_32);
7942 let r = _mm256_maskz_xor_ps(0b01010101, a, b);
7943 let e = _mm256_set_ps(0.0, XOR_32, 0.0, XOR_32, 0.0, XOR_32, 0.0, XOR_32);
7944 assert_eq_m256(r, e);
7945 }
7946
7947 #[simd_test(enable = "avx512dq")]
7948 const fn test_mm512_xor_ps() {
7949 let a = _mm512_set1_ps(OPRND1_32);
7950 let b = _mm512_set1_ps(OPRND2_32);
7951 let r = _mm512_xor_ps(a, b);
7952 let e = _mm512_set1_ps(XOR_32);
7953 assert_eq_m512(r, e);
7954 }
7955
7956 #[simd_test(enable = "avx512dq")]
7957 const fn test_mm512_mask_xor_ps() {
7958 let a = _mm512_set1_ps(OPRND1_32);
7959 let b = _mm512_set1_ps(OPRND2_32);
7960 let src = _mm512_set_ps(
7961 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
7962 );
7963 let r = _mm512_mask_xor_ps(src, 0b0101010101010101, a, b);
7964 let e = _mm512_set_ps(
7965 1., XOR_32, 3., XOR_32, 5., XOR_32, 7., XOR_32, 9., XOR_32, 11., XOR_32, 13., XOR_32,
7966 15., XOR_32,
7967 );
7968 assert_eq_m512(r, e);
7969 }
7970
7971 #[simd_test(enable = "avx512dq")]
7972 const fn test_mm512_maskz_xor_ps() {
7973 let a = _mm512_set1_ps(OPRND1_32);
7974 let b = _mm512_set1_ps(OPRND2_32);
7975 let r = _mm512_maskz_xor_ps(0b0101010101010101, a, b);
7976 let e = _mm512_set_ps(
7977 0., XOR_32, 0., XOR_32, 0., XOR_32, 0., XOR_32, 0., XOR_32, 0., XOR_32, 0., XOR_32, 0.,
7978 XOR_32,
7979 );
7980 assert_eq_m512(r, e);
7981 }
7982
7983 #[simd_test(enable = "avx512dq,avx512vl")]
7984 const fn test_mm256_broadcast_f32x2() {
7985 let a = _mm_set_ps(1., 2., 3., 4.);
7986 let r = _mm256_broadcast_f32x2(a);
7987 let e = _mm256_set_ps(3., 4., 3., 4., 3., 4., 3., 4.);
7988 assert_eq_m256(r, e);
7989 }
7990
7991 #[simd_test(enable = "avx512dq,avx512vl")]
7992 const fn test_mm256_mask_broadcast_f32x2() {
7993 let a = _mm_set_ps(1., 2., 3., 4.);
7994 let b = _mm256_set_ps(5., 6., 7., 8., 9., 10., 11., 12.);
7995 let r = _mm256_mask_broadcast_f32x2(b, 0b01101001, a);
7996 let e = _mm256_set_ps(5., 4., 3., 8., 3., 10., 11., 4.);
7997 assert_eq_m256(r, e);
7998 }
7999
8000 #[simd_test(enable = "avx512dq,avx512vl")]
8001 const fn test_mm256_maskz_broadcast_f32x2() {
8002 let a = _mm_set_ps(1., 2., 3., 4.);
8003 let r = _mm256_maskz_broadcast_f32x2(0b01101001, a);
8004 let e = _mm256_set_ps(0., 4., 3., 0., 3., 0., 0., 4.);
8005 assert_eq_m256(r, e);
8006 }
8007
8008 #[simd_test(enable = "avx512dq")]
8009 const fn test_mm512_broadcast_f32x2() {
8010 let a = _mm_set_ps(1., 2., 3., 4.);
8011 let r = _mm512_broadcast_f32x2(a);
8012 let e = _mm512_set_ps(
8013 3., 4., 3., 4., 3., 4., 3., 4., 3., 4., 3., 4., 3., 4., 3., 4.,
8014 );
8015 assert_eq_m512(r, e);
8016 }
8017
8018 #[simd_test(enable = "avx512dq")]
8019 const fn test_mm512_mask_broadcast_f32x2() {
8020 let a = _mm_set_ps(1., 2., 3., 4.);
8021 let b = _mm512_set_ps(
8022 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20.,
8023 );
8024 let r = _mm512_mask_broadcast_f32x2(b, 0b0110100100111100, a);
8025 let e = _mm512_set_ps(
8026 5., 4., 3., 8., 3., 10., 11., 4., 13., 14., 3., 4., 3., 4., 19., 20.,
8027 );
8028 assert_eq_m512(r, e);
8029 }
8030
8031 #[simd_test(enable = "avx512dq")]
8032 const fn test_mm512_maskz_broadcast_f32x2() {
8033 let a = _mm_set_ps(1., 2., 3., 4.);
8034 let r = _mm512_maskz_broadcast_f32x2(0b0110100100111100, a);
8035 let e = _mm512_set_ps(
8036 0., 4., 3., 0., 3., 0., 0., 4., 0., 0., 3., 4., 3., 4., 0., 0.,
8037 );
8038 assert_eq_m512(r, e);
8039 }
8040
8041 #[simd_test(enable = "avx512dq")]
8042 const fn test_mm512_broadcast_f32x8() {
8043 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
8044 let r = _mm512_broadcast_f32x8(a);
8045 let e = _mm512_set_ps(
8046 1., 2., 3., 4., 5., 6., 7., 8., 1., 2., 3., 4., 5., 6., 7., 8.,
8047 );
8048 assert_eq_m512(r, e);
8049 }
8050
8051 #[simd_test(enable = "avx512dq")]
8052 const fn test_mm512_mask_broadcast_f32x8() {
8053 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
8054 let b = _mm512_set_ps(
8055 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24.,
8056 );
8057 let r = _mm512_mask_broadcast_f32x8(b, 0b0110100100111100, a);
8058 let e = _mm512_set_ps(
8059 9., 2., 3., 12., 5., 14., 15., 8., 17., 18., 3., 4., 5., 6., 23., 24.,
8060 );
8061 assert_eq_m512(r, e);
8062 }
8063
8064 #[simd_test(enable = "avx512dq")]
8065 const fn test_mm512_maskz_broadcast_f32x8() {
8066 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
8067 let r = _mm512_maskz_broadcast_f32x8(0b0110100100111100, a);
8068 let e = _mm512_set_ps(
8069 0., 2., 3., 0., 5., 0., 0., 8., 0., 0., 3., 4., 5., 6., 0., 0.,
8070 );
8071 assert_eq_m512(r, e);
8072 }
8073
8074 #[simd_test(enable = "avx512dq,avx512vl")]
8075 const fn test_mm256_broadcast_f64x2() {
8076 let a = _mm_set_pd(1., 2.);
8077 let r = _mm256_broadcast_f64x2(a);
8078 let e = _mm256_set_pd(1., 2., 1., 2.);
8079 assert_eq_m256d(r, e);
8080 }
8081
8082 #[simd_test(enable = "avx512dq,avx512vl")]
8083 const fn test_mm256_mask_broadcast_f64x2() {
8084 let a = _mm_set_pd(1., 2.);
8085 let b = _mm256_set_pd(3., 4., 5., 6.);
8086 let r = _mm256_mask_broadcast_f64x2(b, 0b0110, a);
8087 let e = _mm256_set_pd(3., 2., 1., 6.);
8088 assert_eq_m256d(r, e);
8089 }
8090
8091 #[simd_test(enable = "avx512dq,avx512vl")]
8092 const fn test_mm256_maskz_broadcast_f64x2() {
8093 let a = _mm_set_pd(1., 2.);
8094 let r = _mm256_maskz_broadcast_f64x2(0b0110, a);
8095 let e = _mm256_set_pd(0., 2., 1., 0.);
8096 assert_eq_m256d(r, e);
8097 }
8098
8099 #[simd_test(enable = "avx512dq")]
8100 const fn test_mm512_broadcast_f64x2() {
8101 let a = _mm_set_pd(1., 2.);
8102 let r = _mm512_broadcast_f64x2(a);
8103 let e = _mm512_set_pd(1., 2., 1., 2., 1., 2., 1., 2.);
8104 assert_eq_m512d(r, e);
8105 }
8106
8107 #[simd_test(enable = "avx512dq")]
8108 const fn test_mm512_mask_broadcast_f64x2() {
8109 let a = _mm_set_pd(1., 2.);
8110 let b = _mm512_set_pd(3., 4., 5., 6., 7., 8., 9., 10.);
8111 let r = _mm512_mask_broadcast_f64x2(b, 0b01101001, a);
8112 let e = _mm512_set_pd(3., 2., 1., 6., 1., 8., 9., 2.);
8113 assert_eq_m512d(r, e);
8114 }
8115
8116 #[simd_test(enable = "avx512dq")]
8117 const fn test_mm512_maskz_broadcast_f64x2() {
8118 let a = _mm_set_pd(1., 2.);
8119 let r = _mm512_maskz_broadcast_f64x2(0b01101001, a);
8120 let e = _mm512_set_pd(0., 2., 1., 0., 1., 0., 0., 2.);
8121 assert_eq_m512d(r, e);
8122 }
8123
8124 #[simd_test(enable = "avx512dq,avx512vl")]
8125 const fn test_mm_broadcast_i32x2() {
8126 let a = _mm_set_epi32(1, 2, 3, 4);
8127 let r = _mm_broadcast_i32x2(a);
8128 let e = _mm_set_epi32(3, 4, 3, 4);
8129 assert_eq_m128i(r, e);
8130 }
8131
8132 #[simd_test(enable = "avx512dq,avx512vl")]
8133 const fn test_mm_mask_broadcast_i32x2() {
8134 let a = _mm_set_epi32(1, 2, 3, 4);
8135 let b = _mm_set_epi32(5, 6, 7, 8);
8136 let r = _mm_mask_broadcast_i32x2(b, 0b0110, a);
8137 let e = _mm_set_epi32(5, 4, 3, 8);
8138 assert_eq_m128i(r, e);
8139 }
8140
8141 #[simd_test(enable = "avx512dq,avx512vl")]
8142 const fn test_mm_maskz_broadcast_i32x2() {
8143 let a = _mm_set_epi32(1, 2, 3, 4);
8144 let r = _mm_maskz_broadcast_i32x2(0b0110, a);
8145 let e = _mm_set_epi32(0, 4, 3, 0);
8146 assert_eq_m128i(r, e);
8147 }
8148
8149 #[simd_test(enable = "avx512dq,avx512vl")]
8150 const fn test_mm256_broadcast_i32x2() {
8151 let a = _mm_set_epi32(1, 2, 3, 4);
8152 let r = _mm256_broadcast_i32x2(a);
8153 let e = _mm256_set_epi32(3, 4, 3, 4, 3, 4, 3, 4);
8154 assert_eq_m256i(r, e);
8155 }
8156
8157 #[simd_test(enable = "avx512dq,avx512vl")]
8158 const fn test_mm256_mask_broadcast_i32x2() {
8159 let a = _mm_set_epi32(1, 2, 3, 4);
8160 let b = _mm256_set_epi32(5, 6, 7, 8, 9, 10, 11, 12);
8161 let r = _mm256_mask_broadcast_i32x2(b, 0b01101001, a);
8162 let e = _mm256_set_epi32(5, 4, 3, 8, 3, 10, 11, 4);
8163 assert_eq_m256i(r, e);
8164 }
8165
8166 #[simd_test(enable = "avx512dq,avx512vl")]
8167 const fn test_mm256_maskz_broadcast_i32x2() {
8168 let a = _mm_set_epi32(1, 2, 3, 4);
8169 let r = _mm256_maskz_broadcast_i32x2(0b01101001, a);
8170 let e = _mm256_set_epi32(0, 4, 3, 0, 3, 0, 0, 4);
8171 assert_eq_m256i(r, e);
8172 }
8173
8174 #[simd_test(enable = "avx512dq")]
8175 const fn test_mm512_broadcast_i32x2() {
8176 let a = _mm_set_epi32(1, 2, 3, 4);
8177 let r = _mm512_broadcast_i32x2(a);
8178 let e = _mm512_set_epi32(3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4, 3, 4);
8179 assert_eq_m512i(r, e);
8180 }
8181
8182 #[simd_test(enable = "avx512dq")]
8183 const fn test_mm512_mask_broadcast_i32x2() {
8184 let a = _mm_set_epi32(1, 2, 3, 4);
8185 let b = _mm512_set_epi32(5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20);
8186 let r = _mm512_mask_broadcast_i32x2(b, 0b0110100100111100, a);
8187 let e = _mm512_set_epi32(5, 4, 3, 8, 3, 10, 11, 4, 13, 14, 3, 4, 3, 4, 19, 20);
8188 assert_eq_m512i(r, e);
8189 }
8190
8191 #[simd_test(enable = "avx512dq")]
8192 const fn test_mm512_maskz_broadcast_i32x2() {
8193 let a = _mm_set_epi32(1, 2, 3, 4);
8194 let r = _mm512_maskz_broadcast_i32x2(0b0110100100111100, a);
8195 let e = _mm512_set_epi32(0, 4, 3, 0, 3, 0, 0, 4, 0, 0, 3, 4, 3, 4, 0, 0);
8196 assert_eq_m512i(r, e);
8197 }
8198
8199 #[simd_test(enable = "avx512dq")]
8200 const fn test_mm512_broadcast_i32x8() {
8201 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
8202 let r = _mm512_broadcast_i32x8(a);
8203 let e = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8);
8204 assert_eq_m512i(r, e);
8205 }
8206
8207 #[simd_test(enable = "avx512dq")]
8208 const fn test_mm512_mask_broadcast_i32x8() {
8209 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
8210 let b = _mm512_set_epi32(
8211 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
8212 );
8213 let r = _mm512_mask_broadcast_i32x8(b, 0b0110100100111100, a);
8214 let e = _mm512_set_epi32(9, 2, 3, 12, 5, 14, 15, 8, 17, 18, 3, 4, 5, 6, 23, 24);
8215 assert_eq_m512i(r, e);
8216 }
8217
8218 #[simd_test(enable = "avx512dq")]
8219 const fn test_mm512_maskz_broadcast_i32x8() {
8220 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
8221 let r = _mm512_maskz_broadcast_i32x8(0b0110100100111100, a);
8222 let e = _mm512_set_epi32(0, 2, 3, 0, 5, 0, 0, 8, 0, 0, 3, 4, 5, 6, 0, 0);
8223 assert_eq_m512i(r, e);
8224 }
8225
8226 #[simd_test(enable = "avx512dq,avx512vl")]
8227 const fn test_mm256_broadcast_i64x2() {
8228 let a = _mm_set_epi64x(1, 2);
8229 let r = _mm256_broadcast_i64x2(a);
8230 let e = _mm256_set_epi64x(1, 2, 1, 2);
8231 assert_eq_m256i(r, e);
8232 }
8233
8234 #[simd_test(enable = "avx512dq,avx512vl")]
8235 const fn test_mm256_mask_broadcast_i64x2() {
8236 let a = _mm_set_epi64x(1, 2);
8237 let b = _mm256_set_epi64x(3, 4, 5, 6);
8238 let r = _mm256_mask_broadcast_i64x2(b, 0b0110, a);
8239 let e = _mm256_set_epi64x(3, 2, 1, 6);
8240 assert_eq_m256i(r, e);
8241 }
8242
8243 #[simd_test(enable = "avx512dq,avx512vl")]
8244 const fn test_mm256_maskz_broadcast_i64x2() {
8245 let a = _mm_set_epi64x(1, 2);
8246 let r = _mm256_maskz_broadcast_i64x2(0b0110, a);
8247 let e = _mm256_set_epi64x(0, 2, 1, 0);
8248 assert_eq_m256i(r, e);
8249 }
8250
8251 #[simd_test(enable = "avx512dq")]
8252 const fn test_mm512_broadcast_i64x2() {
8253 let a = _mm_set_epi64x(1, 2);
8254 let r = _mm512_broadcast_i64x2(a);
8255 let e = _mm512_set_epi64(1, 2, 1, 2, 1, 2, 1, 2);
8256 assert_eq_m512i(r, e);
8257 }
8258
8259 #[simd_test(enable = "avx512dq")]
8260 const fn test_mm512_mask_broadcast_i64x2() {
8261 let a = _mm_set_epi64x(1, 2);
8262 let b = _mm512_set_epi64(3, 4, 5, 6, 7, 8, 9, 10);
8263 let r = _mm512_mask_broadcast_i64x2(b, 0b01101001, a);
8264 let e = _mm512_set_epi64(3, 2, 1, 6, 1, 8, 9, 2);
8265 assert_eq_m512i(r, e);
8266 }
8267
8268 #[simd_test(enable = "avx512dq")]
8269 const fn test_mm512_maskz_broadcast_i64x2() {
8270 let a = _mm_set_epi64x(1, 2);
8271 let r = _mm512_maskz_broadcast_i64x2(0b01101001, a);
8272 let e = _mm512_set_epi64(0, 2, 1, 0, 1, 0, 0, 2);
8273 assert_eq_m512i(r, e);
8274 }
8275
8276 #[simd_test(enable = "avx512dq")]
8277 const fn test_mm512_extractf32x8_ps() {
8278 let a = _mm512_set_ps(
8279 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
8280 );
8281 let r = _mm512_extractf32x8_ps::<1>(a);
8282 let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
8283 assert_eq_m256(r, e);
8284 }
8285
8286 #[simd_test(enable = "avx512dq")]
8287 const fn test_mm512_mask_extractf32x8_ps() {
8288 let a = _mm512_set_ps(
8289 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
8290 );
8291 let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
8292 let r = _mm512_mask_extractf32x8_ps::<1>(b, 0b01101001, a);
8293 let e = _mm256_set_ps(17., 2., 3., 20., 5., 22., 23., 8.);
8294 assert_eq_m256(r, e);
8295 }
8296
8297 #[simd_test(enable = "avx512dq")]
8298 const fn test_mm512_maskz_extractf32x8_ps() {
8299 let a = _mm512_set_ps(
8300 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
8301 );
8302 let r = _mm512_maskz_extractf32x8_ps::<1>(0b01101001, a);
8303 let e = _mm256_set_ps(0., 2., 3., 0., 5., 0., 0., 8.);
8304 assert_eq_m256(r, e);
8305 }
8306
8307 #[simd_test(enable = "avx512dq,avx512vl")]
8308 const fn test_mm256_extractf64x2_pd() {
8309 let a = _mm256_set_pd(1., 2., 3., 4.);
8310 let r = _mm256_extractf64x2_pd::<1>(a);
8311 let e = _mm_set_pd(1., 2.);
8312 assert_eq_m128d(r, e);
8313 }
8314
8315 #[simd_test(enable = "avx512dq,avx512vl")]
8316 const fn test_mm256_mask_extractf64x2_pd() {
8317 let a = _mm256_set_pd(1., 2., 3., 4.);
8318 let b = _mm_set_pd(5., 6.);
8319 let r = _mm256_mask_extractf64x2_pd::<1>(b, 0b01, a);
8320 let e = _mm_set_pd(5., 2.);
8321 assert_eq_m128d(r, e);
8322 }
8323
8324 #[simd_test(enable = "avx512dq,avx512vl")]
8325 const fn test_mm256_maskz_extractf64x2_pd() {
8326 let a = _mm256_set_pd(1., 2., 3., 4.);
8327 let r = _mm256_maskz_extractf64x2_pd::<1>(0b01, a);
8328 let e = _mm_set_pd(0., 2.);
8329 assert_eq_m128d(r, e);
8330 }
8331
8332 #[simd_test(enable = "avx512dq")]
8333 const fn test_mm512_extractf64x2_pd() {
8334 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
8335 let r = _mm512_extractf64x2_pd::<2>(a);
8336 let e = _mm_set_pd(3., 4.);
8337 assert_eq_m128d(r, e);
8338 }
8339
8340 #[simd_test(enable = "avx512dq")]
8341 const fn test_mm512_mask_extractf64x2_pd() {
8342 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
8343 let b = _mm_set_pd(9., 10.);
8344 let r = _mm512_mask_extractf64x2_pd::<2>(b, 0b01, a);
8345 let e = _mm_set_pd(9., 4.);
8346 assert_eq_m128d(r, e);
8347 }
8348
8349 #[simd_test(enable = "avx512dq")]
8350 const fn test_mm512_maskz_extractf64x2_pd() {
8351 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
8352 let r = _mm512_maskz_extractf64x2_pd::<2>(0b01, a);
8353 let e = _mm_set_pd(0., 4.);
8354 assert_eq_m128d(r, e);
8355 }
8356
8357 #[simd_test(enable = "avx512dq")]
8358 const fn test_mm512_extracti32x8_epi32() {
8359 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
8360 let r = _mm512_extracti32x8_epi32::<1>(a);
8361 let e = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
8362 assert_eq_m256i(r, e);
8363 }
8364
8365 #[simd_test(enable = "avx512dq")]
8366 const fn test_mm512_mask_extracti32x8_epi32() {
8367 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
8368 let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
8369 let r = _mm512_mask_extracti32x8_epi32::<1>(b, 0b01101001, a);
8370 let e = _mm256_set_epi32(17, 2, 3, 20, 5, 22, 23, 8);
8371 assert_eq_m256i(r, e);
8372 }
8373
8374 #[simd_test(enable = "avx512dq")]
8375 const fn test_mm512_maskz_extracti32x8_epi32() {
8376 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
8377 let r = _mm512_maskz_extracti32x8_epi32::<1>(0b01101001, a);
8378 let e = _mm256_set_epi32(0, 2, 3, 0, 5, 0, 0, 8);
8379 assert_eq_m256i(r, e);
8380 }
8381
8382 #[simd_test(enable = "avx512dq,avx512vl")]
8383 const fn test_mm256_extracti64x2_epi64() {
8384 let a = _mm256_set_epi64x(1, 2, 3, 4);
8385 let r = _mm256_extracti64x2_epi64::<1>(a);
8386 let e = _mm_set_epi64x(1, 2);
8387 assert_eq_m128i(r, e);
8388 }
8389
8390 #[simd_test(enable = "avx512dq,avx512vl")]
8391 const fn test_mm256_mask_extracti64x2_epi64() {
8392 let a = _mm256_set_epi64x(1, 2, 3, 4);
8393 let b = _mm_set_epi64x(5, 6);
8394 let r = _mm256_mask_extracti64x2_epi64::<1>(b, 0b01, a);
8395 let e = _mm_set_epi64x(5, 2);
8396 assert_eq_m128i(r, e);
8397 }
8398
8399 #[simd_test(enable = "avx512dq,avx512vl")]
8400 const fn test_mm256_maskz_extracti64x2_epi64() {
8401 let a = _mm256_set_epi64x(1, 2, 3, 4);
8402 let r = _mm256_maskz_extracti64x2_epi64::<1>(0b01, a);
8403 let e = _mm_set_epi64x(0, 2);
8404 assert_eq_m128i(r, e);
8405 }
8406
8407 #[simd_test(enable = "avx512dq")]
8408 const fn test_mm512_extracti64x2_epi64() {
8409 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8410 let r = _mm512_extracti64x2_epi64::<2>(a);
8411 let e = _mm_set_epi64x(3, 4);
8412 assert_eq_m128i(r, e);
8413 }
8414
8415 #[simd_test(enable = "avx512dq")]
8416 const fn test_mm512_mask_extracti64x2_epi64() {
8417 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8418 let b = _mm_set_epi64x(9, 10);
8419 let r = _mm512_mask_extracti64x2_epi64::<2>(b, 0b01, a);
8420 let e = _mm_set_epi64x(9, 4);
8421 assert_eq_m128i(r, e);
8422 }
8423
8424 #[simd_test(enable = "avx512dq")]
8425 const fn test_mm512_maskz_extracti64x2_epi64() {
8426 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8427 let r = _mm512_maskz_extracti64x2_epi64::<2>(0b01, a);
8428 let e = _mm_set_epi64x(0, 4);
8429 assert_eq_m128i(r, e);
8430 }
8431
8432 #[simd_test(enable = "avx512dq")]
8433 const fn test_mm512_insertf32x8() {
8434 let a = _mm512_set_ps(
8435 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
8436 );
8437 let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
8438 let r = _mm512_insertf32x8::<1>(a, b);
8439 let e = _mm512_set_ps(
8440 17., 18., 19., 20., 21., 22., 23., 24., 9., 10., 11., 12., 13., 14., 15., 16.,
8441 );
8442 assert_eq_m512(r, e);
8443 }
8444
8445 #[simd_test(enable = "avx512dq")]
8446 const fn test_mm512_mask_insertf32x8() {
8447 let a = _mm512_set_ps(
8448 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
8449 );
8450 let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
8451 let src = _mm512_set_ps(
8452 25., 26., 27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38., 39., 40.,
8453 );
8454 let r = _mm512_mask_insertf32x8::<1>(src, 0b0110100100111100, a, b);
8455 let e = _mm512_set_ps(
8456 25., 18., 19., 28., 21., 30., 31., 24., 33., 34., 11., 12., 13., 14., 39., 40.,
8457 );
8458 assert_eq_m512(r, e);
8459 }
8460
8461 #[simd_test(enable = "avx512dq")]
8462 const fn test_mm512_maskz_insertf32x8() {
8463 let a = _mm512_set_ps(
8464 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
8465 );
8466 let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
8467 let r = _mm512_maskz_insertf32x8::<1>(0b0110100100111100, a, b);
8468 let e = _mm512_set_ps(
8469 0., 18., 19., 0., 21., 0., 0., 24., 0., 0., 11., 12., 13., 14., 0., 0.,
8470 );
8471 assert_eq_m512(r, e);
8472 }
8473
8474 #[simd_test(enable = "avx512dq,avx512vl")]
8475 const fn test_mm256_insertf64x2() {
8476 let a = _mm256_set_pd(1., 2., 3., 4.);
8477 let b = _mm_set_pd(5., 6.);
8478 let r = _mm256_insertf64x2::<1>(a, b);
8479 let e = _mm256_set_pd(5., 6., 3., 4.);
8480 assert_eq_m256d(r, e);
8481 }
8482
8483 #[simd_test(enable = "avx512dq,avx512vl")]
8484 const fn test_mm256_mask_insertf64x2() {
8485 let a = _mm256_set_pd(1., 2., 3., 4.);
8486 let b = _mm_set_pd(5., 6.);
8487 let src = _mm256_set_pd(7., 8., 9., 10.);
8488 let r = _mm256_mask_insertf64x2::<1>(src, 0b0110, a, b);
8489 let e = _mm256_set_pd(7., 6., 3., 10.);
8490 assert_eq_m256d(r, e);
8491 }
8492
8493 #[simd_test(enable = "avx512dq,avx512vl")]
8494 const fn test_mm256_maskz_insertf64x2() {
8495 let a = _mm256_set_pd(1., 2., 3., 4.);
8496 let b = _mm_set_pd(5., 6.);
8497 let r = _mm256_maskz_insertf64x2::<1>(0b0110, a, b);
8498 let e = _mm256_set_pd(0., 6., 3., 0.);
8499 assert_eq_m256d(r, e);
8500 }
8501
8502 #[simd_test(enable = "avx512dq")]
8503 const fn test_mm512_insertf64x2() {
8504 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
8505 let b = _mm_set_pd(9., 10.);
8506 let r = _mm512_insertf64x2::<2>(a, b);
8507 let e = _mm512_set_pd(1., 2., 9., 10., 5., 6., 7., 8.);
8508 assert_eq_m512d(r, e);
8509 }
8510
8511 #[simd_test(enable = "avx512dq")]
8512 const fn test_mm512_mask_insertf64x2() {
8513 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
8514 let b = _mm_set_pd(9., 10.);
8515 let src = _mm512_set_pd(11., 12., 13., 14., 15., 16., 17., 18.);
8516 let r = _mm512_mask_insertf64x2::<2>(src, 0b01101001, a, b);
8517 let e = _mm512_set_pd(11., 2., 9., 14., 5., 16., 17., 8.);
8518 assert_eq_m512d(r, e);
8519 }
8520
8521 #[simd_test(enable = "avx512dq")]
8522 const fn test_mm512_maskz_insertf64x2() {
8523 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
8524 let b = _mm_set_pd(9., 10.);
8525 let r = _mm512_maskz_insertf64x2::<2>(0b01101001, a, b);
8526 let e = _mm512_set_pd(0., 2., 9., 0., 5., 0., 0., 8.);
8527 assert_eq_m512d(r, e);
8528 }
8529
8530 #[simd_test(enable = "avx512dq")]
8531 const fn test_mm512_inserti32x8() {
8532 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
8533 let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
8534 let r = _mm512_inserti32x8::<1>(a, b);
8535 let e = _mm512_set_epi32(
8536 17, 18, 19, 20, 21, 22, 23, 24, 9, 10, 11, 12, 13, 14, 15, 16,
8537 );
8538 assert_eq_m512i(r, e);
8539 }
8540
8541 #[simd_test(enable = "avx512dq")]
8542 const fn test_mm512_mask_inserti32x8() {
8543 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
8544 let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
8545 let src = _mm512_set_epi32(
8546 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
8547 );
8548 let r = _mm512_mask_inserti32x8::<1>(src, 0b0110100100111100, a, b);
8549 let e = _mm512_set_epi32(
8550 25, 18, 19, 28, 21, 30, 31, 24, 33, 34, 11, 12, 13, 14, 39, 40,
8551 );
8552 assert_eq_m512i(r, e);
8553 }
8554
8555 #[simd_test(enable = "avx512dq")]
8556 const fn test_mm512_maskz_inserti32x8() {
8557 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
8558 let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
8559 let r = _mm512_maskz_inserti32x8::<1>(0b0110100100111100, a, b);
8560 let e = _mm512_set_epi32(0, 18, 19, 0, 21, 0, 0, 24, 0, 0, 11, 12, 13, 14, 0, 0);
8561 assert_eq_m512i(r, e);
8562 }
8563
8564 #[simd_test(enable = "avx512dq,avx512vl")]
8565 const fn test_mm256_inserti64x2() {
8566 let a = _mm256_set_epi64x(1, 2, 3, 4);
8567 let b = _mm_set_epi64x(5, 6);
8568 let r = _mm256_inserti64x2::<1>(a, b);
8569 let e = _mm256_set_epi64x(5, 6, 3, 4);
8570 assert_eq_m256i(r, e);
8571 }
8572
8573 #[simd_test(enable = "avx512dq,avx512vl")]
8574 const fn test_mm256_mask_inserti64x2() {
8575 let a = _mm256_set_epi64x(1, 2, 3, 4);
8576 let b = _mm_set_epi64x(5, 6);
8577 let src = _mm256_set_epi64x(7, 8, 9, 10);
8578 let r = _mm256_mask_inserti64x2::<1>(src, 0b0110, a, b);
8579 let e = _mm256_set_epi64x(7, 6, 3, 10);
8580 assert_eq_m256i(r, e);
8581 }
8582
8583 #[simd_test(enable = "avx512dq,avx512vl")]
8584 const fn test_mm256_maskz_inserti64x2() {
8585 let a = _mm256_set_epi64x(1, 2, 3, 4);
8586 let b = _mm_set_epi64x(5, 6);
8587 let r = _mm256_maskz_inserti64x2::<1>(0b0110, a, b);
8588 let e = _mm256_set_epi64x(0, 6, 3, 0);
8589 assert_eq_m256i(r, e);
8590 }
8591
8592 #[simd_test(enable = "avx512dq")]
8593 const fn test_mm512_inserti64x2() {
8594 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8595 let b = _mm_set_epi64x(9, 10);
8596 let r = _mm512_inserti64x2::<2>(a, b);
8597 let e = _mm512_set_epi64(1, 2, 9, 10, 5, 6, 7, 8);
8598 assert_eq_m512i(r, e);
8599 }
8600
8601 #[simd_test(enable = "avx512dq")]
8602 const fn test_mm512_mask_inserti64x2() {
8603 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8604 let b = _mm_set_epi64x(9, 10);
8605 let src = _mm512_set_epi64(11, 12, 13, 14, 15, 16, 17, 18);
8606 let r = _mm512_mask_inserti64x2::<2>(src, 0b01101001, a, b);
8607 let e = _mm512_set_epi64(11, 2, 9, 14, 5, 16, 17, 8);
8608 assert_eq_m512i(r, e);
8609 }
8610
8611 #[simd_test(enable = "avx512dq")]
8612 const fn test_mm512_maskz_inserti64x2() {
8613 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8614 let b = _mm_set_epi64x(9, 10);
8615 let r = _mm512_maskz_inserti64x2::<2>(0b01101001, a, b);
8616 let e = _mm512_set_epi64(0, 2, 9, 0, 5, 0, 0, 8);
8617 assert_eq_m512i(r, e);
8618 }
8619
8620 #[simd_test(enable = "avx512dq")]
8621 fn test_mm512_cvt_roundepi64_pd() {
8622 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8623 let r = _mm512_cvt_roundepi64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
8624 let e = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
8625 assert_eq_m512d(r, e);
8626 }
8627
8628 #[simd_test(enable = "avx512dq")]
8629 fn test_mm512_mask_cvt_roundepi64_pd() {
8630 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8631 let b = _mm512_set_pd(9., 10., 11., 12., 13., 14., 15., 16.);
8632 let r = _mm512_mask_cvt_roundepi64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
8633 b, 0b01101001, a,
8634 );
8635 let e = _mm512_set_pd(9., 2., 3., 12., 5., 14., 15., 8.);
8636 assert_eq_m512d(r, e);
8637 }
8638
8639 #[simd_test(enable = "avx512dq")]
8640 fn test_mm512_maskz_cvt_roundepi64_pd() {
8641 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8642 let r = _mm512_maskz_cvt_roundepi64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
8643 0b01101001, a,
8644 );
8645 let e = _mm512_set_pd(0., 2., 3., 0., 5., 0., 0., 8.);
8646 assert_eq_m512d(r, e);
8647 }
8648
8649 #[simd_test(enable = "avx512dq,avx512vl")]
8650 fn test_mm_cvtepi64_pd() {
8651 let a = _mm_set_epi64x(1, 2);
8652 let r = _mm_cvtepi64_pd(a);
8653 let e = _mm_set_pd(1., 2.);
8654 assert_eq_m128d(r, e);
8655 }
8656
8657 #[simd_test(enable = "avx512dq,avx512vl")]
8658 fn test_mm_mask_cvtepi64_pd() {
8659 let a = _mm_set_epi64x(1, 2);
8660 let b = _mm_set_pd(3., 4.);
8661 let r = _mm_mask_cvtepi64_pd(b, 0b01, a);
8662 let e = _mm_set_pd(3., 2.);
8663 assert_eq_m128d(r, e);
8664 }
8665
8666 #[simd_test(enable = "avx512dq,avx512vl")]
8667 fn test_mm_maskz_cvtepi64_pd() {
8668 let a = _mm_set_epi64x(1, 2);
8669 let r = _mm_maskz_cvtepi64_pd(0b01, a);
8670 let e = _mm_set_pd(0., 2.);
8671 assert_eq_m128d(r, e);
8672 }
8673
8674 #[simd_test(enable = "avx512dq,avx512vl")]
8675 fn test_mm256_cvtepi64_pd() {
8676 let a = _mm256_set_epi64x(1, 2, 3, 4);
8677 let r = _mm256_cvtepi64_pd(a);
8678 let e = _mm256_set_pd(1., 2., 3., 4.);
8679 assert_eq_m256d(r, e);
8680 }
8681
8682 #[simd_test(enable = "avx512dq,avx512vl")]
8683 fn test_mm256_mask_cvtepi64_pd() {
8684 let a = _mm256_set_epi64x(1, 2, 3, 4);
8685 let b = _mm256_set_pd(5., 6., 7., 8.);
8686 let r = _mm256_mask_cvtepi64_pd(b, 0b0110, a);
8687 let e = _mm256_set_pd(5., 2., 3., 8.);
8688 assert_eq_m256d(r, e);
8689 }
8690
8691 #[simd_test(enable = "avx512dq,avx512vl")]
8692 fn test_mm256_maskz_cvtepi64_pd() {
8693 let a = _mm256_set_epi64x(1, 2, 3, 4);
8694 let r = _mm256_maskz_cvtepi64_pd(0b0110, a);
8695 let e = _mm256_set_pd(0., 2., 3., 0.);
8696 assert_eq_m256d(r, e);
8697 }
8698
8699 #[simd_test(enable = "avx512dq")]
8700 fn test_mm512_cvtepi64_pd() {
8701 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8702 let r = _mm512_cvtepi64_pd(a);
8703 let e = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
8704 assert_eq_m512d(r, e);
8705 }
8706
8707 #[simd_test(enable = "avx512dq")]
8708 fn test_mm512_mask_cvtepi64_pd() {
8709 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8710 let b = _mm512_set_pd(9., 10., 11., 12., 13., 14., 15., 16.);
8711 let r = _mm512_mask_cvtepi64_pd(b, 0b01101001, a);
8712 let e = _mm512_set_pd(9., 2., 3., 12., 5., 14., 15., 8.);
8713 assert_eq_m512d(r, e);
8714 }
8715
8716 #[simd_test(enable = "avx512dq")]
8717 fn test_mm512_maskz_cvtepi64_pd() {
8718 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8719 let r = _mm512_maskz_cvtepi64_pd(0b01101001, a);
8720 let e = _mm512_set_pd(0., 2., 3., 0., 5., 0., 0., 8.);
8721 assert_eq_m512d(r, e);
8722 }
8723
8724 #[simd_test(enable = "avx512dq")]
8725 fn test_mm512_cvt_roundepi64_ps() {
8726 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8727 let r = _mm512_cvt_roundepi64_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
8728 let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
8729 assert_eq_m256(r, e);
8730 }
8731
8732 #[simd_test(enable = "avx512dq")]
8733 fn test_mm512_mask_cvt_roundepi64_ps() {
8734 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8735 let b = _mm256_set_ps(9., 10., 11., 12., 13., 14., 15., 16.);
8736 let r = _mm512_mask_cvt_roundepi64_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
8737 b, 0b01101001, a,
8738 );
8739 let e = _mm256_set_ps(9., 2., 3., 12., 5., 14., 15., 8.);
8740 assert_eq_m256(r, e);
8741 }
8742
8743 #[simd_test(enable = "avx512dq")]
8744 fn test_mm512_maskz_cvt_roundepi64_ps() {
8745 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8746 let r = _mm512_maskz_cvt_roundepi64_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
8747 0b01101001, a,
8748 );
8749 let e = _mm256_set_ps(0., 2., 3., 0., 5., 0., 0., 8.);
8750 assert_eq_m256(r, e);
8751 }
8752
8753 #[simd_test(enable = "avx512dq,avx512vl")]
8754 fn test_mm_cvtepi64_ps() {
8755 let a = _mm_set_epi64x(1, 2);
8756 let r = _mm_cvtepi64_ps(a);
8757 let e = _mm_set_ps(0., 0., 1., 2.);
8758 assert_eq_m128(r, e);
8759 }
8760
8761 #[simd_test(enable = "avx512dq,avx512vl")]
8762 fn test_mm_mask_cvtepi64_ps() {
8763 let a = _mm_set_epi64x(1, 2);
8764 let b = _mm_set_ps(3., 4., 5., 6.);
8765 let r = _mm_mask_cvtepi64_ps(b, 0b01, a);
8766 let e = _mm_set_ps(0., 0., 5., 2.);
8767 assert_eq_m128(r, e);
8768 }
8769
8770 #[simd_test(enable = "avx512dq,avx512vl")]
8771 fn test_mm_maskz_cvtepi64_ps() {
8772 let a = _mm_set_epi64x(1, 2);
8773 let r = _mm_maskz_cvtepi64_ps(0b01, a);
8774 let e = _mm_set_ps(0., 0., 0., 2.);
8775 assert_eq_m128(r, e);
8776 }
8777
8778 #[simd_test(enable = "avx512dq,avx512vl")]
8779 fn test_mm256_cvtepi64_ps() {
8780 let a = _mm256_set_epi64x(1, 2, 3, 4);
8781 let r = _mm256_cvtepi64_ps(a);
8782 let e = _mm_set_ps(1., 2., 3., 4.);
8783 assert_eq_m128(r, e);
8784 }
8785
8786 #[simd_test(enable = "avx512dq,avx512vl")]
8787 fn test_mm256_mask_cvtepi64_ps() {
8788 let a = _mm256_set_epi64x(1, 2, 3, 4);
8789 let b = _mm_set_ps(5., 6., 7., 8.);
8790 let r = _mm256_mask_cvtepi64_ps(b, 0b0110, a);
8791 let e = _mm_set_ps(5., 2., 3., 8.);
8792 assert_eq_m128(r, e);
8793 }
8794
8795 #[simd_test(enable = "avx512dq,avx512vl")]
8796 fn test_mm256_maskz_cvtepi64_ps() {
8797 let a = _mm256_set_epi64x(1, 2, 3, 4);
8798 let r = _mm256_maskz_cvtepi64_ps(0b0110, a);
8799 let e = _mm_set_ps(0., 2., 3., 0.);
8800 assert_eq_m128(r, e);
8801 }
8802
8803 #[simd_test(enable = "avx512dq")]
8804 fn test_mm512_cvtepi64_ps() {
8805 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8806 let r = _mm512_cvtepi64_ps(a);
8807 let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
8808 assert_eq_m256(r, e);
8809 }
8810
8811 #[simd_test(enable = "avx512dq")]
8812 fn test_mm512_mask_cvtepi64_ps() {
8813 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8814 let b = _mm256_set_ps(9., 10., 11., 12., 13., 14., 15., 16.);
8815 let r = _mm512_mask_cvtepi64_ps(b, 0b01101001, a);
8816 let e = _mm256_set_ps(9., 2., 3., 12., 5., 14., 15., 8.);
8817 assert_eq_m256(r, e);
8818 }
8819
8820 #[simd_test(enable = "avx512dq")]
8821 fn test_mm512_maskz_cvtepi64_ps() {
8822 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8823 let r = _mm512_maskz_cvtepi64_ps(0b01101001, a);
8824 let e = _mm256_set_ps(0., 2., 3., 0., 5., 0., 0., 8.);
8825 assert_eq_m256(r, e);
8826 }
8827
8828 #[simd_test(enable = "avx512dq")]
8829 fn test_mm512_cvt_roundepu64_pd() {
8830 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8831 let r = _mm512_cvt_roundepu64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
8832 let e = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
8833 assert_eq_m512d(r, e);
8834 }
8835
8836 #[simd_test(enable = "avx512dq")]
8837 fn test_mm512_mask_cvt_roundepu64_pd() {
8838 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8839 let b = _mm512_set_pd(9., 10., 11., 12., 13., 14., 15., 16.);
8840 let r = _mm512_mask_cvt_roundepu64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
8841 b, 0b01101001, a,
8842 );
8843 let e = _mm512_set_pd(9., 2., 3., 12., 5., 14., 15., 8.);
8844 assert_eq_m512d(r, e);
8845 }
8846
8847 #[simd_test(enable = "avx512dq")]
8848 fn test_mm512_maskz_cvt_roundepu64_pd() {
8849 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8850 let r = _mm512_maskz_cvt_roundepu64_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
8851 0b01101001, a,
8852 );
8853 let e = _mm512_set_pd(0., 2., 3., 0., 5., 0., 0., 8.);
8854 assert_eq_m512d(r, e);
8855 }
8856
8857 #[simd_test(enable = "avx512dq,avx512vl")]
8858 fn test_mm_cvtepu64_pd() {
8859 let a = _mm_set_epi64x(1, 2);
8860 let r = _mm_cvtepu64_pd(a);
8861 let e = _mm_set_pd(1., 2.);
8862 assert_eq_m128d(r, e);
8863 }
8864
8865 #[simd_test(enable = "avx512dq,avx512vl")]
8866 fn test_mm_mask_cvtepu64_pd() {
8867 let a = _mm_set_epi64x(1, 2);
8868 let b = _mm_set_pd(3., 4.);
8869 let r = _mm_mask_cvtepu64_pd(b, 0b01, a);
8870 let e = _mm_set_pd(3., 2.);
8871 assert_eq_m128d(r, e);
8872 }
8873
8874 #[simd_test(enable = "avx512dq,avx512vl")]
8875 fn test_mm_maskz_cvtepu64_pd() {
8876 let a = _mm_set_epi64x(1, 2);
8877 let r = _mm_maskz_cvtepu64_pd(0b01, a);
8878 let e = _mm_set_pd(0., 2.);
8879 assert_eq_m128d(r, e);
8880 }
8881
8882 #[simd_test(enable = "avx512dq,avx512vl")]
8883 fn test_mm256_cvtepu64_pd() {
8884 let a = _mm256_set_epi64x(1, 2, 3, 4);
8885 let r = _mm256_cvtepu64_pd(a);
8886 let e = _mm256_set_pd(1., 2., 3., 4.);
8887 assert_eq_m256d(r, e);
8888 }
8889
8890 #[simd_test(enable = "avx512dq,avx512vl")]
8891 fn test_mm256_mask_cvtepu64_pd() {
8892 let a = _mm256_set_epi64x(1, 2, 3, 4);
8893 let b = _mm256_set_pd(5., 6., 7., 8.);
8894 let r = _mm256_mask_cvtepu64_pd(b, 0b0110, a);
8895 let e = _mm256_set_pd(5., 2., 3., 8.);
8896 assert_eq_m256d(r, e);
8897 }
8898
8899 #[simd_test(enable = "avx512dq,avx512vl")]
8900 fn test_mm256_maskz_cvtepu64_pd() {
8901 let a = _mm256_set_epi64x(1, 2, 3, 4);
8902 let r = _mm256_maskz_cvtepu64_pd(0b0110, a);
8903 let e = _mm256_set_pd(0., 2., 3., 0.);
8904 assert_eq_m256d(r, e);
8905 }
8906
8907 #[simd_test(enable = "avx512dq")]
8908 fn test_mm512_cvtepu64_pd() {
8909 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8910 let r = _mm512_cvtepu64_pd(a);
8911 let e = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
8912 assert_eq_m512d(r, e);
8913 }
8914
8915 #[simd_test(enable = "avx512dq")]
8916 fn test_mm512_mask_cvtepu64_pd() {
8917 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8918 let b = _mm512_set_pd(9., 10., 11., 12., 13., 14., 15., 16.);
8919 let r = _mm512_mask_cvtepu64_pd(b, 0b01101001, a);
8920 let e = _mm512_set_pd(9., 2., 3., 12., 5., 14., 15., 8.);
8921 assert_eq_m512d(r, e);
8922 }
8923
8924 #[simd_test(enable = "avx512dq")]
8925 fn test_mm512_maskz_cvtepu64_pd() {
8926 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8927 let r = _mm512_maskz_cvtepu64_pd(0b01101001, a);
8928 let e = _mm512_set_pd(0., 2., 3., 0., 5., 0., 0., 8.);
8929 assert_eq_m512d(r, e);
8930 }
8931
8932 #[simd_test(enable = "avx512dq")]
8933 fn test_mm512_cvt_roundepu64_ps() {
8934 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8935 let r = _mm512_cvt_roundepu64_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
8936 let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
8937 assert_eq_m256(r, e);
8938 }
8939
8940 #[simd_test(enable = "avx512dq")]
8941 fn test_mm512_mask_cvt_roundepu64_ps() {
8942 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8943 let b = _mm256_set_ps(9., 10., 11., 12., 13., 14., 15., 16.);
8944 let r = _mm512_mask_cvt_roundepu64_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
8945 b, 0b01101001, a,
8946 );
8947 let e = _mm256_set_ps(9., 2., 3., 12., 5., 14., 15., 8.);
8948 assert_eq_m256(r, e);
8949 }
8950
8951 #[simd_test(enable = "avx512dq")]
8952 fn test_mm512_maskz_cvt_roundepu64_ps() {
8953 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
8954 let r = _mm512_maskz_cvt_roundepu64_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
8955 0b01101001, a,
8956 );
8957 let e = _mm256_set_ps(0., 2., 3., 0., 5., 0., 0., 8.);
8958 assert_eq_m256(r, e);
8959 }
8960
8961 #[simd_test(enable = "avx512dq,avx512vl")]
8962 fn test_mm_cvtepu64_ps() {
8963 let a = _mm_set_epi64x(1, 2);
8964 let r = _mm_cvtepu64_ps(a);
8965 let e = _mm_set_ps(0., 0., 1., 2.);
8966 assert_eq_m128(r, e);
8967 }
8968
8969 #[simd_test(enable = "avx512dq,avx512vl")]
8970 fn test_mm_mask_cvtepu64_ps() {
8971 let a = _mm_set_epi64x(1, 2);
8972 let b = _mm_set_ps(3., 4., 5., 6.);
8973 let r = _mm_mask_cvtepu64_ps(b, 0b01, a);
8974 let e = _mm_set_ps(0., 0., 5., 2.);
8975 assert_eq_m128(r, e);
8976 }
8977
8978 #[simd_test(enable = "avx512dq,avx512vl")]
8979 fn test_mm_maskz_cvtepu64_ps() {
8980 let a = _mm_set_epi64x(1, 2);
8981 let r = _mm_maskz_cvtepu64_ps(0b01, a);
8982 let e = _mm_set_ps(0., 0., 0., 2.);
8983 assert_eq_m128(r, e);
8984 }
8985
8986 #[simd_test(enable = "avx512dq,avx512vl")]
8987 fn test_mm256_cvtepu64_ps() {
8988 let a = _mm256_set_epi64x(1, 2, 3, 4);
8989 let r = _mm256_cvtepu64_ps(a);
8990 let e = _mm_set_ps(1., 2., 3., 4.);
8991 assert_eq_m128(r, e);
8992 }
8993
8994 #[simd_test(enable = "avx512dq,avx512vl")]
8995 fn test_mm256_mask_cvtepu64_ps() {
8996 let a = _mm256_set_epi64x(1, 2, 3, 4);
8997 let b = _mm_set_ps(5., 6., 7., 8.);
8998 let r = _mm256_mask_cvtepu64_ps(b, 0b0110, a);
8999 let e = _mm_set_ps(5., 2., 3., 8.);
9000 assert_eq_m128(r, e);
9001 }
9002
9003 #[simd_test(enable = "avx512dq,avx512vl")]
9004 fn test_mm256_maskz_cvtepu64_ps() {
9005 let a = _mm256_set_epi64x(1, 2, 3, 4);
9006 let r = _mm256_maskz_cvtepu64_ps(0b0110, a);
9007 let e = _mm_set_ps(0., 2., 3., 0.);
9008 assert_eq_m128(r, e);
9009 }
9010
9011 #[simd_test(enable = "avx512dq")]
9012 fn test_mm512_cvtepu64_ps() {
9013 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
9014 let r = _mm512_cvtepu64_ps(a);
9015 let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9016 assert_eq_m256(r, e);
9017 }
9018
9019 #[simd_test(enable = "avx512dq")]
9020 fn test_mm512_mask_cvtepu64_ps() {
9021 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
9022 let b = _mm256_set_ps(9., 10., 11., 12., 13., 14., 15., 16.);
9023 let r = _mm512_mask_cvtepu64_ps(b, 0b01101001, a);
9024 let e = _mm256_set_ps(9., 2., 3., 12., 5., 14., 15., 8.);
9025 assert_eq_m256(r, e);
9026 }
9027
9028 #[simd_test(enable = "avx512dq")]
9029 fn test_mm512_maskz_cvtepu64_ps() {
9030 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
9031 let r = _mm512_maskz_cvtepu64_ps(0b01101001, a);
9032 let e = _mm256_set_ps(0., 2., 3., 0., 5., 0., 0., 8.);
9033 assert_eq_m256(r, e);
9034 }
9035
9036 #[simd_test(enable = "avx512dq")]
9037 fn test_mm512_cvt_roundpd_epi64() {
9038 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9039 let r = _mm512_cvt_roundpd_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
9040 let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
9041 assert_eq_m512i(r, e);
9042 }
9043
9044 #[simd_test(enable = "avx512dq")]
9045 fn test_mm512_mask_cvt_roundpd_epi64() {
9046 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9047 let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
9048 let r = _mm512_mask_cvt_roundpd_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
9049 b, 0b01101001, a,
9050 );
9051 let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
9052 assert_eq_m512i(r, e);
9053 }
9054
9055 #[simd_test(enable = "avx512dq")]
9056 fn test_mm512_maskz_cvt_roundpd_epi64() {
9057 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9058 let r = _mm512_maskz_cvt_roundpd_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
9059 0b01101001, a,
9060 );
9061 let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
9062 assert_eq_m512i(r, e);
9063 }
9064
9065 #[simd_test(enable = "avx512dq,avx512vl")]
9066 fn test_mm_cvtpd_epi64() {
9067 let a = _mm_set_pd(1., 2.);
9068 let r = _mm_cvtpd_epi64(a);
9069 let e = _mm_set_epi64x(1, 2);
9070 assert_eq_m128i(r, e);
9071 }
9072
9073 #[simd_test(enable = "avx512dq,avx512vl")]
9074 fn test_mm_mask_cvtpd_epi64() {
9075 let a = _mm_set_pd(1., 2.);
9076 let b = _mm_set_epi64x(3, 4);
9077 let r = _mm_mask_cvtpd_epi64(b, 0b01, a);
9078 let e = _mm_set_epi64x(3, 2);
9079 assert_eq_m128i(r, e);
9080 }
9081
9082 #[simd_test(enable = "avx512dq,avx512vl")]
9083 fn test_mm_maskz_cvtpd_epi64() {
9084 let a = _mm_set_pd(1., 2.);
9085 let r = _mm_maskz_cvtpd_epi64(0b01, a);
9086 let e = _mm_set_epi64x(0, 2);
9087 assert_eq_m128i(r, e);
9088 }
9089
9090 #[simd_test(enable = "avx512dq,avx512vl")]
9091 fn test_mm256_cvtpd_epi64() {
9092 let a = _mm256_set_pd(1., 2., 3., 4.);
9093 let r = _mm256_cvtpd_epi64(a);
9094 let e = _mm256_set_epi64x(1, 2, 3, 4);
9095 assert_eq_m256i(r, e);
9096 }
9097
9098 #[simd_test(enable = "avx512dq,avx512vl")]
9099 fn test_mm256_mask_cvtpd_epi64() {
9100 let a = _mm256_set_pd(1., 2., 3., 4.);
9101 let b = _mm256_set_epi64x(5, 6, 7, 8);
9102 let r = _mm256_mask_cvtpd_epi64(b, 0b0110, a);
9103 let e = _mm256_set_epi64x(5, 2, 3, 8);
9104 assert_eq_m256i(r, e);
9105 }
9106
9107 #[simd_test(enable = "avx512dq,avx512vl")]
9108 fn test_mm256_maskz_cvtpd_epi64() {
9109 let a = _mm256_set_pd(1., 2., 3., 4.);
9110 let r = _mm256_maskz_cvtpd_epi64(0b0110, a);
9111 let e = _mm256_set_epi64x(0, 2, 3, 0);
9112 assert_eq_m256i(r, e);
9113 }
9114
9115 #[simd_test(enable = "avx512dq")]
9116 fn test_mm512_cvtpd_epi64() {
9117 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9118 let r = _mm512_cvtpd_epi64(a);
9119 let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
9120 assert_eq_m512i(r, e);
9121 }
9122
9123 #[simd_test(enable = "avx512dq")]
9124 fn test_mm512_mask_cvtpd_epi64() {
9125 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9126 let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
9127 let r = _mm512_mask_cvtpd_epi64(b, 0b01101001, a);
9128 let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
9129 assert_eq_m512i(r, e);
9130 }
9131
9132 #[simd_test(enable = "avx512dq")]
9133 fn test_mm512_maskz_cvtpd_epi64() {
9134 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9135 let r = _mm512_maskz_cvtpd_epi64(0b01101001, a);
9136 let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
9137 assert_eq_m512i(r, e);
9138 }
9139
9140 #[simd_test(enable = "avx512dq")]
9141 fn test_mm512_cvt_roundps_epi64() {
9142 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9143 let r = _mm512_cvt_roundps_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
9144 let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
9145 assert_eq_m512i(r, e);
9146 }
9147
9148 #[simd_test(enable = "avx512dq")]
9149 fn test_mm512_mask_cvt_roundps_epi64() {
9150 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9151 let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
9152 let r = _mm512_mask_cvt_roundps_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
9153 b, 0b01101001, a,
9154 );
9155 let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
9156 assert_eq_m512i(r, e);
9157 }
9158
9159 #[simd_test(enable = "avx512dq")]
9160 fn test_mm512_maskz_cvt_roundps_epi64() {
9161 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9162 let r = _mm512_maskz_cvt_roundps_epi64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
9163 0b01101001, a,
9164 );
9165 let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
9166 assert_eq_m512i(r, e);
9167 }
9168
9169 #[simd_test(enable = "avx512dq,avx512vl")]
9170 fn test_mm_cvtps_epi64() {
9171 let a = _mm_set_ps(1., 2., 3., 4.);
9172 let r = _mm_cvtps_epi64(a);
9173 let e = _mm_set_epi64x(3, 4);
9174 assert_eq_m128i(r, e);
9175 }
9176
9177 #[simd_test(enable = "avx512dq,avx512vl")]
9178 fn test_mm_mask_cvtps_epi64() {
9179 let a = _mm_set_ps(1., 2., 3., 4.);
9180 let b = _mm_set_epi64x(5, 6);
9181 let r = _mm_mask_cvtps_epi64(b, 0b01, a);
9182 let e = _mm_set_epi64x(5, 4);
9183 assert_eq_m128i(r, e);
9184 }
9185
9186 #[simd_test(enable = "avx512dq,avx512vl")]
9187 fn test_mm_maskz_cvtps_epi64() {
9188 let a = _mm_set_ps(1., 2., 3., 4.);
9189 let r = _mm_maskz_cvtps_epi64(0b01, a);
9190 let e = _mm_set_epi64x(0, 4);
9191 assert_eq_m128i(r, e);
9192 }
9193
9194 #[simd_test(enable = "avx512dq,avx512vl")]
9195 fn test_mm256_cvtps_epi64() {
9196 let a = _mm_set_ps(1., 2., 3., 4.);
9197 let r = _mm256_cvtps_epi64(a);
9198 let e = _mm256_set_epi64x(1, 2, 3, 4);
9199 assert_eq_m256i(r, e);
9200 }
9201
9202 #[simd_test(enable = "avx512dq,avx512vl")]
9203 fn test_mm256_mask_cvtps_epi64() {
9204 let a = _mm_set_ps(1., 2., 3., 4.);
9205 let b = _mm256_set_epi64x(5, 6, 7, 8);
9206 let r = _mm256_mask_cvtps_epi64(b, 0b0110, a);
9207 let e = _mm256_set_epi64x(5, 2, 3, 8);
9208 assert_eq_m256i(r, e);
9209 }
9210
9211 #[simd_test(enable = "avx512dq,avx512vl")]
9212 fn test_mm256_maskz_cvtps_epi64() {
9213 let a = _mm_set_ps(1., 2., 3., 4.);
9214 let r = _mm256_maskz_cvtps_epi64(0b0110, a);
9215 let e = _mm256_set_epi64x(0, 2, 3, 0);
9216 assert_eq_m256i(r, e);
9217 }
9218
9219 #[simd_test(enable = "avx512dq")]
9220 fn test_mm512_cvtps_epi64() {
9221 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9222 let r = _mm512_cvtps_epi64(a);
9223 let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
9224 assert_eq_m512i(r, e);
9225 }
9226
9227 #[simd_test(enable = "avx512dq")]
9228 fn test_mm512_mask_cvtps_epi64() {
9229 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9230 let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
9231 let r = _mm512_mask_cvtps_epi64(b, 0b01101001, a);
9232 let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
9233 assert_eq_m512i(r, e);
9234 }
9235
9236 #[simd_test(enable = "avx512dq")]
9237 fn test_mm512_maskz_cvtps_epi64() {
9238 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9239 let r = _mm512_maskz_cvtps_epi64(0b01101001, a);
9240 let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
9241 assert_eq_m512i(r, e);
9242 }
9243
9244 #[simd_test(enable = "avx512dq")]
9245 fn test_mm512_cvt_roundpd_epu64() {
9246 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9247 let r = _mm512_cvt_roundpd_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
9248 let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
9249 assert_eq_m512i(r, e);
9250 }
9251
9252 #[simd_test(enable = "avx512dq")]
9253 fn test_mm512_mask_cvt_roundpd_epu64() {
9254 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9255 let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
9256 let r = _mm512_mask_cvt_roundpd_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
9257 b, 0b01101001, a,
9258 );
9259 let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
9260 assert_eq_m512i(r, e);
9261 }
9262
9263 #[simd_test(enable = "avx512dq")]
9264 fn test_mm512_maskz_cvt_roundpd_epu64() {
9265 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9266 let r = _mm512_maskz_cvt_roundpd_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
9267 0b01101001, a,
9268 );
9269 let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
9270 assert_eq_m512i(r, e);
9271 }
9272
9273 #[simd_test(enable = "avx512dq,avx512vl")]
9274 fn test_mm_cvtpd_epu64() {
9275 let a = _mm_set_pd(1., 2.);
9276 let r = _mm_cvtpd_epu64(a);
9277 let e = _mm_set_epi64x(1, 2);
9278 assert_eq_m128i(r, e);
9279 }
9280
9281 #[simd_test(enable = "avx512dq,avx512vl")]
9282 fn test_mm_mask_cvtpd_epu64() {
9283 let a = _mm_set_pd(1., 2.);
9284 let b = _mm_set_epi64x(3, 4);
9285 let r = _mm_mask_cvtpd_epu64(b, 0b01, a);
9286 let e = _mm_set_epi64x(3, 2);
9287 assert_eq_m128i(r, e);
9288 }
9289
9290 #[simd_test(enable = "avx512dq,avx512vl")]
9291 fn test_mm_maskz_cvtpd_epu64() {
9292 let a = _mm_set_pd(1., 2.);
9293 let r = _mm_maskz_cvtpd_epu64(0b01, a);
9294 let e = _mm_set_epi64x(0, 2);
9295 assert_eq_m128i(r, e);
9296 }
9297
9298 #[simd_test(enable = "avx512dq,avx512vl")]
9299 fn test_mm256_cvtpd_epu64() {
9300 let a = _mm256_set_pd(1., 2., 3., 4.);
9301 let r = _mm256_cvtpd_epu64(a);
9302 let e = _mm256_set_epi64x(1, 2, 3, 4);
9303 assert_eq_m256i(r, e);
9304 }
9305
9306 #[simd_test(enable = "avx512dq,avx512vl")]
9307 fn test_mm256_mask_cvtpd_epu64() {
9308 let a = _mm256_set_pd(1., 2., 3., 4.);
9309 let b = _mm256_set_epi64x(5, 6, 7, 8);
9310 let r = _mm256_mask_cvtpd_epu64(b, 0b0110, a);
9311 let e = _mm256_set_epi64x(5, 2, 3, 8);
9312 assert_eq_m256i(r, e);
9313 }
9314
9315 #[simd_test(enable = "avx512dq,avx512vl")]
9316 fn test_mm256_maskz_cvtpd_epu64() {
9317 let a = _mm256_set_pd(1., 2., 3., 4.);
9318 let r = _mm256_maskz_cvtpd_epu64(0b0110, a);
9319 let e = _mm256_set_epi64x(0, 2, 3, 0);
9320 assert_eq_m256i(r, e);
9321 }
9322
9323 #[simd_test(enable = "avx512dq")]
9324 fn test_mm512_cvtpd_epu64() {
9325 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9326 let r = _mm512_cvtpd_epu64(a);
9327 let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
9328 assert_eq_m512i(r, e);
9329 }
9330
9331 #[simd_test(enable = "avx512dq")]
9332 fn test_mm512_mask_cvtpd_epu64() {
9333 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9334 let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
9335 let r = _mm512_mask_cvtpd_epu64(b, 0b01101001, a);
9336 let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
9337 assert_eq_m512i(r, e);
9338 }
9339
9340 #[simd_test(enable = "avx512dq")]
9341 fn test_mm512_maskz_cvtpd_epu64() {
9342 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9343 let r = _mm512_maskz_cvtpd_epu64(0b01101001, a);
9344 let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
9345 assert_eq_m512i(r, e);
9346 }
9347
9348 #[simd_test(enable = "avx512dq")]
9349 fn test_mm512_cvt_roundps_epu64() {
9350 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9351 let r = _mm512_cvt_roundps_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
9352 let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
9353 assert_eq_m512i(r, e);
9354 }
9355
9356 #[simd_test(enable = "avx512dq")]
9357 fn test_mm512_mask_cvt_roundps_epu64() {
9358 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9359 let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
9360 let r = _mm512_mask_cvt_roundps_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
9361 b, 0b01101001, a,
9362 );
9363 let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
9364 assert_eq_m512i(r, e);
9365 }
9366
9367 #[simd_test(enable = "avx512dq")]
9368 fn test_mm512_maskz_cvt_roundps_epu64() {
9369 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9370 let r = _mm512_maskz_cvt_roundps_epu64::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
9371 0b01101001, a,
9372 );
9373 let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
9374 assert_eq_m512i(r, e);
9375 }
9376
9377 #[simd_test(enable = "avx512dq,avx512vl")]
9378 fn test_mm_cvtps_epu64() {
9379 let a = _mm_set_ps(1., 2., 3., 4.);
9380 let r = _mm_cvtps_epu64(a);
9381 let e = _mm_set_epi64x(3, 4);
9382 assert_eq_m128i(r, e);
9383 }
9384
9385 #[simd_test(enable = "avx512dq,avx512vl")]
9386 fn test_mm_mask_cvtps_epu64() {
9387 let a = _mm_set_ps(1., 2., 3., 4.);
9388 let b = _mm_set_epi64x(5, 6);
9389 let r = _mm_mask_cvtps_epu64(b, 0b01, a);
9390 let e = _mm_set_epi64x(5, 4);
9391 assert_eq_m128i(r, e);
9392 }
9393
9394 #[simd_test(enable = "avx512dq,avx512vl")]
9395 fn test_mm_maskz_cvtps_epu64() {
9396 let a = _mm_set_ps(1., 2., 3., 4.);
9397 let r = _mm_maskz_cvtps_epu64(0b01, a);
9398 let e = _mm_set_epi64x(0, 4);
9399 assert_eq_m128i(r, e);
9400 }
9401
9402 #[simd_test(enable = "avx512dq,avx512vl")]
9403 fn test_mm256_cvtps_epu64() {
9404 let a = _mm_set_ps(1., 2., 3., 4.);
9405 let r = _mm256_cvtps_epu64(a);
9406 let e = _mm256_set_epi64x(1, 2, 3, 4);
9407 assert_eq_m256i(r, e);
9408 }
9409
9410 #[simd_test(enable = "avx512dq,avx512vl")]
9411 fn test_mm256_mask_cvtps_epu64() {
9412 let a = _mm_set_ps(1., 2., 3., 4.);
9413 let b = _mm256_set_epi64x(5, 6, 7, 8);
9414 let r = _mm256_mask_cvtps_epu64(b, 0b0110, a);
9415 let e = _mm256_set_epi64x(5, 2, 3, 8);
9416 assert_eq_m256i(r, e);
9417 }
9418
9419 #[simd_test(enable = "avx512dq,avx512vl")]
9420 fn test_mm256_maskz_cvtps_epu64() {
9421 let a = _mm_set_ps(1., 2., 3., 4.);
9422 let r = _mm256_maskz_cvtps_epu64(0b0110, a);
9423 let e = _mm256_set_epi64x(0, 2, 3, 0);
9424 assert_eq_m256i(r, e);
9425 }
9426
9427 #[simd_test(enable = "avx512dq")]
9428 fn test_mm512_cvtps_epu64() {
9429 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9430 let r = _mm512_cvtps_epu64(a);
9431 let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
9432 assert_eq_m512i(r, e);
9433 }
9434
9435 #[simd_test(enable = "avx512dq")]
9436 fn test_mm512_mask_cvtps_epu64() {
9437 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9438 let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
9439 let r = _mm512_mask_cvtps_epu64(b, 0b01101001, a);
9440 let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
9441 assert_eq_m512i(r, e);
9442 }
9443
9444 #[simd_test(enable = "avx512dq")]
9445 fn test_mm512_maskz_cvtps_epu64() {
9446 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9447 let r = _mm512_maskz_cvtps_epu64(0b01101001, a);
9448 let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
9449 assert_eq_m512i(r, e);
9450 }
9451
9452 #[simd_test(enable = "avx512dq")]
9453 fn test_mm512_cvtt_roundpd_epi64() {
9454 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9455 let r = _mm512_cvtt_roundpd_epi64::<_MM_FROUND_NO_EXC>(a);
9456 let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
9457 assert_eq_m512i(r, e);
9458 }
9459
9460 #[simd_test(enable = "avx512dq")]
9461 fn test_mm512_mask_cvtt_roundpd_epi64() {
9462 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9463 let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
9464 let r = _mm512_mask_cvtt_roundpd_epi64::<_MM_FROUND_NO_EXC>(b, 0b01101001, a);
9465 let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
9466 assert_eq_m512i(r, e);
9467 }
9468
9469 #[simd_test(enable = "avx512dq")]
9470 fn test_mm512_maskz_cvtt_roundpd_epi64() {
9471 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9472 let r = _mm512_maskz_cvtt_roundpd_epi64::<_MM_FROUND_NO_EXC>(0b01101001, a);
9473 let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
9474 assert_eq_m512i(r, e);
9475 }
9476
9477 #[simd_test(enable = "avx512dq,avx512vl")]
9478 fn test_mm_cvttpd_epi64() {
9479 let a = _mm_set_pd(1., 2.);
9480 let r = _mm_cvttpd_epi64(a);
9481 let e = _mm_set_epi64x(1, 2);
9482 assert_eq_m128i(r, e);
9483 }
9484
9485 #[simd_test(enable = "avx512dq,avx512vl")]
9486 fn test_mm_mask_cvttpd_epi64() {
9487 let a = _mm_set_pd(1., 2.);
9488 let b = _mm_set_epi64x(3, 4);
9489 let r = _mm_mask_cvttpd_epi64(b, 0b01, a);
9490 let e = _mm_set_epi64x(3, 2);
9491 assert_eq_m128i(r, e);
9492 }
9493
9494 #[simd_test(enable = "avx512dq,avx512vl")]
9495 fn test_mm_maskz_cvttpd_epi64() {
9496 let a = _mm_set_pd(1., 2.);
9497 let r = _mm_maskz_cvttpd_epi64(0b01, a);
9498 let e = _mm_set_epi64x(0, 2);
9499 assert_eq_m128i(r, e);
9500 }
9501
9502 #[simd_test(enable = "avx512dq,avx512vl")]
9503 fn test_mm256_cvttpd_epi64() {
9504 let a = _mm256_set_pd(1., 2., 3., 4.);
9505 let r = _mm256_cvttpd_epi64(a);
9506 let e = _mm256_set_epi64x(1, 2, 3, 4);
9507 assert_eq_m256i(r, e);
9508 }
9509
9510 #[simd_test(enable = "avx512dq,avx512vl")]
9511 fn test_mm256_mask_cvttpd_epi64() {
9512 let a = _mm256_set_pd(1., 2., 3., 4.);
9513 let b = _mm256_set_epi64x(5, 6, 7, 8);
9514 let r = _mm256_mask_cvttpd_epi64(b, 0b0110, a);
9515 let e = _mm256_set_epi64x(5, 2, 3, 8);
9516 assert_eq_m256i(r, e);
9517 }
9518
9519 #[simd_test(enable = "avx512dq,avx512vl")]
9520 fn test_mm256_maskz_cvttpd_epi64() {
9521 let a = _mm256_set_pd(1., 2., 3., 4.);
9522 let r = _mm256_maskz_cvttpd_epi64(0b0110, a);
9523 let e = _mm256_set_epi64x(0, 2, 3, 0);
9524 assert_eq_m256i(r, e);
9525 }
9526
9527 #[simd_test(enable = "avx512dq")]
9528 fn test_mm512_cvttpd_epi64() {
9529 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9530 let r = _mm512_cvttpd_epi64(a);
9531 let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
9532 assert_eq_m512i(r, e);
9533 }
9534
9535 #[simd_test(enable = "avx512dq")]
9536 fn test_mm512_mask_cvttpd_epi64() {
9537 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9538 let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
9539 let r = _mm512_mask_cvttpd_epi64(b, 0b01101001, a);
9540 let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
9541 assert_eq_m512i(r, e);
9542 }
9543
9544 #[simd_test(enable = "avx512dq")]
9545 fn test_mm512_maskz_cvttpd_epi64() {
9546 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9547 let r = _mm512_maskz_cvttpd_epi64(0b01101001, a);
9548 let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
9549 assert_eq_m512i(r, e);
9550 }
9551
9552 #[simd_test(enable = "avx512dq")]
9553 fn test_mm512_cvtt_roundps_epi64() {
9554 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9555 let r = _mm512_cvtt_roundps_epi64::<_MM_FROUND_NO_EXC>(a);
9556 let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
9557 assert_eq_m512i(r, e);
9558 }
9559
9560 #[simd_test(enable = "avx512dq")]
9561 fn test_mm512_mask_cvtt_roundps_epi64() {
9562 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9563 let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
9564 let r = _mm512_mask_cvtt_roundps_epi64::<_MM_FROUND_NO_EXC>(b, 0b01101001, a);
9565 let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
9566 assert_eq_m512i(r, e);
9567 }
9568
9569 #[simd_test(enable = "avx512dq")]
9570 fn test_mm512_maskz_cvtt_roundps_epi64() {
9571 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9572 let r = _mm512_maskz_cvtt_roundps_epi64::<_MM_FROUND_NO_EXC>(0b01101001, a);
9573 let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
9574 assert_eq_m512i(r, e);
9575 }
9576
9577 #[simd_test(enable = "avx512dq,avx512vl")]
9578 fn test_mm_cvttps_epi64() {
9579 let a = _mm_set_ps(1., 2., 3., 4.);
9580 let r = _mm_cvttps_epi64(a);
9581 let e = _mm_set_epi64x(3, 4);
9582 assert_eq_m128i(r, e);
9583 }
9584
9585 #[simd_test(enable = "avx512dq,avx512vl")]
9586 fn test_mm_mask_cvttps_epi64() {
9587 let a = _mm_set_ps(1., 2., 3., 4.);
9588 let b = _mm_set_epi64x(5, 6);
9589 let r = _mm_mask_cvttps_epi64(b, 0b01, a);
9590 let e = _mm_set_epi64x(5, 4);
9591 assert_eq_m128i(r, e);
9592 }
9593
9594 #[simd_test(enable = "avx512dq,avx512vl")]
9595 fn test_mm_maskz_cvttps_epi64() {
9596 let a = _mm_set_ps(1., 2., 3., 4.);
9597 let r = _mm_maskz_cvttps_epi64(0b01, a);
9598 let e = _mm_set_epi64x(0, 4);
9599 assert_eq_m128i(r, e);
9600 }
9601
9602 #[simd_test(enable = "avx512dq,avx512vl")]
9603 fn test_mm256_cvttps_epi64() {
9604 let a = _mm_set_ps(1., 2., 3., 4.);
9605 let r = _mm256_cvttps_epi64(a);
9606 let e = _mm256_set_epi64x(1, 2, 3, 4);
9607 assert_eq_m256i(r, e);
9608 }
9609
9610 #[simd_test(enable = "avx512dq,avx512vl")]
9611 fn test_mm256_mask_cvttps_epi64() {
9612 let a = _mm_set_ps(1., 2., 3., 4.);
9613 let b = _mm256_set_epi64x(5, 6, 7, 8);
9614 let r = _mm256_mask_cvttps_epi64(b, 0b0110, a);
9615 let e = _mm256_set_epi64x(5, 2, 3, 8);
9616 assert_eq_m256i(r, e);
9617 }
9618
9619 #[simd_test(enable = "avx512dq,avx512vl")]
9620 fn test_mm256_maskz_cvttps_epi64() {
9621 let a = _mm_set_ps(1., 2., 3., 4.);
9622 let r = _mm256_maskz_cvttps_epi64(0b0110, a);
9623 let e = _mm256_set_epi64x(0, 2, 3, 0);
9624 assert_eq_m256i(r, e);
9625 }
9626
9627 #[simd_test(enable = "avx512dq")]
9628 fn test_mm512_cvttps_epi64() {
9629 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9630 let r = _mm512_cvttps_epi64(a);
9631 let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
9632 assert_eq_m512i(r, e);
9633 }
9634
9635 #[simd_test(enable = "avx512dq")]
9636 fn test_mm512_mask_cvttps_epi64() {
9637 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9638 let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
9639 let r = _mm512_mask_cvttps_epi64(b, 0b01101001, a);
9640 let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
9641 assert_eq_m512i(r, e);
9642 }
9643
9644 #[simd_test(enable = "avx512dq")]
9645 fn test_mm512_maskz_cvttps_epi64() {
9646 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9647 let r = _mm512_maskz_cvttps_epi64(0b01101001, a);
9648 let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
9649 assert_eq_m512i(r, e);
9650 }
9651
9652 #[simd_test(enable = "avx512dq")]
9653 fn test_mm512_cvtt_roundpd_epu64() {
9654 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9655 let r = _mm512_cvtt_roundpd_epu64::<_MM_FROUND_NO_EXC>(a);
9656 let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
9657 assert_eq_m512i(r, e);
9658 }
9659
9660 #[simd_test(enable = "avx512dq")]
9661 fn test_mm512_mask_cvtt_roundpd_epu64() {
9662 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9663 let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
9664 let r = _mm512_mask_cvtt_roundpd_epu64::<_MM_FROUND_NO_EXC>(b, 0b01101001, a);
9665 let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
9666 assert_eq_m512i(r, e);
9667 }
9668
9669 #[simd_test(enable = "avx512dq")]
9670 fn test_mm512_maskz_cvtt_roundpd_epu64() {
9671 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9672 let r = _mm512_maskz_cvtt_roundpd_epu64::<_MM_FROUND_NO_EXC>(0b01101001, a);
9673 let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
9674 assert_eq_m512i(r, e);
9675 }
9676
9677 #[simd_test(enable = "avx512dq,avx512vl")]
9678 fn test_mm_cvttpd_epu64() {
9679 let a = _mm_set_pd(1., 2.);
9680 let r = _mm_cvttpd_epu64(a);
9681 let e = _mm_set_epi64x(1, 2);
9682 assert_eq_m128i(r, e);
9683 }
9684
9685 #[simd_test(enable = "avx512dq,avx512vl")]
9686 fn test_mm_mask_cvttpd_epu64() {
9687 let a = _mm_set_pd(1., 2.);
9688 let b = _mm_set_epi64x(3, 4);
9689 let r = _mm_mask_cvttpd_epu64(b, 0b01, a);
9690 let e = _mm_set_epi64x(3, 2);
9691 assert_eq_m128i(r, e);
9692 }
9693
9694 #[simd_test(enable = "avx512dq,avx512vl")]
9695 fn test_mm_maskz_cvttpd_epu64() {
9696 let a = _mm_set_pd(1., 2.);
9697 let r = _mm_maskz_cvttpd_epu64(0b01, a);
9698 let e = _mm_set_epi64x(0, 2);
9699 assert_eq_m128i(r, e);
9700 }
9701
9702 #[simd_test(enable = "avx512dq,avx512vl")]
9703 fn test_mm256_cvttpd_epu64() {
9704 let a = _mm256_set_pd(1., 2., 3., 4.);
9705 let r = _mm256_cvttpd_epu64(a);
9706 let e = _mm256_set_epi64x(1, 2, 3, 4);
9707 assert_eq_m256i(r, e);
9708 }
9709
9710 #[simd_test(enable = "avx512dq,avx512vl")]
9711 fn test_mm256_mask_cvttpd_epu64() {
9712 let a = _mm256_set_pd(1., 2., 3., 4.);
9713 let b = _mm256_set_epi64x(5, 6, 7, 8);
9714 let r = _mm256_mask_cvttpd_epu64(b, 0b0110, a);
9715 let e = _mm256_set_epi64x(5, 2, 3, 8);
9716 assert_eq_m256i(r, e);
9717 }
9718
9719 #[simd_test(enable = "avx512dq,avx512vl")]
9720 fn test_mm256_maskz_cvttpd_epu64() {
9721 let a = _mm256_set_pd(1., 2., 3., 4.);
9722 let r = _mm256_maskz_cvttpd_epu64(0b0110, a);
9723 let e = _mm256_set_epi64x(0, 2, 3, 0);
9724 assert_eq_m256i(r, e);
9725 }
9726
9727 #[simd_test(enable = "avx512dq")]
9728 fn test_mm512_cvttpd_epu64() {
9729 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9730 let r = _mm512_cvttpd_epu64(a);
9731 let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
9732 assert_eq_m512i(r, e);
9733 }
9734
9735 #[simd_test(enable = "avx512dq")]
9736 fn test_mm512_mask_cvttpd_epu64() {
9737 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9738 let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
9739 let r = _mm512_mask_cvttpd_epu64(b, 0b01101001, a);
9740 let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
9741 assert_eq_m512i(r, e);
9742 }
9743
9744 #[simd_test(enable = "avx512dq")]
9745 fn test_mm512_maskz_cvttpd_epu64() {
9746 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
9747 let r = _mm512_maskz_cvttpd_epu64(0b01101001, a);
9748 let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
9749 assert_eq_m512i(r, e);
9750 }
9751
9752 #[simd_test(enable = "avx512dq")]
9753 fn test_mm512_cvtt_roundps_epu64() {
9754 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9755 let r = _mm512_cvtt_roundps_epu64::<_MM_FROUND_NO_EXC>(a);
9756 let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
9757 assert_eq_m512i(r, e);
9758 }
9759
9760 #[simd_test(enable = "avx512dq")]
9761 fn test_mm512_mask_cvtt_roundps_epu64() {
9762 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9763 let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
9764 let r = _mm512_mask_cvtt_roundps_epu64::<_MM_FROUND_NO_EXC>(b, 0b01101001, a);
9765 let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
9766 assert_eq_m512i(r, e);
9767 }
9768
9769 #[simd_test(enable = "avx512dq")]
9770 fn test_mm512_maskz_cvtt_roundps_epu64() {
9771 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9772 let r = _mm512_maskz_cvtt_roundps_epu64::<_MM_FROUND_NO_EXC>(0b01101001, a);
9773 let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
9774 assert_eq_m512i(r, e);
9775 }
9776
9777 #[simd_test(enable = "avx512dq,avx512vl")]
9778 fn test_mm_cvttps_epu64() {
9779 let a = _mm_set_ps(1., 2., 3., 4.);
9780 let r = _mm_cvttps_epu64(a);
9781 let e = _mm_set_epi64x(3, 4);
9782 assert_eq_m128i(r, e);
9783 }
9784
9785 #[simd_test(enable = "avx512dq,avx512vl")]
9786 fn test_mm_mask_cvttps_epu64() {
9787 let a = _mm_set_ps(1., 2., 3., 4.);
9788 let b = _mm_set_epi64x(5, 6);
9789 let r = _mm_mask_cvttps_epu64(b, 0b01, a);
9790 let e = _mm_set_epi64x(5, 4);
9791 assert_eq_m128i(r, e);
9792 }
9793
9794 #[simd_test(enable = "avx512dq,avx512vl")]
9795 fn test_mm_maskz_cvttps_epu64() {
9796 let a = _mm_set_ps(1., 2., 3., 4.);
9797 let r = _mm_maskz_cvttps_epu64(0b01, a);
9798 let e = _mm_set_epi64x(0, 4);
9799 assert_eq_m128i(r, e);
9800 }
9801
9802 #[simd_test(enable = "avx512dq,avx512vl")]
9803 fn test_mm256_cvttps_epu64() {
9804 let a = _mm_set_ps(1., 2., 3., 4.);
9805 let r = _mm256_cvttps_epu64(a);
9806 let e = _mm256_set_epi64x(1, 2, 3, 4);
9807 assert_eq_m256i(r, e);
9808 }
9809
9810 #[simd_test(enable = "avx512dq,avx512vl")]
9811 fn test_mm256_mask_cvttps_epu64() {
9812 let a = _mm_set_ps(1., 2., 3., 4.);
9813 let b = _mm256_set_epi64x(5, 6, 7, 8);
9814 let r = _mm256_mask_cvttps_epu64(b, 0b0110, a);
9815 let e = _mm256_set_epi64x(5, 2, 3, 8);
9816 assert_eq_m256i(r, e);
9817 }
9818
9819 #[simd_test(enable = "avx512dq,avx512vl")]
9820 fn test_mm256_maskz_cvttps_epu64() {
9821 let a = _mm_set_ps(1., 2., 3., 4.);
9822 let r = _mm256_maskz_cvttps_epu64(0b0110, a);
9823 let e = _mm256_set_epi64x(0, 2, 3, 0);
9824 assert_eq_m256i(r, e);
9825 }
9826
9827 #[simd_test(enable = "avx512dq")]
9828 fn test_mm512_cvttps_epu64() {
9829 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9830 let r = _mm512_cvttps_epu64(a);
9831 let e = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
9832 assert_eq_m512i(r, e);
9833 }
9834
9835 #[simd_test(enable = "avx512dq")]
9836 fn test_mm512_mask_cvttps_epu64() {
9837 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9838 let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
9839 let r = _mm512_mask_cvttps_epu64(b, 0b01101001, a);
9840 let e = _mm512_set_epi64(9, 2, 3, 12, 5, 14, 15, 8);
9841 assert_eq_m512i(r, e);
9842 }
9843
9844 #[simd_test(enable = "avx512dq")]
9845 fn test_mm512_maskz_cvttps_epu64() {
9846 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
9847 let r = _mm512_maskz_cvttps_epu64(0b01101001, a);
9848 let e = _mm512_set_epi64(0, 2, 3, 0, 5, 0, 0, 8);
9849 assert_eq_m512i(r, e);
9850 }
9851
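// `_mm*_mullo_epi64` multiplies packed 64-bit integers, keeping the low 64 bits of
// each product. These tests (like the mask-register tests further down) are written as
// `const fn`, matching the const-callable intrinsic definitions (cf. the
// `rustc_const_unstable(feature = "stdarch_const_x86")` attributes used in this module).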
9852 #[simd_test(enable = "avx512dq,avx512vl")]
9853 const fn test_mm_mullo_epi64() {
9854 let a = _mm_set_epi64x(1, 2);
9855 let b = _mm_set_epi64x(3, 4);
9856 let r = _mm_mullo_epi64(a, b);
9857 let e = _mm_set_epi64x(3, 8);
9858 assert_eq_m128i(r, e);
9859 }
9860
9861 #[simd_test(enable = "avx512dq,avx512vl")]
9862 const fn test_mm_mask_mullo_epi64() {
9863 let a = _mm_set_epi64x(1, 2);
9864 let b = _mm_set_epi64x(3, 4);
9865 let c = _mm_set_epi64x(5, 6);
9866 let r = _mm_mask_mullo_epi64(c, 0b01, a, b);
9867 let e = _mm_set_epi64x(5, 8);
9868 assert_eq_m128i(r, e);
9869 }
9870
9871 #[simd_test(enable = "avx512dq,avx512vl")]
9872 const fn test_mm_maskz_mullo_epi64() {
9873 let a = _mm_set_epi64x(1, 2);
9874 let b = _mm_set_epi64x(3, 4);
9875 let r = _mm_maskz_mullo_epi64(0b01, a, b);
9876 let e = _mm_set_epi64x(0, 8);
9877 assert_eq_m128i(r, e);
9878 }
9879
9880 #[simd_test(enable = "avx512dq,avx512vl")]
9881 const fn test_mm256_mullo_epi64() {
9882 let a = _mm256_set_epi64x(1, 2, 3, 4);
9883 let b = _mm256_set_epi64x(5, 6, 7, 8);
9884 let r = _mm256_mullo_epi64(a, b);
9885 let e = _mm256_set_epi64x(5, 12, 21, 32);
9886 assert_eq_m256i(r, e);
9887 }
9888
9889 #[simd_test(enable = "avx512dq,avx512vl")]
9890 const fn test_mm256_mask_mullo_epi64() {
9891 let a = _mm256_set_epi64x(1, 2, 3, 4);
9892 let b = _mm256_set_epi64x(5, 6, 7, 8);
9893 let c = _mm256_set_epi64x(9, 10, 11, 12);
9894 let r = _mm256_mask_mullo_epi64(c, 0b0110, a, b);
9895 let e = _mm256_set_epi64x(9, 12, 21, 12);
9896 assert_eq_m256i(r, e);
9897 }
9898
9899 #[simd_test(enable = "avx512dq,avx512vl")]
9900 const fn test_mm256_maskz_mullo_epi64() {
9901 let a = _mm256_set_epi64x(1, 2, 3, 4);
9902 let b = _mm256_set_epi64x(5, 6, 7, 8);
9903 let r = _mm256_maskz_mullo_epi64(0b0110, a, b);
9904 let e = _mm256_set_epi64x(0, 12, 21, 0);
9905 assert_eq_m256i(r, e);
9906 }
9907
9908 #[simd_test(enable = "avx512dq")]
9909 const fn test_mm512_mullo_epi64() {
9910 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
9911 let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
9912 let r = _mm512_mullo_epi64(a, b);
9913 let e = _mm512_set_epi64(9, 20, 33, 48, 65, 84, 105, 128);
9914 assert_eq_m512i(r, e);
9915 }
9916
9917 #[simd_test(enable = "avx512dq")]
9918 const fn test_mm512_mask_mullo_epi64() {
9919 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
9920 let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
9921 let c = _mm512_set_epi64(17, 18, 19, 20, 21, 22, 23, 24);
9922 let r = _mm512_mask_mullo_epi64(c, 0b01101001, a, b);
9923 let e = _mm512_set_epi64(17, 20, 33, 20, 65, 22, 23, 128);
9924 assert_eq_m512i(r, e);
9925 }
9926
9927 #[simd_test(enable = "avx512dq")]
9928 const fn test_mm512_maskz_mullo_epi64() {
9929 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
9930 let b = _mm512_set_epi64(9, 10, 11, 12, 13, 14, 15, 16);
9931 let r = _mm512_maskz_mullo_epi64(0b01101001, a, b);
9932 let e = _mm512_set_epi64(0, 20, 33, 0, 65, 0, 0, 128);
9933 assert_eq_m512i(r, e);
9934 }
9935
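// The `_k*` intrinsics below operate directly on mask registers. `_cvtmask8_u32` /
// `_cvtu32_mask8` convert between a mask and a `u32`, and `_kadd_mask8` is a plain
// addition of the two masks, e.g. 98 + 117 = 215 below.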
9936 #[simd_test(enable = "avx512dq")]
9937 const fn test_cvtmask8_u32() {
9938 let a: __mmask8 = 0b01101001;
9939 let r = _cvtmask8_u32(a);
9940 let e: u32 = 0b01101001;
9941 assert_eq!(r, e);
9942 }
9943
9944 #[simd_test(enable = "avx512dq")]
9945 const fn test_cvtu32_mask8() {
9946 let a: u32 = 0b01101001;
9947 let r = _cvtu32_mask8(a);
9948 let e: __mmask8 = 0b01101001;
9949 assert_eq!(r, e);
9950 }
9951
9952 #[simd_test(enable = "avx512dq")]
9953 const fn test_kadd_mask16() {
9954 let a: __mmask16 = 27549;
9955 let b: __mmask16 = 23434;
9956 let r = _kadd_mask16(a, b);
9957 let e: __mmask16 = 50983;
9958 assert_eq!(r, e);
9959 }
9960
9961 #[simd_test(enable = "avx512dq")]
9962 const fn test_kadd_mask8() {
9963 let a: __mmask8 = 98;
9964 let b: __mmask8 = 117;
9965 let r = _kadd_mask8(a, b);
9966 let e: __mmask8 = 215;
9967 assert_eq!(r, e);
9968 }
9969
9970 #[simd_test(enable = "avx512dq")]
9971 const fn test_kand_mask8() {
9972 let a: __mmask8 = 0b01101001;
9973 let b: __mmask8 = 0b10110011;
9974 let r = _kand_mask8(a, b);
9975 let e: __mmask8 = 0b00100001;
9976 assert_eq!(r, e);
9977 }
9978
9979 #[simd_test(enable = "avx512dq")]
9980 const fn test_kandn_mask8() {
9981 let a: __mmask8 = 0b01101001;
9982 let b: __mmask8 = 0b10110011;
9983 let r = _kandn_mask8(a, b);
9984 let e: __mmask8 = 0b10010010;
9985 assert_eq!(r, e);
9986 }
9987
9988 #[simd_test(enable = "avx512dq")]
9989 const fn test_knot_mask8() {
9990 let a: __mmask8 = 0b01101001;
9991 let r = _knot_mask8(a);
9992 let e: __mmask8 = 0b10010110;
9993 assert_eq!(r, e);
9994 }
9995
9996 #[simd_test(enable = "avx512dq")]
9997 const fn test_kor_mask8() {
9998 let a: __mmask8 = 0b01101001;
9999 let b: __mmask8 = 0b10110011;
10000 let r = _kor_mask8(a, b);
10001 let e: __mmask8 = 0b11111011;
10002 assert_eq!(r, e);
10003 }
10004
10005 #[simd_test(enable = "avx512dq")]
10006 const fn test_kxnor_mask8() {
10007 let a: __mmask8 = 0b01101001;
10008 let b: __mmask8 = 0b10110011;
10009 let r = _kxnor_mask8(a, b);
10010 let e: __mmask8 = 0b00100101;
10011 assert_eq!(r, e);
10012 }
10013
10014 #[simd_test(enable = "avx512dq")]
10015 const fn test_kxor_mask8() {
10016 let a: __mmask8 = 0b01101001;
10017 let b: __mmask8 = 0b10110011;
10018 let r = _kxor_mask8(a, b);
10019 let e: __mmask8 = 0b11011010;
10020 assert_eq!(r, e);
10021 }
10022
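// `_kortest_mask8_u8` ORs the two masks: the return value is 1 only when the OR is all
// zeros, and `all_ones` is set to 1 when it is all ones. Here 0b01101001 | 0b10110110
// == 0b11111111, so r == 0 and all_ones == 1; `_kortestc` / `_kortestz` expose the two
// flags individually.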
10023 #[simd_test(enable = "avx512dq")]
10024 const fn test_kortest_mask8_u8() {
10025 let a: __mmask8 = 0b01101001;
10026 let b: __mmask8 = 0b10110110;
10027 let mut all_ones: u8 = 0;
10028 let r = unsafe { _kortest_mask8_u8(a, b, &mut all_ones) };
10029 assert_eq!(r, 0);
10030 assert_eq!(all_ones, 1);
10031 }
10032
10033 #[simd_test(enable = "avx512dq")]
10034 const fn test_kortestc_mask8_u8() {
10035 let a: __mmask8 = 0b01101001;
10036 let b: __mmask8 = 0b10110110;
10037 let r = _kortestc_mask8_u8(a, b);
10038 assert_eq!(r, 1);
10039 }
10040
10041 #[simd_test(enable = "avx512dq")]
10042 const fn test_kortestz_mask8_u8() {
10043 let a: __mmask8 = 0b01101001;
10044 let b: __mmask8 = 0b10110110;
10045 let r = _kortestz_mask8_u8(a, b);
10046 assert_eq!(r, 0);
10047 }
10048
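// For an 8-bit mask, shift counts of 8 or more produce an all-zero mask, which the
// `<8>` and `<9>` cases below exercise alongside the in-range counts.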
10049 #[simd_test(enable = "avx512dq")]
10050 const fn test_kshiftli_mask8() {
10051 let a: __mmask8 = 0b01101001;
10052 let r = _kshiftli_mask8::<3>(a);
10053 let e: __mmask8 = 0b01001000;
10054 assert_eq!(r, e);
10055
10056 let r = _kshiftli_mask8::<7>(a);
10057 let e: __mmask8 = 0b10000000;
10058 assert_eq!(r, e);
10059
10060 let r = _kshiftli_mask8::<8>(a);
10061 let e: __mmask8 = 0b00000000;
10062 assert_eq!(r, e);
10063
10064 let r = _kshiftli_mask8::<9>(a);
10065 let e: __mmask8 = 0b00000000;
10066 assert_eq!(r, e);
10067 }
10068
10069 #[simd_test(enable = "avx512dq")]
10070 const fn test_kshiftri_mask8() {
10071 let a: __mmask8 = 0b10101001;
10072 let r = _kshiftri_mask8::<3>(a);
10073 let e: __mmask8 = 0b00010101;
10074 assert_eq!(r, e);
10075
10076 let r = _kshiftri_mask8::<7>(a);
10077 let e: __mmask8 = 0b00000001;
10078 assert_eq!(r, e);
10079
10080 let r = _kshiftri_mask8::<8>(a);
10081 let e: __mmask8 = 0b00000000;
10082 assert_eq!(r, e);
10083
10084 let r = _kshiftri_mask8::<9>(a);
10085 let e: __mmask8 = 0b00000000;
10086 assert_eq!(r, e);
10087 }
10088
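// `_ktest_mask8_u8` returns 1 when `a & b` is all zeros and reports through the out
// parameter whether `!a & b` is all zeros. Here `b` is the complement of `a`, so
// a & b == 0 (r == 1) while !a & b == b != 0 (and_not == 0).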
10089 #[simd_test(enable = "avx512dq")]
10090 const fn test_ktest_mask8_u8() {
10091 let a: __mmask8 = 0b01101001;
10092 let b: __mmask8 = 0b10010110;
10093 let mut and_not: u8 = 0;
10094 let r = unsafe { _ktest_mask8_u8(a, b, &mut and_not) };
10095 assert_eq!(r, 1);
10096 assert_eq!(and_not, 0);
10097 }
10098
10099 #[simd_test(enable = "avx512dq")]
10100 const fn test_ktestc_mask8_u8() {
10101 let a: __mmask8 = 0b01101001;
10102 let b: __mmask8 = 0b10010110;
10103 let r = _ktestc_mask8_u8(a, b);
10104 assert_eq!(r, 0);
10105 }
10106
10107 #[simd_test(enable = "avx512dq")]
10108 const fn test_ktestz_mask8_u8() {
10109 let a: __mmask8 = 0b01101001;
10110 let b: __mmask8 = 0b10010110;
10111 let r = _ktestz_mask8_u8(a, b);
10112 assert_eq!(r, 1);
10113 }
10114
10115 #[simd_test(enable = "avx512dq")]
10116 const fn test_ktest_mask16_u8() {
10117 let a: __mmask16 = 0b0110100100111100;
10118 let b: __mmask16 = 0b1001011011000011;
10119 let mut and_not: u8 = 0;
10120 let r = unsafe { _ktest_mask16_u8(a, b, &mut and_not) };
10121 assert_eq!(r, 1);
10122 assert_eq!(and_not, 0);
10123 }
10124
10125 #[simd_test(enable = "avx512dq")]
10126 const fn test_ktestc_mask16_u8() {
10127 let a: __mmask16 = 0b0110100100111100;
10128 let b: __mmask16 = 0b1001011011000011;
10129 let r = _ktestc_mask16_u8(a, b);
10130 assert_eq!(r, 0);
10131 }
10132
10133 #[simd_test(enable = "avx512dq")]
10134 const fn test_ktestz_mask16_u8() {
10135 let a: __mmask16 = 0b0110100100111100;
10136 let b: __mmask16 = 0b1001011011000011;
10137 let r = _ktestz_mask16_u8(a, b);
10138 assert_eq!(r, 1);
10139 }
10140
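// `_load_mask8` / `_store_mask8` round-trip a `__mmask8` through memory, so the value
// read back must equal the original mask.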
10141 #[simd_test(enable = "avx512dq")]
10142 const fn test_load_mask8() {
10143 let a: __mmask8 = 0b01101001;
10144 let r = unsafe { _load_mask8(&a) };
10145 let e: __mmask8 = 0b01101001;
10146 assert_eq!(r, e);
10147 }
10148
10149 #[simd_test(enable = "avx512dq")]
10150 const fn test_store_mask8() {
10151 let a: __mmask8 = 0b01101001;
10152 let mut r: __mmask8 = 0;
10153 unsafe {
10154 _store_mask8(&mut r, a);
10155 }
10156 let e: __mmask8 = 0b01101001;
10157 assert_eq!(r, e);
10158 }
10159
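// `_mm*_movepi32_mask` gathers the sign bit of each 32-bit lane into a mask: for
// `_mm_set_epi32(0, -2, -3, 4)` the negative lanes are 1 and 2, giving 0b0110.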
10160 #[simd_test(enable = "avx512dq,avx512vl")]
10161 const fn test_mm_movepi32_mask() {
10162 let a = _mm_set_epi32(0, -2, -3, 4);
10163 let r = _mm_movepi32_mask(a);
10164 let e = 0b0110;
10165 assert_eq!(r, e);
10166 }
10167
10168 #[simd_test(enable = "avx512dq,avx512vl")]
10169 const fn test_mm256_movepi32_mask() {
10170 let a = _mm256_set_epi32(0, -2, -3, 4, -5, 6, 7, -8);
10171 let r = _mm256_movepi32_mask(a);
10172 let e = 0b01101001;
10173 assert_eq!(r, e);
10174 }
10175
10176 #[simd_test(enable = "avx512dq")]
10177 const fn test_mm512_movepi32_mask() {
10178 let a = _mm512_set_epi32(
10179 0, -2, -3, 4, -5, 6, 7, -8, 9, 10, -11, -12, -13, -14, 15, 16,
10180 );
10181 let r = _mm512_movepi32_mask(a);
10182 let e = 0b0110100100111100;
10183 assert_eq!(r, e);
10184 }
10185
10186 #[simd_test(enable = "avx512dq,avx512vl")]
10187 const fn test_mm_movepi64_mask() {
10188 let a = _mm_set_epi64x(0, -2);
10189 let r = _mm_movepi64_mask(a);
10190 let e = 0b01;
10191 assert_eq!(r, e);
10192 }
10193
10194 #[simd_test(enable = "avx512dq,avx512vl")]
10195 const fn test_mm256_movepi64_mask() {
10196 let a = _mm256_set_epi64x(0, -2, -3, 4);
10197 let r = _mm256_movepi64_mask(a);
10198 let e = 0b0110;
10199 assert_eq!(r, e);
10200 }
10201
10202 #[simd_test(enable = "avx512dq")]
10203 const fn test_mm512_movepi64_mask() {
10204 let a = _mm512_set_epi64(0, -2, -3, 4, -5, 6, 7, -8);
10205 let r = _mm512_movepi64_mask(a);
10206 let e = 0b01101001;
10207 assert_eq!(r, e);
10208 }
10209
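// `_mm*_movm_epi32` / `_movm_epi64` go the other way: every set mask bit expands to an
// all-ones (-1) lane and every clear bit to an all-zero lane.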
10210 #[simd_test(enable = "avx512dq,avx512vl")]
10211 const fn test_mm_movm_epi32() {
10212 let a = 0b0110;
10213 let r = _mm_movm_epi32(a);
10214 let e = _mm_set_epi32(0, -1, -1, 0);
10215 assert_eq_m128i(r, e);
10216 }
10217
10218 #[simd_test(enable = "avx512dq,avx512vl")]
10219 const fn test_mm256_movm_epi32() {
10220 let a = 0b01101001;
10221 let r = _mm256_movm_epi32(a);
10222 let e = _mm256_set_epi32(0, -1, -1, 0, -1, 0, 0, -1);
10223 assert_eq_m256i(r, e);
10224 }
10225
10226 #[simd_test(enable = "avx512dq")]
10227 const fn test_mm512_movm_epi32() {
10228 let a = 0b0110100100111100;
10229 let r = _mm512_movm_epi32(a);
10230 let e = _mm512_set_epi32(0, -1, -1, 0, -1, 0, 0, -1, 0, 0, -1, -1, -1, -1, 0, 0);
10231 assert_eq_m512i(r, e);
10232 }
10233
10234 #[simd_test(enable = "avx512dq,avx512vl")]
10235 const fn test_mm_movm_epi64() {
10236 let a = 0b01;
10237 let r = _mm_movm_epi64(a);
10238 let e = _mm_set_epi64x(0, -1);
10239 assert_eq_m128i(r, e);
10240 }
10241
10242 #[simd_test(enable = "avx512dq,avx512vl")]
10243 const fn test_mm256_movm_epi64() {
10244 let a = 0b0110;
10245 let r = _mm256_movm_epi64(a);
10246 let e = _mm256_set_epi64x(0, -1, -1, 0);
10247 assert_eq_m256i(r, e);
10248 }
10249
10250 #[simd_test(enable = "avx512dq")]
10251 const fn test_mm512_movm_epi64() {
10252 let a = 0b01101001;
10253 let r = _mm512_movm_epi64(a);
10254 let e = _mm512_set_epi64(0, -1, -1, 0, -1, 0, 0, -1);
10255 assert_eq_m512i(r, e);
10256 }
10257
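// The range tests all use IMM8 = 0b0101: per Intel's encoding, bits 1:0 = 01 select
// the maximum of each lane pair and bits 3:2 = 01 take the sign from the comparison
// result, so with these positive inputs each output lane is simply max(a, b). Masking
// works per lane as in the conversion tests above.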
10258 #[simd_test(enable = "avx512dq")]
10259 fn test_mm512_range_round_pd() {
10260 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
10261 let b = _mm512_set_pd(2., 1., 4., 3., 6., 5., 8., 7.);
10262 let r = _mm512_range_round_pd::<0b0101, _MM_FROUND_NO_EXC>(a, b);
10263 let e = _mm512_set_pd(2., 2., 4., 4., 6., 6., 8., 8.);
10264 assert_eq_m512d(r, e);
10265 }
10266
10267 #[simd_test(enable = "avx512dq")]
10268 fn test_mm512_mask_range_round_pd() {
10269 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
10270 let b = _mm512_set_pd(2., 1., 4., 3., 6., 5., 8., 7.);
10271 let c = _mm512_set_pd(9., 10., 11., 12., 13., 14., 15., 16.);
10272 let r = _mm512_mask_range_round_pd::<0b0101, _MM_FROUND_NO_EXC>(c, 0b01101001, a, b);
10273 let e = _mm512_set_pd(9., 2., 4., 12., 6., 14., 15., 8.);
10274 assert_eq_m512d(r, e);
10275 }
10276
10277 #[simd_test(enable = "avx512dq")]
10278 fn test_mm512_maskz_range_round_pd() {
10279 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
10280 let b = _mm512_set_pd(2., 1., 4., 3., 6., 5., 8., 7.);
10281 let r = _mm512_maskz_range_round_pd::<0b0101, _MM_FROUND_NO_EXC>(0b01101001, a, b);
10282 let e = _mm512_set_pd(0., 2., 4., 0., 6., 0., 0., 8.);
10283 assert_eq_m512d(r, e);
10284 }
10285
10286 #[simd_test(enable = "avx512dq,avx512vl")]
10287 fn test_mm_range_pd() {
10288 let a = _mm_set_pd(1., 2.);
10289 let b = _mm_set_pd(2., 1.);
10290 let r = _mm_range_pd::<0b0101>(a, b);
10291 let e = _mm_set_pd(2., 2.);
10292 assert_eq_m128d(r, e);
10293 }
10294
10295 #[simd_test(enable = "avx512dq,avx512vl")]
10296 fn test_mm_mask_range_pd() {
10297 let a = _mm_set_pd(1., 2.);
10298 let b = _mm_set_pd(2., 1.);
10299 let c = _mm_set_pd(3., 4.);
10300 let r = _mm_mask_range_pd::<0b0101>(c, 0b01, a, b);
10301 let e = _mm_set_pd(3., 2.);
10302 assert_eq_m128d(r, e);
10303 }
10304
10305 #[simd_test(enable = "avx512dq,avx512vl")]
10306 fn test_mm_maskz_range_pd() {
10307 let a = _mm_set_pd(1., 2.);
10308 let b = _mm_set_pd(2., 1.);
10309 let r = _mm_maskz_range_pd::<0b0101>(0b01, a, b);
10310 let e = _mm_set_pd(0., 2.);
10311 assert_eq_m128d(r, e);
10312 }
10313
10314 #[simd_test(enable = "avx512dq,avx512vl")]
10315 fn test_mm256_range_pd() {
10316 let a = _mm256_set_pd(1., 2., 3., 4.);
10317 let b = _mm256_set_pd(2., 1., 4., 3.);
10318 let r = _mm256_range_pd::<0b0101>(a, b);
10319 let e = _mm256_set_pd(2., 2., 4., 4.);
10320 assert_eq_m256d(r, e);
10321 }
10322
10323 #[simd_test(enable = "avx512dq,avx512vl")]
10324 fn test_mm256_mask_range_pd() {
10325 let a = _mm256_set_pd(1., 2., 3., 4.);
10326 let b = _mm256_set_pd(2., 1., 4., 3.);
10327 let c = _mm256_set_pd(5., 6., 7., 8.);
10328 let r = _mm256_mask_range_pd::<0b0101>(c, 0b0110, a, b);
10329 let e = _mm256_set_pd(5., 2., 4., 8.);
10330 assert_eq_m256d(r, e);
10331 }
10332
10333 #[simd_test(enable = "avx512dq,avx512vl")]
10334 fn test_mm256_maskz_range_pd() {
10335 let a = _mm256_set_pd(1., 2., 3., 4.);
10336 let b = _mm256_set_pd(2., 1., 4., 3.);
10337 let r = _mm256_maskz_range_pd::<0b0101>(0b0110, a, b);
10338 let e = _mm256_set_pd(0., 2., 4., 0.);
10339 assert_eq_m256d(r, e);
10340 }
10341
10342 #[simd_test(enable = "avx512dq")]
10343 fn test_mm512_range_pd() {
10344 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
10345 let b = _mm512_set_pd(2., 1., 4., 3., 6., 5., 8., 7.);
10346 let r = _mm512_range_pd::<0b0101>(a, b);
10347 let e = _mm512_set_pd(2., 2., 4., 4., 6., 6., 8., 8.);
10348 assert_eq_m512d(r, e);
10349 }
10350
10351 #[simd_test(enable = "avx512dq")]
10352 fn test_mm512_mask_range_pd() {
10353 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
10354 let b = _mm512_set_pd(2., 1., 4., 3., 6., 5., 8., 7.);
10355 let c = _mm512_set_pd(9., 10., 11., 12., 13., 14., 15., 16.);
10356 let r = _mm512_mask_range_pd::<0b0101>(c, 0b01101001, a, b);
10357 let e = _mm512_set_pd(9., 2., 4., 12., 6., 14., 15., 8.);
10358 assert_eq_m512d(r, e);
10359 }
10360
10361 #[simd_test(enable = "avx512dq")]
10362 fn test_mm512_maskz_range_pd() {
10363 let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
10364 let b = _mm512_set_pd(2., 1., 4., 3., 6., 5., 8., 7.);
10365 let r = _mm512_maskz_range_pd::<0b0101>(0b01101001, a, b);
10366 let e = _mm512_set_pd(0., 2., 4., 0., 6., 0., 0., 8.);
10367 assert_eq_m512d(r, e);
10368 }
10369
10370 #[simd_test(enable = "avx512dq")]
10371 fn test_mm512_range_round_ps() {
10372 let a = _mm512_set_ps(
10373 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
10374 );
10375 let b = _mm512_set_ps(
10376 2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16., 15.,
10377 );
10378 let r = _mm512_range_round_ps::<0b0101, _MM_FROUND_NO_EXC>(a, b);
10379 let e = _mm512_set_ps(
10380 2., 2., 4., 4., 6., 6., 8., 8., 10., 10., 12., 12., 14., 14., 16., 16.,
10381 );
10382 assert_eq_m512(r, e);
10383 }
10384
10385 #[simd_test(enable = "avx512dq")]
10386 fn test_mm512_mask_range_round_ps() {
10387 let a = _mm512_set_ps(
10388 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
10389 );
10390 let b = _mm512_set_ps(
10391 2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16., 15.,
10392 );
10393 let c = _mm512_set_ps(
10394 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
10395 );
10396 let r =
10397 _mm512_mask_range_round_ps::<0b0101, _MM_FROUND_NO_EXC>(c, 0b0110100100111100, a, b);
10398 let e = _mm512_set_ps(
10399 17., 2., 4., 20., 6., 22., 23., 8., 25., 26., 12., 12., 14., 14., 31., 32.,
10400 );
10401 assert_eq_m512(r, e);
10402 }
10403
10404 #[simd_test(enable = "avx512dq")]
10405 fn test_mm512_maskz_range_round_ps() {
10406 let a = _mm512_set_ps(
10407 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
10408 );
10409 let b = _mm512_set_ps(
10410 2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16., 15.,
10411 );
10412 let r = _mm512_maskz_range_round_ps::<0b0101, _MM_FROUND_NO_EXC>(0b0110100100111100, a, b);
10413 let e = _mm512_set_ps(
10414 0., 2., 4., 0., 6., 0., 0., 8., 0., 0., 12., 12., 14., 14., 0., 0.,
10415 );
10416 assert_eq_m512(r, e);
10417 }
10418
10419 #[simd_test(enable = "avx512dq,avx512vl")]
10420 fn test_mm_range_ps() {
10421 let a = _mm_set_ps(1., 2., 3., 4.);
10422 let b = _mm_set_ps(2., 1., 4., 3.);
10423 let r = _mm_range_ps::<0b0101>(a, b);
10424 let e = _mm_set_ps(2., 2., 4., 4.);
10425 assert_eq_m128(r, e);
10426 }
10427
10428 #[simd_test(enable = "avx512dq,avx512vl")]
10429 fn test_mm_mask_range_ps() {
10430 let a = _mm_set_ps(1., 2., 3., 4.);
10431 let b = _mm_set_ps(2., 1., 4., 3.);
10432 let c = _mm_set_ps(5., 6., 7., 8.);
10433 let r = _mm_mask_range_ps::<0b0101>(c, 0b0110, a, b);
10434 let e = _mm_set_ps(5., 2., 4., 8.);
10435 assert_eq_m128(r, e);
10436 }
10437
10438 #[simd_test(enable = "avx512dq,avx512vl")]
10439 fn test_mm_maskz_range_ps() {
10440 let a = _mm_set_ps(1., 2., 3., 4.);
10441 let b = _mm_set_ps(2., 1., 4., 3.);
10442 let r = _mm_maskz_range_ps::<0b0101>(0b0110, a, b);
10443 let e = _mm_set_ps(0., 2., 4., 0.);
10444 assert_eq_m128(r, e);
10445 }
10446
10447 #[simd_test(enable = "avx512dq,avx512vl")]
10448 fn test_mm256_range_ps() {
10449 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
10450 let b = _mm256_set_ps(2., 1., 4., 3., 6., 5., 8., 7.);
10451 let r = _mm256_range_ps::<0b0101>(a, b);
10452 let e = _mm256_set_ps(2., 2., 4., 4., 6., 6., 8., 8.);
10453 assert_eq_m256(r, e);
10454 }
10455
10456 #[simd_test(enable = "avx512dq,avx512vl")]
10457 fn test_mm256_mask_range_ps() {
10458 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
10459 let b = _mm256_set_ps(2., 1., 4., 3., 6., 5., 8., 7.);
10460 let c = _mm256_set_ps(9., 10., 11., 12., 13., 14., 15., 16.);
10461 let r = _mm256_mask_range_ps::<0b0101>(c, 0b01101001, a, b);
10462 let e = _mm256_set_ps(9., 2., 4., 12., 6., 14., 15., 8.);
10463 assert_eq_m256(r, e);
10464 }
10465
10466 #[simd_test(enable = "avx512dq,avx512vl")]
10467 fn test_mm256_maskz_range_ps() {
10468 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
10469 let b = _mm256_set_ps(2., 1., 4., 3., 6., 5., 8., 7.);
10470 let r = _mm256_maskz_range_ps::<0b0101>(0b01101001, a, b);
10471 let e = _mm256_set_ps(0., 2., 4., 0., 6., 0., 0., 8.);
10472 assert_eq_m256(r, e);
10473 }
10474
10475 #[simd_test(enable = "avx512dq")]
10476 fn test_mm512_range_ps() {
10477 let a = _mm512_set_ps(
10478 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
10479 );
10480 let b = _mm512_set_ps(
10481 2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16., 15.,
10482 );
10483 let r = _mm512_range_ps::<0b0101>(a, b);
10484 let e = _mm512_set_ps(
10485 2., 2., 4., 4., 6., 6., 8., 8., 10., 10., 12., 12., 14., 14., 16., 16.,
10486 );
10487 assert_eq_m512(r, e);
10488 }
10489
10490 #[simd_test(enable = "avx512dq")]
10491 fn test_mm512_mask_range_ps() {
10492 let a = _mm512_set_ps(
10493 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
10494 );
10495 let b = _mm512_set_ps(
10496 2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16., 15.,
10497 );
10498 let c = _mm512_set_ps(
10499 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
10500 );
10501 let r = _mm512_mask_range_ps::<0b0101>(c, 0b0110100100111100, a, b);
10502 let e = _mm512_set_ps(
10503 17., 2., 4., 20., 6., 22., 23., 8., 25., 26., 12., 12., 14., 14., 31., 32.,
10504 );
10505 assert_eq_m512(r, e);
10506 }
10507
10508 #[simd_test(enable = "avx512dq")]
10509 fn test_mm512_maskz_range_ps() {
10510 let a = _mm512_set_ps(
10511 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
10512 );
10513 let b = _mm512_set_ps(
10514 2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16., 15.,
10515 );
10516 let r = _mm512_maskz_range_ps::<0b0101>(0b0110100100111100, a, b);
10517 let e = _mm512_set_ps(
10518 0., 2., 4., 0., 6., 0., 0., 8., 0., 0., 12., 12., 14., 14., 0., 0.,
10519 );
10520 assert_eq_m512(r, e);
10521 }
10522
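// The `_sd` / `_ss` range variants operate on the lowest lane only and copy the upper
// lanes from `a`; with mask bit 0 clear, the masked forms take the low lane from `c`
// (or zero it in the `maskz` forms), which is what the expected values below encode.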
10523 #[simd_test(enable = "avx512dq")]
10524 fn test_mm_range_round_sd() {
10525 let a = _mm_set_sd(1.);
10526 let b = _mm_set_sd(2.);
10527 let r = _mm_range_round_sd::<0b0101, _MM_FROUND_NO_EXC>(a, b);
10528 let e = _mm_set_sd(2.);
10529 assert_eq_m128d(r, e);
10530 }
10531
10532 #[simd_test(enable = "avx512dq")]
10533 fn test_mm_mask_range_round_sd() {
10534 let a = _mm_set_sd(1.);
10535 let b = _mm_set_sd(2.);
10536 let c = _mm_set_sd(3.);
10537 let r = _mm_mask_range_round_sd::<0b0101, _MM_FROUND_NO_EXC>(c, 0b0, a, b);
10538 let e = _mm_set_sd(3.);
10539 assert_eq_m128d(r, e);
10540 }
10541
10542 #[simd_test(enable = "avx512dq")]
10543 fn test_mm_maskz_range_round_sd() {
10544 let a = _mm_set_sd(1.);
10545 let b = _mm_set_sd(2.);
10546 let r = _mm_maskz_range_round_sd::<0b0101, _MM_FROUND_NO_EXC>(0b0, a, b);
10547 let e = _mm_set_sd(0.);
10548 assert_eq_m128d(r, e);
10549 }
10550
10551 #[simd_test(enable = "avx512dq")]
10552 fn test_mm_mask_range_sd() {
10553 let a = _mm_set_sd(1.);
10554 let b = _mm_set_sd(2.);
10555 let c = _mm_set_sd(3.);
10556 let r = _mm_mask_range_sd::<0b0101>(c, 0b0, a, b);
10557 let e = _mm_set_sd(3.);
10558 assert_eq_m128d(r, e);
10559 }
10560
10561 #[simd_test(enable = "avx512dq")]
10562 fn test_mm_maskz_range_sd() {
10563 let a = _mm_set_sd(1.);
10564 let b = _mm_set_sd(2.);
10565 let r = _mm_maskz_range_sd::<0b0101>(0b0, a, b);
10566 let e = _mm_set_sd(0.);
10567 assert_eq_m128d(r, e);
10568 }
10569
10570 #[simd_test(enable = "avx512dq")]
10571 fn test_mm_range_round_ss() {
10572 let a = _mm_set_ss(1.);
10573 let b = _mm_set_ss(2.);
10574 let r = _mm_range_round_ss::<0b0101, _MM_FROUND_NO_EXC>(a, b);
10575 let e = _mm_set_ss(2.);
10576 assert_eq_m128(r, e);
10577 }
10578
10579 #[simd_test(enable = "avx512dq")]
10580 fn test_mm_mask_range_round_ss() {
10581 let a = _mm_set_ss(1.);
10582 let b = _mm_set_ss(2.);
10583 let c = _mm_set_ss(3.);
10584 let r = _mm_mask_range_round_ss::<0b0101, _MM_FROUND_NO_EXC>(c, 0b0, a, b);
10585 let e = _mm_set_ss(3.);
10586 assert_eq_m128(r, e);
10587 }
10588
10589 #[simd_test(enable = "avx512dq")]
10590 fn test_mm_maskz_range_round_ss() {
10591 let a = _mm_set_ss(1.);
10592 let b = _mm_set_ss(2.);
10593 let r = _mm_maskz_range_round_ss::<0b0101, _MM_FROUND_NO_EXC>(0b0, a, b);
10594 let e = _mm_set_ss(0.);
10595 assert_eq_m128(r, e);
10596 }
10597
10598 #[simd_test(enable = "avx512dq")]
10599 fn test_mm_mask_range_ss() {
10600 let a = _mm_set_ss(1.);
10601 let b = _mm_set_ss(2.);
10602 let c = _mm_set_ss(3.);
10603 let r = _mm_mask_range_ss::<0b0101>(c, 0b0, a, b);
10604 let e = _mm_set_ss(3.);
10605 assert_eq_m128(r, e);
10606 }
10607
10608 #[simd_test(enable = "avx512dq")]
10609 fn test_mm_maskz_range_ss() {
10610 let a = _mm_set_ss(1.);
10611 let b = _mm_set_ss(2.);
10612 let r = _mm_maskz_range_ss::<0b0101>(0b0, a, b);
10613 let e = _mm_set_ss(0.);
10614 assert_eq_m128(r, e);
10615 }
10616
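// For the reduce tests, IMM8 = 16 | _MM_FROUND_TO_ZERO keeps one fraction bit
// (the upper four bits give the precision: 16 >> 4 == 1, i.e. a granularity of 0.5)
// and truncates toward zero; the result is the remainder a - round(a), e.g.
// 0.75 -> 0.75 - 0.5 == 0.25 and 1.0 -> 0.0.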
10617 #[simd_test(enable = "avx512dq")]
10618 fn test_mm512_reduce_round_pd() {
10619 let a = _mm512_set_pd(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0);
10620 let r = _mm512_reduce_round_pd::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a);
10621 let e = _mm512_set_pd(0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0.);
10622 assert_eq_m512d(r, e);
10623 }
10624
10625 #[simd_test(enable = "avx512dq")]
10626 fn test_mm512_mask_reduce_round_pd() {
10627 let a = _mm512_set_pd(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0);
10628 let src = _mm512_set_pd(3., 4., 5., 6., 7., 8., 9., 10.);
10629 let r = _mm512_mask_reduce_round_pd::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
10630 src, 0b01101001, a,
10631 );
10632 let e = _mm512_set_pd(3., 0., 0.25, 6., 0.25, 8., 9., 0.);
10633 assert_eq_m512d(r, e);
10634 }
10635
10636 #[simd_test(enable = "avx512dq")]
10637 fn test_mm512_maskz_reduce_round_pd() {
10638 let a = _mm512_set_pd(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0);
10639 let r = _mm512_maskz_reduce_round_pd::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
10640 0b01101001, a,
10641 );
10642 let e = _mm512_set_pd(0., 0., 0.25, 0., 0.25, 0., 0., 0.);
10643 assert_eq_m512d(r, e);
10644 }
10645
10646 #[simd_test(enable = "avx512dq,avx512vl")]
10647 fn test_mm_reduce_pd() {
10648 let a = _mm_set_pd(0.25, 0.50);
10649 let r = _mm_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
10650 let e = _mm_set_pd(0.25, 0.);
10651 assert_eq_m128d(r, e);
10652 }
10653
10654 #[simd_test(enable = "avx512dq,avx512vl")]
10655 fn test_mm_mask_reduce_pd() {
10656 let a = _mm_set_pd(0.25, 0.50);
10657 let src = _mm_set_pd(3., 4.);
10658 let r = _mm_mask_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b01, a);
10659 let e = _mm_set_pd(3., 0.);
10660 assert_eq_m128d(r, e);
10661 }
10662
10663 #[simd_test(enable = "avx512dq,avx512vl")]
10664 fn test_mm_maskz_reduce_pd() {
10665 let a = _mm_set_pd(0.25, 0.50);
10666 let r = _mm_maskz_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(0b01, a);
10667 let e = _mm_set_pd(0., 0.);
10668 assert_eq_m128d(r, e);
10669 }
10670
10671 #[simd_test(enable = "avx512dq,avx512vl")]
10672 fn test_mm256_reduce_pd() {
10673 let a = _mm256_set_pd(0.25, 0.50, 0.75, 1.0);
10674 let r = _mm256_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
10675 let e = _mm256_set_pd(0.25, 0., 0.25, 0.);
10676 assert_eq_m256d(r, e);
10677 }
10678
10679 #[simd_test(enable = "avx512dq,avx512vl")]
10680 fn test_mm256_mask_reduce_pd() {
10681 let a = _mm256_set_pd(0.25, 0.50, 0.75, 1.0);
10682 let src = _mm256_set_pd(3., 4., 5., 6.);
10683 let r = _mm256_mask_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b0110, a);
10684 let e = _mm256_set_pd(3., 0., 0.25, 6.);
10685 assert_eq_m256d(r, e);
10686 }
10687
10688 #[simd_test(enable = "avx512dq,avx512vl")]
10689 fn test_mm256_maskz_reduce_pd() {
10690 let a = _mm256_set_pd(0.25, 0.50, 0.75, 1.0);
10691 let r = _mm256_maskz_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(0b0110, a);
10692 let e = _mm256_set_pd(0., 0., 0.25, 0.);
10693 assert_eq_m256d(r, e);
10694 }
10695
10696 #[simd_test(enable = "avx512dq")]
10697 fn test_mm512_reduce_pd() {
10698 let a = _mm512_set_pd(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0);
10699 let r = _mm512_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
10700 let e = _mm512_set_pd(0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0.);
10701 assert_eq_m512d(r, e);
10702 }
10703
10704 #[simd_test(enable = "avx512dq")]
10705 fn test_mm512_mask_reduce_pd() {
10706 let a = _mm512_set_pd(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0);
10707 let src = _mm512_set_pd(3., 4., 5., 6., 7., 8., 9., 10.);
10708 let r = _mm512_mask_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b01101001, a);
10709 let e = _mm512_set_pd(3., 0., 0.25, 6., 0.25, 8., 9., 0.);
10710 assert_eq_m512d(r, e);
10711 }
10712
10713 #[simd_test(enable = "avx512dq")]
10714 fn test_mm512_maskz_reduce_pd() {
10715 let a = _mm512_set_pd(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0);
10716 let r = _mm512_maskz_reduce_pd::<{ 16 | _MM_FROUND_TO_ZERO }>(0b01101001, a);
10717 let e = _mm512_set_pd(0., 0., 0.25, 0., 0.25, 0., 0., 0.);
10718 assert_eq_m512d(r, e);
10719 }
10720
10721 #[simd_test(enable = "avx512dq")]
10722 fn test_mm512_reduce_round_ps() {
10723 let a = _mm512_set_ps(
10724 0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0, 2.25, 2.50, 2.75, 3.0, 3.25, 3.50, 3.75,
10725 4.0,
10726 );
10727 let r = _mm512_reduce_round_ps::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a);
10728 let e = _mm512_set_ps(
10729 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0.,
10730 );
10731 assert_eq_m512(r, e);
10732 }
10733
10734 #[simd_test(enable = "avx512dq")]
10735 fn test_mm512_mask_reduce_round_ps() {
10736 let a = _mm512_set_ps(
10737 0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0, 2.25, 2.50, 2.75, 3.0, 3.25, 3.50, 3.75,
10738 4.0,
10739 );
10740 let src = _mm512_set_ps(
10741 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20.,
10742 );
10743 let r = _mm512_mask_reduce_round_ps::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
10744 src,
10745 0b0110100100111100,
10746 a,
10747 );
10748 let e = _mm512_set_ps(
10749 5., 0., 0.25, 8., 0.25, 10., 11., 0., 13., 14., 0.25, 0., 0.25, 0., 19., 20.,
10750 );
10751 assert_eq_m512(r, e);
10752 }
10753
10754 #[simd_test(enable = "avx512dq")]
10755 fn test_mm512_maskz_reduce_round_ps() {
10756 let a = _mm512_set_ps(
10757 0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0, 2.25, 2.50, 2.75, 3.0, 3.25, 3.50, 3.75,
10758 4.0,
10759 );
10760 let r = _mm512_maskz_reduce_round_ps::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
10761 0b0110100100111100,
10762 a,
10763 );
10764 let e = _mm512_set_ps(
10765 0., 0., 0.25, 0., 0.25, 0., 0., 0., 0., 0., 0.25, 0., 0.25, 0., 0., 0.,
10766 );
10767 assert_eq_m512(r, e);
10768 }
10769
10770 #[simd_test(enable = "avx512dq,avx512vl")]
10771 fn test_mm_reduce_ps() {
10772 let a = _mm_set_ps(0.25, 0.50, 0.75, 1.0);
10773 let r = _mm_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
10774 let e = _mm_set_ps(0.25, 0., 0.25, 0.);
10775 assert_eq_m128(r, e);
10776 }
10777
10778 #[simd_test(enable = "avx512dq,avx512vl")]
10779 fn test_mm_mask_reduce_ps() {
10780 let a = _mm_set_ps(0.25, 0.50, 0.75, 1.0);
10781 let src = _mm_set_ps(2., 3., 4., 5.);
10782 let r = _mm_mask_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b0110, a);
10783 let e = _mm_set_ps(2., 0., 0.25, 5.);
10784 assert_eq_m128(r, e);
10785 }
10786
10787 #[simd_test(enable = "avx512dq,avx512vl")]
10788 fn test_mm_maskz_reduce_ps() {
10789 let a = _mm_set_ps(0.25, 0.50, 0.75, 1.0);
10790 let r = _mm_maskz_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(0b0110, a);
10791 let e = _mm_set_ps(0., 0., 0.25, 0.);
10792 assert_eq_m128(r, e);
10793 }
10794
10795 #[simd_test(enable = "avx512dq,avx512vl")]
10796 fn test_mm256_reduce_ps() {
10797 let a = _mm256_set_ps(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0);
10798 let r = _mm256_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
10799 let e = _mm256_set_ps(0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0.);
10800 assert_eq_m256(r, e);
10801 }
10802
10803 #[simd_test(enable = "avx512dq,avx512vl")]
10804 fn test_mm256_mask_reduce_ps() {
10805 let a = _mm256_set_ps(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0);
10806 let src = _mm256_set_ps(3., 4., 5., 6., 7., 8., 9., 10.);
10807 let r = _mm256_mask_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b01101001, a);
10808 let e = _mm256_set_ps(3., 0., 0.25, 6., 0.25, 8., 9., 0.);
10809 assert_eq_m256(r, e);
10810 }
10811
10812 #[simd_test(enable = "avx512dq,avx512vl")]
10813 fn test_mm256_maskz_reduce_ps() {
10814 let a = _mm256_set_ps(0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0);
10815 let r = _mm256_maskz_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(0b01101001, a);
10816 let e = _mm256_set_ps(0., 0., 0.25, 0., 0.25, 0., 0., 0.);
10817 assert_eq_m256(r, e);
10818 }
10819
10820 #[simd_test(enable = "avx512dq")]
10821 fn test_mm512_reduce_ps() {
10822 let a = _mm512_set_ps(
10823 0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0, 2.25, 2.50, 2.75, 3.0, 3.25, 3.50, 3.75,
10824 4.0,
10825 );
10826 let r = _mm512_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(a);
10827 let e = _mm512_set_ps(
10828 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0., 0.25, 0.,
10829 );
10830 assert_eq_m512(r, e);
10831 }
10832
10833 #[simd_test(enable = "avx512dq")]
10834 fn test_mm512_mask_reduce_ps() {
10835 let a = _mm512_set_ps(
10836 0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0, 2.25, 2.50, 2.75, 3.0, 3.25, 3.50, 3.75,
10837 4.0,
10838 );
10839 let src = _mm512_set_ps(
10840 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20.,
10841 );
10842 let r = _mm512_mask_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(src, 0b0110100100111100, a);
10843 let e = _mm512_set_ps(
10844 5., 0., 0.25, 8., 0.25, 10., 11., 0., 13., 14., 0.25, 0., 0.25, 0., 19., 20.,
10845 );
10846 assert_eq_m512(r, e);
10847 }
10848
10849 #[simd_test(enable = "avx512dq")]
10850 fn test_mm512_maskz_reduce_ps() {
10851 let a = _mm512_set_ps(
10852 0.25, 0.50, 0.75, 1.0, 1.25, 1.50, 1.75, 2.0, 2.25, 2.50, 2.75, 3.0, 3.25, 3.50, 3.75,
10853 4.0,
10854 );
10855 let r = _mm512_maskz_reduce_ps::<{ 16 | _MM_FROUND_TO_ZERO }>(0b0110100100111100, a);
10856 let e = _mm512_set_ps(
10857 0., 0., 0.25, 0., 0.25, 0., 0., 0., 0., 0., 0.25, 0., 0.25, 0., 0., 0.,
10858 );
10859 assert_eq_m512(r, e);
10860 }
10861
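// The scalar reduce variants take the upper lane(s) from `a` and reduce only the low
// lane of `b`: 0.25 already lies below the 0.5 granularity, so it is returned
// unchanged (or replaced from `c` / zeroed when mask bit 0 is clear).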
10862 #[simd_test(enable = "avx512dq")]
10863 fn test_mm_reduce_round_sd() {
10864 let a = _mm_set_pd(1., 2.);
10865 let b = _mm_set_sd(0.25);
10866 let r = _mm_reduce_round_sd::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a, b);
10867 let e = _mm_set_pd(1., 0.25);
10868 assert_eq_m128d(r, e);
10869 }
10870
10871 #[simd_test(enable = "avx512dq")]
10872 fn test_mm_mask_reduce_round_sd() {
10873 let a = _mm_set_pd(1., 2.);
10874 let b = _mm_set_sd(0.25);
10875 let c = _mm_set_pd(3., 4.);
10876 let r = _mm_mask_reduce_round_sd::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
10877 c, 0b0, a, b,
10878 );
10879 let e = _mm_set_pd(1., 4.);
10880 assert_eq_m128d(r, e);
10881 }
10882
10883 #[simd_test(enable = "avx512dq")]
10884 fn test_mm_maskz_reduce_round_sd() {
10885 let a = _mm_set_pd(1., 2.);
10886 let b = _mm_set_sd(0.25);
10887 let r =
10888 _mm_maskz_reduce_round_sd::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(0b0, a, b);
10889 let e = _mm_set_pd(1., 0.);
10890 assert_eq_m128d(r, e);
10891 }
10892
10893 #[simd_test(enable = "avx512dq")]
10894 fn test_mm_reduce_sd() {
10895 let a = _mm_set_pd(1., 2.);
10896 let b = _mm_set_sd(0.25);
10897 let r = _mm_reduce_sd::<{ 16 | _MM_FROUND_TO_ZERO }>(a, b);
10898 let e = _mm_set_pd(1., 0.25);
10899 assert_eq_m128d(r, e);
10900 }
10901
10902 #[simd_test(enable = "avx512dq")]
10903 fn test_mm_mask_reduce_sd() {
10904 let a = _mm_set_pd(1., 2.);
10905 let b = _mm_set_sd(0.25);
10906 let c = _mm_set_pd(3., 4.);
10907 let r = _mm_mask_reduce_sd::<{ 16 | _MM_FROUND_TO_ZERO }>(c, 0b0, a, b);
10908 let e = _mm_set_pd(1., 4.);
10909 assert_eq_m128d(r, e);
10910 }
10911
10912 #[simd_test(enable = "avx512dq")]
10913 fn test_mm_maskz_reduce_sd() {
10914 let a = _mm_set_pd(1., 2.);
10915 let b = _mm_set_sd(0.25);
10916 let r = _mm_maskz_reduce_sd::<{ 16 | _MM_FROUND_TO_ZERO }>(0b0, a, b);
10917 let e = _mm_set_pd(1., 0.);
10918 assert_eq_m128d(r, e);
10919 }
10920
10921 #[simd_test(enable = "avx512dq")]
10922 fn test_mm_reduce_round_ss() {
10923 let a = _mm_set_ps(1., 2., 3., 4.);
10924 let b = _mm_set_ss(0.25);
10925 let r = _mm_reduce_round_ss::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(a, b);
10926 let e = _mm_set_ps(1., 2., 3., 0.25);
10927 assert_eq_m128(r, e);
10928 }
10929
10930 #[simd_test(enable = "avx512dq")]
10931 fn test_mm_mask_reduce_round_ss() {
10932 let a = _mm_set_ps(1., 2., 3., 4.);
10933 let b = _mm_set_ss(0.25);
10934 let c = _mm_set_ps(5., 6., 7., 8.);
10935 let r = _mm_mask_reduce_round_ss::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(
10936 c, 0b0, a, b,
10937 );
10938 let e = _mm_set_ps(1., 2., 3., 8.);
10939 assert_eq_m128(r, e);
10940 }
10941
10942 #[simd_test(enable = "avx512dq")]
10943 fn test_mm_maskz_reduce_round_ss() {
10944 let a = _mm_set_ps(1., 2., 3., 4.);
10945 let b = _mm_set_ss(0.25);
10946 let r =
10947 _mm_maskz_reduce_round_ss::<{ 16 | _MM_FROUND_TO_ZERO }, _MM_FROUND_NO_EXC>(0b0, a, b);
10948 let e = _mm_set_ps(1., 2., 3., 0.);
10949 assert_eq_m128(r, e);
10950 }
10951
10952 #[simd_test(enable = "avx512dq")]
10953 fn test_mm_reduce_ss() {
10954 let a = _mm_set_ps(1., 2., 3., 4.);
10955 let b = _mm_set_ss(0.25);
10956 let r = _mm_reduce_ss::<{ 16 | _MM_FROUND_TO_ZERO }>(a, b);
10957 let e = _mm_set_ps(1., 2., 3., 0.25);
10958 assert_eq_m128(r, e);
10959 }
10960
10961 #[simd_test(enable = "avx512dq")]
10962 fn test_mm_mask_reduce_ss() {
10963 let a = _mm_set_ps(1., 2., 3., 4.);
10964 let b = _mm_set_ss(0.25);
10965 let c = _mm_set_ps(5., 6., 7., 8.);
10966 let r = _mm_mask_reduce_ss::<{ 16 | _MM_FROUND_TO_ZERO }>(c, 0b0, a, b);
10967 let e = _mm_set_ps(1., 2., 3., 8.);
10968 assert_eq_m128(r, e);
10969 }
10970
10971 #[simd_test(enable = "avx512dq")]
10972 fn test_mm_maskz_reduce_ss() {
10973 let a = _mm_set_ps(1., 2., 3., 4.);
10974 let b = _mm_set_ss(0.25);
10975 let r = _mm_maskz_reduce_ss::<{ 16 | _MM_FROUND_TO_ZERO }>(0b0, a, b);
10976 let e = _mm_set_ps(1., 2., 3., 0.);
10977 assert_eq_m128(r, e);
10978 }
10979
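// The fpclass tests all use IMM8 = 0x18, which per Intel's category encoding checks
// for positive infinity (bit 3) and negative infinity (bit 4); only lanes holding an
// infinity set their mask bit, and the `mask_` forms additionally AND the result
// with `k`.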
10980 #[simd_test(enable = "avx512dq,avx512vl")]
10981 fn test_mm_fpclass_pd_mask() {
10982 let a = _mm_set_pd(1., f64::INFINITY);
10983 let r = _mm_fpclass_pd_mask::<0x18>(a);
10984 let e = 0b01;
10985 assert_eq!(r, e);
10986 }
10987
10988 #[simd_test(enable = "avx512dq,avx512vl")]
10989 fn test_mm_mask_fpclass_pd_mask() {
10990 let a = _mm_set_pd(1., f64::INFINITY);
10991 let r = _mm_mask_fpclass_pd_mask::<0x18>(0b10, a);
10992 let e = 0b00;
10993 assert_eq!(r, e);
10994 }
10995
10996 #[simd_test(enable = "avx512dq,avx512vl")]
10997 fn test_mm256_fpclass_pd_mask() {
10998 let a = _mm256_set_pd(1., f64::INFINITY, f64::NEG_INFINITY, 0.0);
10999 let r = _mm256_fpclass_pd_mask::<0x18>(a);
11000 let e = 0b0110;
11001 assert_eq!(r, e);
11002 }
11003
11004 #[simd_test(enable = "avx512dq,avx512vl")]
11005 fn test_mm256_mask_fpclass_pd_mask() {
11006 let a = _mm256_set_pd(1., f64::INFINITY, f64::NEG_INFINITY, 0.0);
11007 let r = _mm256_mask_fpclass_pd_mask::<0x18>(0b1010, a);
11008 let e = 0b0010;
11009 assert_eq!(r, e);
11010 }
11011
11012 #[simd_test(enable = "avx512dq")]
11013 fn test_mm512_fpclass_pd_mask() {
11014 let a = _mm512_set_pd(
11015 1.,
11016 f64::INFINITY,
11017 f64::NEG_INFINITY,
11018 0.0,
11019 -0.0,
11020 -2.0,
11021 f64::NAN,
11022 1.0e-308,
11023 );
11024 let r = _mm512_fpclass_pd_mask::<0x18>(a);
11025 let e = 0b01100000;
11026 assert_eq!(r, e);
11027 }
11028
11029 #[simd_test(enable = "avx512dq")]
11030 fn test_mm512_mask_fpclass_pd_mask() {
11031 let a = _mm512_set_pd(
11032 1.,
11033 f64::INFINITY,
11034 f64::NEG_INFINITY,
11035 0.0,
11036 -0.0,
11037 -2.0,
11038 f64::NAN,
11039 1.0e-308,
11040 );
11041 let r = _mm512_mask_fpclass_pd_mask::<0x18>(0b10101010, a);
11042 let e = 0b00100000;
11043 assert_eq!(r, e);
11044 }
11045
11046 #[simd_test(enable = "avx512dq,avx512vl")]
11047 fn test_mm_fpclass_ps_mask() {
11048 let a = _mm_set_ps(1., f32::INFINITY, f32::NEG_INFINITY, 0.0);
11049 let r = _mm_fpclass_ps_mask::<0x18>(a);
11050 let e = 0b0110;
11051 assert_eq!(r, e);
11052 }
11053
11054 #[simd_test(enable = "avx512dq,avx512vl")]
11055 fn test_mm_mask_fpclass_ps_mask() {
11056 let a = _mm_set_ps(1., f32::INFINITY, f32::NEG_INFINITY, 0.0);
11057 let r = _mm_mask_fpclass_ps_mask::<0x18>(0b1010, a);
11058 let e = 0b0010;
11059 assert_eq!(r, e);
11060 }
11061
11062 #[simd_test(enable = "avx512dq,avx512vl")]
11063 fn test_mm256_fpclass_ps_mask() {
11064 let a = _mm256_set_ps(
11065 1.,
11066 f32::INFINITY,
11067 f32::NEG_INFINITY,
11068 0.0,
11069 -0.0,
11070 -2.0,
11071 f32::NAN,
11072 1.0e-38,
11073 );
11074 let r = _mm256_fpclass_ps_mask::<0x18>(a);
11075 let e = 0b01100000;
11076 assert_eq!(r, e);
11077 }
11078
11079 #[simd_test(enable = "avx512dq,avx512vl")]
11080 fn test_mm256_mask_fpclass_ps_mask() {
11081 let a = _mm256_set_ps(
11082 1.,
11083 f32::INFINITY,
11084 f32::NEG_INFINITY,
11085 0.0,
11086 -0.0,
11087 -2.0,
11088 f32::NAN,
11089 1.0e-38,
11090 );
11091 let r = _mm256_mask_fpclass_ps_mask::<0x18>(0b10101010, a);
11092 let e = 0b00100000;
11093 assert_eq!(r, e);
11094 }
11095
11096 #[simd_test(enable = "avx512dq")]
11097 fn test_mm512_fpclass_ps_mask() {
11098 let a = _mm512_set_ps(
11099 1.,
11100 f32::INFINITY,
11101 f32::NEG_INFINITY,
11102 0.0,
11103 -0.0,
11104 -2.0,
11105 f32::NAN,
11106 1.0e-38,
11107 -1.,
11108 f32::NEG_INFINITY,
11109 f32::INFINITY,
11110 -0.0,
11111 0.0,
11112 2.0,
11113 f32::NAN,
11114 -1.0e-38,
11115 );
11116 let r = _mm512_fpclass_ps_mask::<0x18>(a);
11117 let e = 0b0110000001100000;
11118 assert_eq!(r, e);
11119 }
11120
11121 #[simd_test(enable = "avx512dq")]
11122 fn test_mm512_mask_fpclass_ps_mask() {
11123 let a = _mm512_set_ps(
11124 1.,
11125 f32::INFINITY,
11126 f32::NEG_INFINITY,
11127 0.0,
11128 -0.0,
11129 -2.0,
11130 f32::NAN,
11131 1.0e-38,
11132 -1.,
11133 f32::NEG_INFINITY,
11134 f32::INFINITY,
11135 -0.0,
11136 0.0,
11137 2.0,
11138 f32::NAN,
11139 -1.0e-38,
11140 );
11141 let r = _mm512_mask_fpclass_ps_mask::<0x18>(0b1010101010101010, a);
11142 let e = 0b0010000000100000;
11143 assert_eq!(r, e);
11144 }
11145
11146 #[simd_test(enable = "avx512dq")]
11147 fn test_mm_fpclass_sd_mask() {
11148 let a = _mm_set_pd(1., f64::INFINITY);
11149 let r = _mm_fpclass_sd_mask::<0x18>(a);
11150 let e = 0b1;
11151 assert_eq!(r, e);
11152 }
11153
11154 #[simd_test(enable = "avx512dq")]
11155 fn test_mm_mask_fpclass_sd_mask() {
11156 let a = _mm_set_sd(f64::INFINITY);
11157 let r = _mm_mask_fpclass_sd_mask::<0x18>(0b0, a);
11158 let e = 0b0;
11159 assert_eq!(r, e);
11160 }
11161
11162 #[simd_test(enable = "avx512dq")]
11163 fn test_mm_fpclass_ss_mask() {
11164 let a = _mm_set_ss(f32::INFINITY);
11165 let r = _mm_fpclass_ss_mask::<0x18>(a);
11166 let e = 0b1;
11167 assert_eq!(r, e);
11168 }
11169
11170 #[simd_test(enable = "avx512dq")]
11171 fn test_mm_mask_fpclass_ss_mask() {
11172 let a = _mm_set_ss(f32::INFINITY);
11173 let r = _mm_mask_fpclass_ss_mask::<0x18>(0b0, a);
11174 let e = 0b0;
11175 assert_eq!(r, e);
11176 }
11177}
11178