use crate::{
    arch::asm,
    core_arch::{simd::*, x86::*},
    intrinsics::simd::*,
    intrinsics::{fmaf32, fmaf64},
    mem, ptr,
};

use core::hint::unreachable_unchecked;
#[cfg(test)]
use stdarch_test::assert_instr;

/// Computes the absolute values of packed 32-bit integers in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_epi32&expand=39)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub fn _mm512_abs_epi32(a: __m512i) -> __m512i {
    unsafe {
        let a: i32x16 = a.as_i32x16();
        let r: i32x16 = simd_select::<i32x16, _>(simd_lt(a, i32x16::ZERO), simd_neg(a), a);
        transmute(r)
    }
}

/// Computes the absolute value of packed 32-bit integers in `a`, and stores the
/// unsigned results in `dst` using writemask `k` (elements are copied from
/// `src` when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_epi32&expand=40)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub fn _mm512_mask_abs_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
    unsafe {
        let abs: i32x16 = _mm512_abs_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, abs, src.as_i32x16()))
    }
}

/// Computes the absolute value of packed 32-bit integers in `a`, and stores the
/// unsigned results in `dst` using zeromask `k` (elements are zeroed out when
/// the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_abs_epi32&expand=41)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub fn _mm512_maskz_abs_epi32(k: __mmask16, a: __m512i) -> __m512i {
    unsafe {
        let abs: i32x16 = _mm512_abs_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, abs, i32x16::ZERO))
    }
}
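
// Illustrative sketch, not part of the original source: contrasts the writemask and
// zeromask variants above. The lane values and the mask constant are assumptions
// chosen for the example.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mask_vs_maskz_abs_epi32() -> (__m512i, __m512i) {
    let a = _mm512_set1_epi32(-7);
    let src = _mm512_set1_epi32(100);
    // Mask bits 0 and 1 are set, so lanes 0 and 1 become |-7| = 7. With the writemask
    // variant the remaining lanes are copied from `src` (100); with the zeromask
    // variant they are zeroed.
    let masked = _mm512_mask_abs_epi32(src, 0b0000_0000_0000_0011, a);
    let zeroed = _mm512_maskz_abs_epi32(0b0000_0000_0000_0011, a);
    (masked, zeroed)
}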

/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_abs_epi32&expand=37)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub fn _mm256_mask_abs_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let abs: i32x8 = _mm256_abs_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, abs, src.as_i32x8()))
    }
}

/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_abs_epi32&expand=38)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub fn _mm256_maskz_abs_epi32(k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let abs: i32x8 = _mm256_abs_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, abs, i32x8::ZERO))
    }
}

/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_abs_epi32&expand=34)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub fn _mm_mask_abs_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let abs: i32x4 = _mm_abs_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, abs, src.as_i32x4()))
    }
}

/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_abs_epi32&expand=35)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub fn _mm_maskz_abs_epi32(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let abs: i32x4 = _mm_abs_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, abs, i32x4::ZERO))
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_epi64&expand=48)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm512_abs_epi64(a: __m512i) -> __m512i {
    unsafe {
        let a: i64x8 = a.as_i64x8();
        let r: i64x8 = simd_select::<i64x8, _>(simd_lt(a, i64x8::ZERO), simd_neg(a), a);
        transmute(r)
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_epi64&expand=49)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm512_mask_abs_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
    unsafe {
        let abs: i64x8 = _mm512_abs_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, abs, src.as_i64x8()))
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_abs_epi64&expand=50)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm512_maskz_abs_epi64(k: __mmask8, a: __m512i) -> __m512i {
    unsafe {
        let abs: i64x8 = _mm512_abs_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, abs, i64x8::ZERO))
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_abs_epi64&expand=45)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm256_abs_epi64(a: __m256i) -> __m256i {
    unsafe {
        let a: i64x4 = a.as_i64x4();
        let r: i64x4 = simd_select::<i64x4, _>(simd_lt(a, i64x4::ZERO), simd_neg(a), a);
        transmute(r)
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_abs_epi64&expand=46)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm256_mask_abs_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let abs: i64x4 = _mm256_abs_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, abs, src.as_i64x4()))
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_abs_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm256_maskz_abs_epi64(k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let abs: i64x4 = _mm256_abs_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, abs, i64x4::ZERO))
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_abs_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm_abs_epi64(a: __m128i) -> __m128i {
    unsafe {
        let a: i64x2 = a.as_i64x2();
        let r: i64x2 = simd_select::<i64x2, _>(simd_lt(a, i64x2::ZERO), simd_neg(a), a);
        transmute(r)
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_abs_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm_mask_abs_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let abs: i64x2 = _mm_abs_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, abs, src.as_i64x2()))
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_abs_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm_maskz_abs_epi64(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let abs: i64x2 = _mm_abs_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, abs, i64x2::ZERO))
    }
}
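
// Illustrative sketch, not part of the original source: the 64-bit absolute-value
// intrinsics use the same select-and-negate pattern as the 32-bit ones. The negation
// is wrapping, so i64::MIN maps to itself, which matches the `vpabsq` instruction.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn example_abs_epi64_min_value() -> __m128i {
    // Both lanes hold i64::MIN; the result also holds i64::MIN in both lanes because
    // -(i64::MIN) wraps around in two's complement.
    let a = _mm_set1_epi64x(i64::MIN);
    _mm_abs_epi64(a)
}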

/// Finds the absolute value of each packed single-precision (32-bit) floating-point element in v2, storing the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_ps&expand=65)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandd))]
pub fn _mm512_abs_ps(v2: __m512) -> __m512 {
    unsafe { simd_fabs(v2) }
}

/// Finds the absolute value of each packed single-precision (32-bit) floating-point element in v2, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_ps&expand=66)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandd))]
pub fn _mm512_mask_abs_ps(src: __m512, k: __mmask16, v2: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, simd_fabs(v2), src) }
}

/// Finds the absolute value of each packed double-precision (64-bit) floating-point element in v2, storing the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_pd&expand=60)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandq))]
pub fn _mm512_abs_pd(v2: __m512d) -> __m512d {
    unsafe { simd_fabs(v2) }
}

/// Finds the absolute value of each packed double-precision (64-bit) floating-point element in v2, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_pd&expand=61)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandq))]
pub fn _mm512_mask_abs_pd(src: __m512d, k: __mmask8, v2: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, simd_fabs(v2), src) }
}
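
// Illustrative sketch, not part of the original source: the floating-point absolute
// values are bitwise operations that clear the sign bit (the assert_instr attributes
// above expect a plain AND), so -0.0 becomes +0.0. The inputs below are assumptions
// chosen for the example.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mask_abs_ps() -> __m512 {
    let v2 = _mm512_set1_ps(-2.5);
    let src = _mm512_set1_ps(1.0);
    // Only the lowest eight lanes take |v2| = 2.5; the upper eight lanes keep 1.0.
    _mm512_mask_abs_ps(src, 0b0000_0000_1111_1111, v2)
}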

/// Move packed 32-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_epi32&expand=3801)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub fn _mm512_mask_mov_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
    unsafe {
        let mov: i32x16 = a.as_i32x16();
        transmute(simd_select_bitmask(k, mov, src.as_i32x16()))
    }
}

/// Move packed 32-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_epi32&expand=3802)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub fn _mm512_maskz_mov_epi32(k: __mmask16, a: __m512i) -> __m512i {
    unsafe {
        let mov: i32x16 = a.as_i32x16();
        transmute(simd_select_bitmask(k, mov, i32x16::ZERO))
    }
}

/// Move packed 32-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_epi32&expand=3799)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub fn _mm256_mask_mov_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let mov: i32x8 = a.as_i32x8();
        transmute(simd_select_bitmask(k, mov, src.as_i32x8()))
    }
}

/// Move packed 32-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_epi32&expand=3800)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub fn _mm256_maskz_mov_epi32(k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let mov: i32x8 = a.as_i32x8();
        transmute(simd_select_bitmask(k, mov, i32x8::ZERO))
    }
}

/// Move packed 32-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_epi32&expand=3797)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub fn _mm_mask_mov_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let mov: i32x4 = a.as_i32x4();
        transmute(simd_select_bitmask(k, mov, src.as_i32x4()))
    }
}

/// Move packed 32-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_epi32&expand=3798)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub fn _mm_maskz_mov_epi32(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let mov: i32x4 = a.as_i32x4();
        transmute(simd_select_bitmask(k, mov, i32x4::ZERO))
    }
}
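
// Illustrative sketch, not part of the original source: the masked "mov" intrinsics are
// lane-wise blends. For each lane i the result is a[i] if bit i of k is set, otherwise
// src[i] (writemask) or zero (zeromask).
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_blend_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
    // Equivalent to a k-register-driven blend of `a` over `src`.
    _mm512_mask_mov_epi32(src, k, a)
}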

/// Move packed 64-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_epi64&expand=3807)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub fn _mm512_mask_mov_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
    unsafe {
        let mov: i64x8 = a.as_i64x8();
        transmute(simd_select_bitmask(k, mov, src.as_i64x8()))
    }
}

/// Move packed 64-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_epi64&expand=3808)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub fn _mm512_maskz_mov_epi64(k: __mmask8, a: __m512i) -> __m512i {
    unsafe {
        let mov: i64x8 = a.as_i64x8();
        transmute(simd_select_bitmask(k, mov, i64x8::ZERO))
    }
}

/// Move packed 64-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_epi64&expand=3805)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub fn _mm256_mask_mov_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let mov: i64x4 = a.as_i64x4();
        transmute(simd_select_bitmask(k, mov, src.as_i64x4()))
    }
}

/// Move packed 64-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_epi64&expand=3806)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub fn _mm256_maskz_mov_epi64(k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let mov: i64x4 = a.as_i64x4();
        transmute(simd_select_bitmask(k, mov, i64x4::ZERO))
    }
}

/// Move packed 64-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_epi64&expand=3803)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub fn _mm_mask_mov_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let mov: i64x2 = a.as_i64x2();
        transmute(simd_select_bitmask(k, mov, src.as_i64x2()))
    }
}

/// Move packed 64-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_epi64&expand=3804)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub fn _mm_maskz_mov_epi64(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let mov: i64x2 = a.as_i64x2();
        transmute(simd_select_bitmask(k, mov, i64x2::ZERO))
    }
}

/// Move packed single-precision (32-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_ps&expand=3825)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub fn _mm512_mask_mov_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    unsafe {
        let mov: f32x16 = a.as_f32x16();
        transmute(simd_select_bitmask(k, mov, src.as_f32x16()))
    }
}

/// Move packed single-precision (32-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_ps&expand=3826)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub fn _mm512_maskz_mov_ps(k: __mmask16, a: __m512) -> __m512 {
    unsafe {
        let mov: f32x16 = a.as_f32x16();
        transmute(simd_select_bitmask(k, mov, f32x16::ZERO))
    }
}

/// Move packed single-precision (32-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_ps&expand=3823)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub fn _mm256_mask_mov_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
    unsafe {
        let mov: f32x8 = a.as_f32x8();
        transmute(simd_select_bitmask(k, mov, src.as_f32x8()))
    }
}

/// Move packed single-precision (32-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_ps&expand=3824)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub fn _mm256_maskz_mov_ps(k: __mmask8, a: __m256) -> __m256 {
    unsafe {
        let mov: f32x8 = a.as_f32x8();
        transmute(simd_select_bitmask(k, mov, f32x8::ZERO))
    }
}

/// Move packed single-precision (32-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_ps&expand=3821)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub fn _mm_mask_mov_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    unsafe {
        let mov: f32x4 = a.as_f32x4();
        transmute(simd_select_bitmask(k, mov, src.as_f32x4()))
    }
}

/// Move packed single-precision (32-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_ps&expand=3822)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub fn _mm_maskz_mov_ps(k: __mmask8, a: __m128) -> __m128 {
    unsafe {
        let mov: f32x4 = a.as_f32x4();
        transmute(simd_select_bitmask(k, mov, f32x4::ZERO))
    }
}

/// Move packed double-precision (64-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_pd&expand=3819)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub fn _mm512_mask_mov_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    unsafe {
        let mov: f64x8 = a.as_f64x8();
        transmute(simd_select_bitmask(k, mov, src.as_f64x8()))
    }
}

/// Move packed double-precision (64-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_pd&expand=3820)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub fn _mm512_maskz_mov_pd(k: __mmask8, a: __m512d) -> __m512d {
    unsafe {
        let mov: f64x8 = a.as_f64x8();
        transmute(simd_select_bitmask(k, mov, f64x8::ZERO))
    }
}

/// Move packed double-precision (64-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_pd&expand=3817)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub fn _mm256_mask_mov_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
    unsafe {
        let mov: f64x4 = a.as_f64x4();
        transmute(simd_select_bitmask(k, mov, src.as_f64x4()))
    }
}

/// Move packed double-precision (64-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_pd&expand=3818)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub fn _mm256_maskz_mov_pd(k: __mmask8, a: __m256d) -> __m256d {
    unsafe {
        let mov: f64x4 = a.as_f64x4();
        transmute(simd_select_bitmask(k, mov, f64x4::ZERO))
    }
}

/// Move packed double-precision (64-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_pd&expand=3815)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub fn _mm_mask_mov_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
    unsafe {
        let mov: f64x2 = a.as_f64x2();
        transmute(simd_select_bitmask(k, mov, src.as_f64x2()))
    }
}

/// Move packed double-precision (64-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_pd&expand=3816)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub fn _mm_maskz_mov_pd(k: __mmask8, a: __m128d) -> __m128d {
    unsafe {
        let mov: f64x2 = a.as_f64x2();
        transmute(simd_select_bitmask(k, mov, f64x2::ZERO))
    }
}

/// Add packed 32-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_epi32&expand=100)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub fn _mm512_add_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_add(a.as_i32x16(), b.as_i32x16())) }
}

/// Add packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_epi32&expand=101)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub fn _mm512_mask_add_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add: i32x16 = _mm512_add_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, add, src.as_i32x16()))
    }
}

/// Add packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_epi32&expand=102)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub fn _mm512_maskz_add_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add: i32x16 = _mm512_add_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, add, i32x16::ZERO))
    }
}

/// Add packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_epi32&expand=98)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub fn _mm256_mask_add_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add: i32x8 = _mm256_add_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, add, src.as_i32x8()))
    }
}

/// Add packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_epi32&expand=99)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub fn _mm256_maskz_add_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add: i32x8 = _mm256_add_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, add, i32x8::ZERO))
    }
}

/// Add packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_epi32&expand=95)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub fn _mm_mask_add_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add: i32x4 = _mm_add_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, add, src.as_i32x4()))
    }
}

/// Add packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_epi32&expand=96)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub fn _mm_maskz_add_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add: i32x4 = _mm_add_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, add, i32x4::ZERO))
    }
}
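
// Illustrative sketch, not part of the original source: masked addition can act as a
// conditional accumulate, updating only selected lanes. Values and mask are assumptions
// chosen for the example.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mask_add_epi32() -> __m512i {
    let acc = _mm512_set1_epi32(10);
    let delta = _mm512_set1_epi32(1);
    // Only even-numbered lanes are incremented; odd-numbered lanes keep their value.
    _mm512_mask_add_epi32(acc, 0b0101_0101_0101_0101, acc, delta)
}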

/// Add packed 64-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_epi64&expand=109)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub fn _mm512_add_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_add(a.as_i64x8(), b.as_i64x8())) }
}

/// Add packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_epi64&expand=110)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub fn _mm512_mask_add_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add: i64x8 = _mm512_add_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, add, src.as_i64x8()))
    }
}

/// Add packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_epi64&expand=111)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub fn _mm512_maskz_add_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add: i64x8 = _mm512_add_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, add, i64x8::ZERO))
    }
}

/// Add packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_epi64&expand=107)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub fn _mm256_mask_add_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add: i64x4 = _mm256_add_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, add, src.as_i64x4()))
    }
}

/// Add packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_epi64&expand=108)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub fn _mm256_maskz_add_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add: i64x4 = _mm256_add_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, add, i64x4::ZERO))
    }
}

/// Add packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_epi64&expand=104)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub fn _mm_mask_add_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add: i64x2 = _mm_add_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, add, src.as_i64x2()))
    }
}

/// Add packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_epi64&expand=105)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub fn _mm_maskz_add_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add: i64x2 = _mm_add_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, add, i64x2::ZERO))
    }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_ps&expand=139)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddps))]
pub fn _mm512_add_ps(a: __m512, b: __m512) -> __m512 {
    unsafe { transmute(simd_add(a.as_f32x16(), b.as_f32x16())) }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_ps&expand=140)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddps))]
pub fn _mm512_mask_add_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let add: f32x16 = _mm512_add_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, add, src.as_f32x16()))
    }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_ps&expand=141)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddps))]
pub fn _mm512_maskz_add_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let add: f32x16 = _mm512_add_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, add, f32x16::ZERO))
    }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_ps&expand=137)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddps))]
pub fn _mm256_mask_add_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let add: f32x8 = _mm256_add_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, add, src.as_f32x8()))
    }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_ps&expand=138)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddps))]
pub fn _mm256_maskz_add_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let add: f32x8 = _mm256_add_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, add, f32x8::ZERO))
    }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_ps&expand=134)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddps))]
pub fn _mm_mask_add_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let add: f32x4 = _mm_add_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, add, src.as_f32x4()))
    }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_ps&expand=135)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddps))]
pub fn _mm_maskz_add_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let add: f32x4 = _mm_add_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, add, f32x4::ZERO))
    }
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_pd&expand=127)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub fn _mm512_add_pd(a: __m512d, b: __m512d) -> __m512d {
    unsafe { transmute(simd_add(a.as_f64x8(), b.as_f64x8())) }
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_pd&expand=128)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub fn _mm512_mask_add_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let add: f64x8 = _mm512_add_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, add, src.as_f64x8()))
    }
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_pd&expand=129)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub fn _mm512_maskz_add_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let add: f64x8 = _mm512_add_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, add, f64x8::ZERO))
    }
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_pd&expand=125)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub fn _mm256_mask_add_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let add: f64x4 = _mm256_add_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, add, src.as_f64x4()))
    }
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_pd&expand=126)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub fn _mm256_maskz_add_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let add: f64x4 = _mm256_add_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, add, f64x4::ZERO))
    }
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_pd&expand=122)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub fn _mm_mask_add_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let add: f64x2 = _mm_add_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, add, src.as_f64x2()))
    }
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_pd&expand=123)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub fn _mm_maskz_add_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let add: f64x2 = _mm_add_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, add, f64x2::ZERO))
    }
}
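
// Illustrative sketch, not part of the original source: the zeromask variants are useful
// for loop tails, since lanes past the tail come out as zero. `live_lanes` is an assumed
// helper parameter and must be at most 8 here.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_maskz_add_pd_tail(a: __m512d, b: __m512d, live_lanes: u32) -> __m512d {
    // Build a mask with the `live_lanes` low bits set.
    let k = ((1u16 << live_lanes) - 1) as __mmask8;
    _mm512_maskz_add_pd(k, a, b)
}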

/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_epi32&expand=5694)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub fn _mm512_sub_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_sub(a.as_i32x16(), b.as_i32x16())) }
}

/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_epi32&expand=5692)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub fn _mm512_mask_sub_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub: i32x16 = _mm512_sub_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, sub, src.as_i32x16()))
    }
}

/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_epi32&expand=5693)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub fn _mm512_maskz_sub_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub: i32x16 = _mm512_sub_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, sub, i32x16::ZERO))
    }
}

/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_epi32&expand=5689)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub fn _mm256_mask_sub_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub: i32x8 = _mm256_sub_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, sub, src.as_i32x8()))
    }
}

/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_epi32&expand=5690)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub fn _mm256_maskz_sub_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub: i32x8 = _mm256_sub_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, sub, i32x8::ZERO))
    }
}

/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_epi32&expand=5686)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub fn _mm_mask_sub_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub: i32x4 = _mm_sub_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, sub, src.as_i32x4()))
    }
}

/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_epi32&expand=5687)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub fn _mm_maskz_sub_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub: i32x4 = _mm_sub_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, sub, i32x4::ZERO))
    }
}
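
// Illustrative sketch, not part of the original source: a masked subtract expressed as a
// conditional decrement. The counter value and mask are assumptions chosen for the example.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn example_mask_sub_epi32() -> __m256i {
    let counters = _mm256_set1_epi32(5);
    let one = _mm256_set1_epi32(1);
    // Decrement only the four lowest lanes; the upper four lanes keep their value.
    _mm256_mask_sub_epi32(counters, 0b0000_1111, counters, one)
}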

/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_epi64&expand=5703)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubq))]
pub fn _mm512_sub_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_sub(a.as_i64x8(), b.as_i64x8())) }
}

/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_epi64&expand=5701)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubq))]
pub fn _mm512_mask_sub_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub: i64x8 = _mm512_sub_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, sub, src.as_i64x8()))
    }
}

/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_epi64&expand=5702)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubq))]
pub fn _mm512_maskz_sub_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let sub: i64x8 = _mm512_sub_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, sub, i64x8::ZERO))
    }
}

/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_epi64&expand=5698)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubq))]
pub fn _mm256_mask_sub_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub: i64x4 = _mm256_sub_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, sub, src.as_i64x4()))
    }
}

/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_epi64&expand=5699)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubq))]
pub fn _mm256_maskz_sub_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let sub: i64x4 = _mm256_sub_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, sub, i64x4::ZERO))
    }
}

/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_epi64&expand=5695)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubq))]
pub fn _mm_mask_sub_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub: i64x2 = _mm_sub_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, sub, src.as_i64x2()))
    }
}

/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_epi64&expand=5696)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpsubq))]
pub fn _mm_maskz_sub_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let sub: i64x2 = _mm_sub_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, sub, i64x2::ZERO))
    }
}

/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst.
1196///
1197/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_ps&expand=5733)
1198#[inline]
1199#[target_feature(enable = "avx512f")]
1200#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1201#[cfg_attr(test, assert_instr(vsubps))]
1202pub fn _mm512_sub_ps(a: __m512, b: __m512) -> __m512 {
1203 unsafe { transmute(src:simd_sub(lhs:a.as_f32x16(), rhs:b.as_f32x16())) }
1204}
1205
1206/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1207///
1208/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_ps&expand=5731)
1209#[inline]
1210#[target_feature(enable = "avx512f")]
1211#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1212#[cfg_attr(test, assert_instr(vsubps))]
1213pub fn _mm512_mask_sub_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
1214 unsafe {
1215 let sub: f32x16 = _mm512_sub_ps(a, b).as_f32x16();
1216 transmute(src:simd_select_bitmask(m:k, yes:sub, no:src.as_f32x16()))
1217 }
1218}
1219
1220/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1221///
1222/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_ps&expand=5732)
1223#[inline]
1224#[target_feature(enable = "avx512f")]
1225#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1226#[cfg_attr(test, assert_instr(vsubps))]
1227pub fn _mm512_maskz_sub_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
1228 unsafe {
1229 let sub: f32x16 = _mm512_sub_ps(a, b).as_f32x16();
1230 transmute(src:simd_select_bitmask(m:k, yes:sub, no:f32x16::ZERO))
1231 }
1232}
1233
1234/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1235///
1236/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_ps&expand=5728)
1237#[inline]
1238#[target_feature(enable = "avx512f,avx512vl")]
1239#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1240#[cfg_attr(test, assert_instr(vsubps))]
1241pub fn _mm256_mask_sub_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
1242 unsafe {
1243 let sub: f32x8 = _mm256_sub_ps(a, b).as_f32x8();
1244 transmute(src:simd_select_bitmask(m:k, yes:sub, no:src.as_f32x8()))
1245 }
1246}
1247
1248/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1249///
1250/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_ps&expand=5729)
1251#[inline]
1252#[target_feature(enable = "avx512f,avx512vl")]
1253#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1254#[cfg_attr(test, assert_instr(vsubps))]
1255pub fn _mm256_maskz_sub_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
1256 unsafe {
1257 let sub: f32x8 = _mm256_sub_ps(a, b).as_f32x8();
1258 transmute(src:simd_select_bitmask(m:k, yes:sub, no:f32x8::ZERO))
1259 }
1260}
1261
1262/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1263///
1264/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_ps&expand=5725)
1265#[inline]
1266#[target_feature(enable = "avx512f,avx512vl")]
1267#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1268#[cfg_attr(test, assert_instr(vsubps))]
1269pub fn _mm_mask_sub_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
1270 unsafe {
1271 let sub: f32x4 = _mm_sub_ps(a, b).as_f32x4();
1272 transmute(src:simd_select_bitmask(m:k, yes:sub, no:src.as_f32x4()))
1273 }
1274}
1275
1276/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1277///
1278/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_ps&expand=5726)
1279#[inline]
1280#[target_feature(enable = "avx512f,avx512vl")]
1281#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1282#[cfg_attr(test, assert_instr(vsubps))]
1283pub fn _mm_maskz_sub_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
1284 unsafe {
1285 let sub: f32x4 = _mm_sub_ps(a, b).as_f32x4();
1286 transmute(src:simd_select_bitmask(m:k, yes:sub, no:f32x4::ZERO))
1287 }
1288}
1289
1290/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst.
1291///
1292/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_pd&expand=5721)
1293#[inline]
1294#[target_feature(enable = "avx512f")]
1295#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1296#[cfg_attr(test, assert_instr(vsubpd))]
1297pub fn _mm512_sub_pd(a: __m512d, b: __m512d) -> __m512d {
1298 unsafe { transmute(src:simd_sub(lhs:a.as_f64x8(), rhs:b.as_f64x8())) }
1299}
1300
1301/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1302///
1303/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_pd&expand=5719)
1304#[inline]
1305#[target_feature(enable = "avx512f")]
1306#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1307#[cfg_attr(test, assert_instr(vsubpd))]
1308pub fn _mm512_mask_sub_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
1309 unsafe {
1310 let sub: f64x8 = _mm512_sub_pd(a, b).as_f64x8();
1311 transmute(src:simd_select_bitmask(m:k, yes:sub, no:src.as_f64x8()))
1312 }
1313}
1314
1315/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1316///
1317/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_pd&expand=5720)
1318#[inline]
1319#[target_feature(enable = "avx512f")]
1320#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1321#[cfg_attr(test, assert_instr(vsubpd))]
1322pub fn _mm512_maskz_sub_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
1323 unsafe {
1324 let sub: f64x8 = _mm512_sub_pd(a, b).as_f64x8();
1325 transmute(src:simd_select_bitmask(m:k, yes:sub, no:f64x8::ZERO))
1326 }
1327}
1328
1329/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1330///
1331/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_pd&expand=5716)
1332#[inline]
1333#[target_feature(enable = "avx512f,avx512vl")]
1334#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1335#[cfg_attr(test, assert_instr(vsubpd))]
1336pub fn _mm256_mask_sub_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
1337 unsafe {
1338 let sub: f64x4 = _mm256_sub_pd(a, b).as_f64x4();
1339 transmute(src:simd_select_bitmask(m:k, yes:sub, no:src.as_f64x4()))
1340 }
1341}
1342
1343/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1344///
1345/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_pd&expand=5717)
1346#[inline]
1347#[target_feature(enable = "avx512f,avx512vl")]
1348#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1349#[cfg_attr(test, assert_instr(vsubpd))]
1350pub fn _mm256_maskz_sub_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
1351 unsafe {
1352 let sub: f64x4 = _mm256_sub_pd(a, b).as_f64x4();
1353 transmute(src:simd_select_bitmask(m:k, yes:sub, no:f64x4::ZERO))
1354 }
1355}
1356
1357/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1358///
1359/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_pd&expand=5713)
1360#[inline]
1361#[target_feature(enable = "avx512f,avx512vl")]
1362#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1363#[cfg_attr(test, assert_instr(vsubpd))]
1364pub fn _mm_mask_sub_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
1365 unsafe {
1366 let sub: f64x2 = _mm_sub_pd(a, b).as_f64x2();
1367 transmute(src:simd_select_bitmask(m:k, yes:sub, no:src.as_f64x2()))
1368 }
1369}
1370
1371/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1372///
1373/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_pd&expand=5714)
1374#[inline]
1375#[target_feature(enable = "avx512f,avx512vl")]
1376#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
1377#[cfg_attr(test, assert_instr(vsubpd))]
1378pub fn _mm_maskz_sub_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
1379 unsafe {
1380 let sub: f64x2 = _mm_sub_pd(a, b).as_f64x2();
1381 transmute(src:simd_select_bitmask(m:k, yes:sub, no:f64x2::ZERO))
1382 }
1383}
1384
1385/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst.
1386///
1387/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_epi32&expand=3907)
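///
/// A minimal sketch of the lane semantics (illustrative only, not part of Intel's
/// documentation); it assumes an AVX-512F capable CPU and is not compiled as a doctest:
///
/// ```ignore
/// // The high 32 bits of each 64-bit lane are ignored; only the low signed
/// // 32-bit halves (7 and -3) take part in the multiply.
/// let a = _mm512_set1_epi64(0xFFFF_FFFF_0000_0007u64 as i64);
/// let b = _mm512_set1_epi64(-3); // low 32 bits are 0xFFFF_FFFD = -3
/// let r = _mm512_mul_epi32(a, b);
/// // every 64-bit lane of r holds 7 * -3 = -21
/// ```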
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuldq))]
pub fn _mm512_mul_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a: i64x8 = simd_cast::<_, i64x8>(simd_cast::<_, i32x8>(a.as_i64x8()));
        let b: i64x8 = simd_cast::<_, i64x8>(simd_cast::<_, i32x8>(b.as_i64x8()));
        transmute(simd_mul(a, b))
    }
}

/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_epi32&expand=3905)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuldq))]
pub fn _mm512_mask_mul_epi32(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let mul: i64x8 = _mm512_mul_epi32(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, mul, src.as_i64x8()))
    }
}

/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_epi32&expand=3906)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuldq))]
pub fn _mm512_maskz_mul_epi32(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let mul: i64x8 = _mm512_mul_epi32(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, mul, i64x8::ZERO))
    }
}

/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_epi32&expand=3902)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuldq))]
pub fn _mm256_mask_mul_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let mul: i64x4 = _mm256_mul_epi32(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, mul, src.as_i64x4()))
    }
}

/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_epi32&expand=3903)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuldq))]
pub fn _mm256_maskz_mul_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let mul: i64x4 = _mm256_mul_epi32(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, mul, i64x4::ZERO))
    }
}

/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_epi32&expand=3899)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuldq))]
pub fn _mm_mask_mul_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul: i64x2 = _mm_mul_epi32(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, mul, src.as_i64x2()))
    }
}

/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_epi32&expand=3900)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuldq))]
pub fn _mm_maskz_mul_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul: i64x2 = _mm_mul_epi32(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, mul, i64x2::ZERO))
    }
}

/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mullo_epi32&expand=4005)
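///
/// A minimal sketch of the truncating behaviour (illustrative only, not part of
/// Intel's documentation); it assumes an AVX-512F capable CPU and is not compiled
/// as a doctest:
///
/// ```ignore
/// let a = _mm512_set1_epi32(100_000);
/// let b = _mm512_set1_epi32(100_000);
/// let r = _mm512_mullo_epi32(a, b);
/// // 100_000 * 100_000 = 10_000_000_000 does not fit in 32 bits; only the low
/// // 32 bits are kept, so every lane is 10_000_000_000 mod 2^32 = 1_410_065_408
/// ```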
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulld))]
pub fn _mm512_mullo_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_mul(a.as_i32x16(), b.as_i32x16())) }
}

/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mullo_epi32&expand=4003)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulld))]
pub fn _mm512_mask_mullo_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let mul: i32x16 = _mm512_mullo_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, mul, src.as_i32x16()))
    }
}

/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mullo_epi32&expand=4004)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulld))]
pub fn _mm512_maskz_mullo_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let mul: i32x16 = _mm512_mullo_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, mul, i32x16::ZERO))
    }
}

/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mullo_epi32&expand=4000)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulld))]
pub fn _mm256_mask_mullo_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let mul: i32x8 = _mm256_mullo_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, mul, src.as_i32x8()))
    }
}

/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mullo_epi32&expand=4001)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulld))]
pub fn _mm256_maskz_mullo_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let mul: i32x8 = _mm256_mullo_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, mul, i32x8::ZERO))
    }
}

/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mullo_epi32&expand=3997)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulld))]
pub fn _mm_mask_mullo_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul: i32x4 = _mm_mullo_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, mul, src.as_i32x4()))
    }
}

/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mullo_epi32&expand=3998)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmulld))]
pub fn _mm_maskz_mullo_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul: i32x4 = _mm_mullo_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, mul, i32x4::ZERO))
    }
}

/// Multiplies elements in packed 64-bit integer vectors a and b together, storing the lower 64 bits of the result in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mullox_epi64&expand=4017)
///
/// This intrinsic generates a sequence of instructions, which may perform worse than a native instruction. Consider the performance impact of this intrinsic.
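///
/// A minimal usage sketch (illustrative only, not part of Intel's documentation);
/// it assumes an AVX-512F capable CPU and is not compiled as a doctest:
///
/// ```ignore
/// let a = _mm512_set1_epi64(1 << 40);
/// let b = _mm512_set1_epi64(3);
/// let r = _mm512_mullox_epi64(a, b);
/// // every 64-bit lane is 3 * 2^40; without AVX-512DQ's vpmullq the multiply is
/// // typically lowered to a short sequence of 32-bit multiplies, shifts and adds
/// ```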
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mullox_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_mul(a.as_i64x8(), b.as_i64x8())) }
}

/// Multiplies elements in packed 64-bit integer vectors a and b together, storing the lower 64 bits of the result in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mullox_epi64&expand=4016)
///
/// This intrinsic generates a sequence of instructions, which may perform worse than a native instruction. Consider the performance impact of this intrinsic.
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub fn _mm512_mask_mullox_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let mul: i64x8 = _mm512_mullox_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, mul, src.as_i64x8()))
    }
}

/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_epu32&expand=3916)
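///
/// A minimal sketch of the lane semantics (illustrative only, not part of Intel's
/// documentation); it assumes an AVX-512F capable CPU and is not compiled as a doctest:
///
/// ```ignore
/// // Only the low unsigned 32 bits of each 64-bit lane are multiplied.
/// let a = _mm512_set1_epi64(u32::MAX as i64); // low half = 0xFFFF_FFFF
/// let b = _mm512_set1_epi64(2);
/// let r = _mm512_mul_epu32(a, b);
/// // every 64-bit lane is 0xFFFF_FFFF * 2 = 0x1_FFFF_FFFE (8_589_934_590)
/// ```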
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuludq))]
pub fn _mm512_mul_epu32(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a: u64x8 = a.as_u64x8();
        let b: u64x8 = b.as_u64x8();
        let mask: u64x8 = u64x8::splat(u32::MAX.into());
        transmute(simd_mul(simd_and(a, mask), simd_and(b, mask)))
    }
}

/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_epu32&expand=3914)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuludq))]
pub fn _mm512_mask_mul_epu32(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let mul: u64x8 = _mm512_mul_epu32(a, b).as_u64x8();
        transmute(simd_select_bitmask(k, mul, src.as_u64x8()))
    }
}

/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_epu32&expand=3915)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuludq))]
pub fn _mm512_maskz_mul_epu32(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let mul: u64x8 = _mm512_mul_epu32(a, b).as_u64x8();
        transmute(simd_select_bitmask(k, mul, u64x8::ZERO))
    }
}

/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_epu32&expand=3911)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuludq))]
pub fn _mm256_mask_mul_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let mul: u64x4 = _mm256_mul_epu32(a, b).as_u64x4();
        transmute(simd_select_bitmask(k, mul, src.as_u64x4()))
    }
}

/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_epu32&expand=3912)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuludq))]
pub fn _mm256_maskz_mul_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let mul: u64x4 = _mm256_mul_epu32(a, b).as_u64x4();
        transmute(simd_select_bitmask(k, mul, u64x4::ZERO))
    }
}

/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_epu32&expand=3908)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuludq))]
pub fn _mm_mask_mul_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul: u64x2 = _mm_mul_epu32(a, b).as_u64x2();
        transmute(simd_select_bitmask(k, mul, src.as_u64x2()))
    }
}

/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_epu32&expand=3909)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmuludq))]
pub fn _mm_maskz_mul_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let mul: u64x2 = _mm_mul_epu32(a, b).as_u64x2();
        transmute(simd_select_bitmask(k, mul, u64x2::ZERO))
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_ps&expand=3934)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulps))]
pub fn _mm512_mul_ps(a: __m512, b: __m512) -> __m512 {
    unsafe { transmute(simd_mul(a.as_f32x16(), b.as_f32x16())) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_ps&expand=3932)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulps))]
pub fn _mm512_mask_mul_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let mul: f32x16 = _mm512_mul_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, mul, src.as_f32x16()))
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_ps&expand=3933)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulps))]
pub fn _mm512_maskz_mul_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let mul: f32x16 = _mm512_mul_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, mul, f32x16::ZERO))
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_ps&expand=3929)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulps))]
pub fn _mm256_mask_mul_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let mul: f32x8 = _mm256_mul_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, mul, src.as_f32x8()))
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_ps&expand=3930)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulps))]
pub fn _mm256_maskz_mul_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let mul: f32x8 = _mm256_mul_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, mul, f32x8::ZERO))
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_ps&expand=3926)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulps))]
pub fn _mm_mask_mul_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let mul: f32x4 = _mm_mul_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, mul, src.as_f32x4()))
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_ps&expand=3927)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulps))]
pub fn _mm_maskz_mul_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let mul: f32x4 = _mm_mul_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, mul, f32x4::ZERO))
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_pd&expand=3925)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulpd))]
pub fn _mm512_mul_pd(a: __m512d, b: __m512d) -> __m512d {
    unsafe { transmute(simd_mul(a.as_f64x8(), b.as_f64x8())) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_pd&expand=3923)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulpd))]
pub fn _mm512_mask_mul_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let mul: f64x8 = _mm512_mul_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, mul, src.as_f64x8()))
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_pd&expand=3924)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulpd))]
pub fn _mm512_maskz_mul_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let mul: f64x8 = _mm512_mul_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, mul, f64x8::ZERO))
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_pd&expand=3920)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulpd))]
pub fn _mm256_mask_mul_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let mul: f64x4 = _mm256_mul_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, mul, src.as_f64x4()))
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_pd&expand=3921)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulpd))]
pub fn _mm256_maskz_mul_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let mul: f64x4 = _mm256_mul_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, mul, f64x4::ZERO))
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_pd&expand=3917)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulpd))]
pub fn _mm_mask_mul_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let mul: f64x2 = _mm_mul_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, mul, src.as_f64x2()))
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_pd&expand=3918)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmulpd))]
pub fn _mm_maskz_mul_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let mul: f64x2 = _mm_mul_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, mul, f64x2::ZERO))
    }
}

/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_ps&expand=2162)
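///
/// A minimal usage sketch (illustrative only, not part of Intel's documentation);
/// it assumes an AVX-512F capable CPU and is not compiled as a doctest:
///
/// ```ignore
/// let a = _mm512_set1_ps(1.0);
/// let b = _mm512_set1_ps(8.0);
/// let r = _mm512_div_ps(a, b);
/// // every lane is 0.125; the division follows ordinary IEEE 754 rules, so a
/// // non-zero value divided by 0.0 yields infinity rather than faulting
/// ```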
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivps))]
pub fn _mm512_div_ps(a: __m512, b: __m512) -> __m512 {
    unsafe { transmute(simd_div(a.as_f32x16(), b.as_f32x16())) }
}

/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_ps&expand=2163)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivps))]
pub fn _mm512_mask_div_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let div: f32x16 = _mm512_div_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, div, src.as_f32x16()))
    }
}

/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_ps&expand=2164)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivps))]
pub fn _mm512_maskz_div_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let div: f32x16 = _mm512_div_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, div, f32x16::ZERO))
    }
}

/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_div_ps&expand=2160)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivps))]
pub fn _mm256_mask_div_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let div: f32x8 = _mm256_div_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, div, src.as_f32x8()))
    }
}

/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_div_ps&expand=2161)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivps))]
pub fn _mm256_maskz_div_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let div: f32x8 = _mm256_div_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, div, f32x8::ZERO))
    }
}

/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_div_ps&expand=2157)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivps))]
pub fn _mm_mask_div_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let div: f32x4 = _mm_div_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, div, src.as_f32x4()))
    }
}

/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_div_ps&expand=2158)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivps))]
pub fn _mm_maskz_div_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let div: f32x4 = _mm_div_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, div, f32x4::ZERO))
    }
}

/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_pd&expand=2153)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivpd))]
pub fn _mm512_div_pd(a: __m512d, b: __m512d) -> __m512d {
    unsafe { transmute(simd_div(a.as_f64x8(), b.as_f64x8())) }
}

/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_pd&expand=2154)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivpd))]
pub fn _mm512_mask_div_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let div: f64x8 = _mm512_div_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, div, src.as_f64x8()))
    }
}

/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_pd&expand=2155)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivpd))]
pub fn _mm512_maskz_div_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let div: f64x8 = _mm512_div_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, div, f64x8::ZERO))
    }
}

/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_div_pd&expand=2151)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivpd))]
pub fn _mm256_mask_div_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let div: f64x4 = _mm256_div_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, div, src.as_f64x4()))
    }
}

/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_div_pd&expand=2152)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivpd))]
pub fn _mm256_maskz_div_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let div: f64x4 = _mm256_div_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, div, f64x4::ZERO))
    }
}

/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_div_pd&expand=2148)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivpd))]
pub fn _mm_mask_div_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let div: f64x2 = _mm_div_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, div, src.as_f64x2()))
    }
}

/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_div_pd&expand=2149)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vdivpd))]
pub fn _mm_maskz_div_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let div: f64x2 = _mm_div_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, div, f64x2::ZERO))
    }
}

/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epi32&expand=3582)
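///
/// A minimal usage sketch (illustrative only, not part of Intel's documentation);
/// it assumes an AVX-512F capable CPU and is not compiled as a doctest:
///
/// ```ignore
/// let a = _mm512_set1_epi32(-5);
/// let b = _mm512_set1_epi32(3);
/// let r = _mm512_max_epi32(a, b);
/// // the comparison is signed, so every lane is 3
/// // (an unsigned maximum would instead pick -5 = 0xFFFF_FFFB)
/// ```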
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
pub fn _mm512_max_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a: i32x16 = a.as_i32x16();
        let b: i32x16 = b.as_i32x16();
        transmute(simd_select::<i32x16, _>(simd_gt(a, b), a, b))
    }
}

/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epi32&expand=3580)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
pub fn _mm512_mask_max_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max: i32x16 = _mm512_max_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, max, src.as_i32x16()))
    }
}

/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epi32&expand=3581)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
pub fn _mm512_maskz_max_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max: i32x16 = _mm512_max_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, max, i32x16::ZERO))
    }
}

/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epi32&expand=3577)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
pub fn _mm256_mask_max_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max: i32x8 = _mm256_max_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, max, src.as_i32x8()))
    }
}

/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epi32&expand=3578)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
pub fn _mm256_maskz_max_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let max: i32x8 = _mm256_max_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, max, i32x8::ZERO))
    }
}

/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epi32&expand=3574)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
pub fn _mm_mask_max_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max: i32x4 = _mm_max_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, max, src.as_i32x4()))
    }
}

/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epi32&expand=3575)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
pub fn _mm_maskz_max_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let max: i32x4 = _mm_max_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, max, i32x4::ZERO))
    }
}

/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epi64&expand=3591)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
pub fn _mm512_max_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let a: i64x8 = a.as_i64x8();
        let b: i64x8 = b.as_i64x8();
        transmute(simd_select::<i64x8, _>(simd_gt(a, b), a, b))
    }
}

/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epi64&expand=3589)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
pub fn _mm512_mask_max_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let max: i64x8 = _mm512_max_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, max, src.as_i64x8()))
    }
}

/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2215///
2216/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epi64&expand=3590)
2217#[inline]
2218#[target_feature(enable = "avx512f")]
2219#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2220#[cfg_attr(test, assert_instr(vpmaxsq))]
2221pub fn _mm512_maskz_max_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
2222 unsafe {
2223 let max: i64x8 = _mm512_max_epi64(a, b).as_i64x8();
2224 transmute(src:simd_select_bitmask(m:k, yes:max, no:i64x8::ZERO))
2225 }
2226}
2227
2228/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst.
2229///
2230/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epi64&expand=3588)
2231#[inline]
2232#[target_feature(enable = "avx512f,avx512vl")]
2233#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2234#[cfg_attr(test, assert_instr(vpmaxsq))]
2235pub fn _mm256_max_epi64(a: __m256i, b: __m256i) -> __m256i {
2236 unsafe {
2237 let a: i64x4 = a.as_i64x4();
2238 let b: i64x4 = b.as_i64x4();
2239 transmute(src:simd_select::<i64x4, _>(mask:simd_gt(a, b), if_true:a, if_false:b))
2240 }
2241}
2242
2243/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2244///
2245/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epi64&expand=3586)
2246#[inline]
2247#[target_feature(enable = "avx512f,avx512vl")]
2248#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2249#[cfg_attr(test, assert_instr(vpmaxsq))]
2250pub fn _mm256_mask_max_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2251 unsafe {
2252 let max: i64x4 = _mm256_max_epi64(a, b).as_i64x4();
2253 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_i64x4()))
2254 }
2255}
2256
2257/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2258///
2259/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epi64&expand=3587)
2260#[inline]
2261#[target_feature(enable = "avx512f,avx512vl")]
2262#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2263#[cfg_attr(test, assert_instr(vpmaxsq))]
2264pub fn _mm256_maskz_max_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2265 unsafe {
2266 let max: i64x4 = _mm256_max_epi64(a, b).as_i64x4();
2267 transmute(src:simd_select_bitmask(m:k, yes:max, no:i64x4::ZERO))
2268 }
2269}
2270
2271/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst.
2272///
2273/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epi64&expand=3585)
2274#[inline]
2275#[target_feature(enable = "avx512f,avx512vl")]
2276#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2277#[cfg_attr(test, assert_instr(vpmaxsq))]
2278pub fn _mm_max_epi64(a: __m128i, b: __m128i) -> __m128i {
2279 unsafe {
2280 let a: i64x2 = a.as_i64x2();
2281 let b: i64x2 = b.as_i64x2();
2282 transmute(src:simd_select::<i64x2, _>(mask:simd_gt(a, b), if_true:a, if_false:b))
2283 }
2284}
2285
2286/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2287///
2288/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epi64&expand=3583)
2289#[inline]
2290#[target_feature(enable = "avx512f,avx512vl")]
2291#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2292#[cfg_attr(test, assert_instr(vpmaxsq))]
2293pub fn _mm_mask_max_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2294 unsafe {
2295 let max: i64x2 = _mm_max_epi64(a, b).as_i64x2();
2296 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_i64x2()))
2297 }
2298}
2299
2300/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2301///
2302/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epi64&expand=3584)
2303#[inline]
2304#[target_feature(enable = "avx512f,avx512vl")]
2305#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2306#[cfg_attr(test, assert_instr(vpmaxsq))]
2307pub fn _mm_maskz_max_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2308 unsafe {
2309 let max: i64x2 = _mm_max_epi64(a, b).as_i64x2();
2310 transmute(src:simd_select_bitmask(m:k, yes:max, no:i64x2::ZERO))
2311 }
2312}
2313
2314/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst.
2315///
2316/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_ps&expand=3655)
2317#[inline]
2318#[target_feature(enable = "avx512f")]
2319#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2320#[cfg_attr(test, assert_instr(vmaxps))]
2321pub fn _mm512_max_ps(a: __m512, b: __m512) -> __m512 {
2322 unsafe {
2323 transmute(src:vmaxps(
2324 a.as_f32x16(),
2325 b.as_f32x16(),
2326 _MM_FROUND_CUR_DIRECTION,
2327 ))
2328 }
2329}
2330
2331/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2332///
2333/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_ps&expand=3653)
2334#[inline]
2335#[target_feature(enable = "avx512f")]
2336#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2337#[cfg_attr(test, assert_instr(vmaxps))]
2338pub fn _mm512_mask_max_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
2339 unsafe {
2340 let max: f32x16 = _mm512_max_ps(a, b).as_f32x16();
2341 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_f32x16()))
2342 }
2343}
2344
2345/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2346///
2347/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_ps&expand=3654)
2348#[inline]
2349#[target_feature(enable = "avx512f")]
2350#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2351#[cfg_attr(test, assert_instr(vmaxps))]
2352pub fn _mm512_maskz_max_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
2353 unsafe {
2354 let max: f32x16 = _mm512_max_ps(a, b).as_f32x16();
2355 transmute(src:simd_select_bitmask(m:k, yes:max, no:f32x16::ZERO))
2356 }
2357}
2358
2359/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2360///
2361/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_ps&expand=3650)
2362#[inline]
2363#[target_feature(enable = "avx512f,avx512vl")]
2364#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2365#[cfg_attr(test, assert_instr(vmaxps))]
2366pub fn _mm256_mask_max_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
2367 unsafe {
2368 let max: f32x8 = _mm256_max_ps(a, b).as_f32x8();
2369 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_f32x8()))
2370 }
2371}
2372
2373/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2374///
2375/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_ps&expand=3651)
2376#[inline]
2377#[target_feature(enable = "avx512f,avx512vl")]
2378#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2379#[cfg_attr(test, assert_instr(vmaxps))]
2380pub fn _mm256_maskz_max_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
2381 unsafe {
2382 let max: f32x8 = _mm256_max_ps(a, b).as_f32x8();
2383 transmute(src:simd_select_bitmask(m:k, yes:max, no:f32x8::ZERO))
2384 }
2385}
2386
2387/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2388///
2389/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_ps&expand=3647)
2390#[inline]
2391#[target_feature(enable = "avx512f,avx512vl")]
2392#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2393#[cfg_attr(test, assert_instr(vmaxps))]
2394pub fn _mm_mask_max_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
2395 unsafe {
2396 let max: f32x4 = _mm_max_ps(a, b).as_f32x4();
2397 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_f32x4()))
2398 }
2399}
2400
2401/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2402///
2403/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_ps&expand=3648)
2404#[inline]
2405#[target_feature(enable = "avx512f,avx512vl")]
2406#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2407#[cfg_attr(test, assert_instr(vmaxps))]
2408pub fn _mm_maskz_max_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
2409 unsafe {
2410 let max: f32x4 = _mm_max_ps(a, b).as_f32x4();
2411 transmute(src:simd_select_bitmask(m:k, yes:max, no:f32x4::ZERO))
2412 }
2413}
2414
2415/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst.
2416///
2417/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_pd&expand=3645)
2418#[inline]
2419#[target_feature(enable = "avx512f")]
2420#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2421#[cfg_attr(test, assert_instr(vmaxpd))]
2422pub fn _mm512_max_pd(a: __m512d, b: __m512d) -> __m512d {
2423 unsafe { transmute(src:vmaxpd(a.as_f64x8(), b.as_f64x8(), _MM_FROUND_CUR_DIRECTION)) }
2424}
2425
2426/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2427///
2428/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_pd&expand=3643)
2429#[inline]
2430#[target_feature(enable = "avx512f")]
2431#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2432#[cfg_attr(test, assert_instr(vmaxpd))]
2433pub fn _mm512_mask_max_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
2434 unsafe {
2435 let max: f64x8 = _mm512_max_pd(a, b).as_f64x8();
2436 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_f64x8()))
2437 }
2438}
2439
2440/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2441///
2442/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_pd&expand=3644)
2443#[inline]
2444#[target_feature(enable = "avx512f")]
2445#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2446#[cfg_attr(test, assert_instr(vmaxpd))]
2447pub fn _mm512_maskz_max_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
2448 unsafe {
2449 let max: f64x8 = _mm512_max_pd(a, b).as_f64x8();
2450 transmute(src:simd_select_bitmask(m:k, yes:max, no:f64x8::ZERO))
2451 }
2452}
2453
2454/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2455///
2456/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_pd&expand=3640)
2457#[inline]
2458#[target_feature(enable = "avx512f,avx512vl")]
2459#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2460#[cfg_attr(test, assert_instr(vmaxpd))]
2461pub fn _mm256_mask_max_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
2462 unsafe {
2463 let max: f64x4 = _mm256_max_pd(a, b).as_f64x4();
2464 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_f64x4()))
2465 }
2466}
2467
2468/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2469///
2470/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_pd&expand=3641)
2471#[inline]
2472#[target_feature(enable = "avx512f,avx512vl")]
2473#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2474#[cfg_attr(test, assert_instr(vmaxpd))]
2475pub fn _mm256_maskz_max_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
2476 unsafe {
2477 let max: f64x4 = _mm256_max_pd(a, b).as_f64x4();
2478 transmute(src:simd_select_bitmask(m:k, yes:max, no:f64x4::ZERO))
2479 }
2480}
2481
2482/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2483///
2484/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_pd&expand=3637)
2485#[inline]
2486#[target_feature(enable = "avx512f,avx512vl")]
2487#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2488#[cfg_attr(test, assert_instr(vmaxpd))]
2489pub fn _mm_mask_max_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
2490 unsafe {
2491 let max: f64x2 = _mm_max_pd(a, b).as_f64x2();
2492 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_f64x2()))
2493 }
2494}
2495
2496/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2497///
2498/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_pd&expand=3638)
2499#[inline]
2500#[target_feature(enable = "avx512f,avx512vl")]
2501#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2502#[cfg_attr(test, assert_instr(vmaxpd))]
2503pub fn _mm_maskz_max_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
2504 unsafe {
2505 let max: f64x2 = _mm_max_pd(a, b).as_f64x2();
2506 transmute(src:simd_select_bitmask(m:k, yes:max, no:f64x2::ZERO))
2507 }
2508}
2509
2510/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst.
2511///
2512/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epu32&expand=3618)
2513#[inline]
2514#[target_feature(enable = "avx512f")]
2515#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2516#[cfg_attr(test, assert_instr(vpmaxud))]
2517pub fn _mm512_max_epu32(a: __m512i, b: __m512i) -> __m512i {
2518 unsafe {
2519 let a: u32x16 = a.as_u32x16();
2520 let b: u32x16 = b.as_u32x16();
2521 transmute(src:simd_select::<i32x16, _>(mask:simd_gt(a, b), if_true:a, if_false:b))
2522 }
2523}
2524
2525/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2526///
2527/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epu32&expand=3616)
2528#[inline]
2529#[target_feature(enable = "avx512f")]
2530#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2531#[cfg_attr(test, assert_instr(vpmaxud))]
2532pub fn _mm512_mask_max_epu32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2533 unsafe {
2534 let max: u32x16 = _mm512_max_epu32(a, b).as_u32x16();
2535 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_u32x16()))
2536 }
2537}
2538
2539/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2540///
2541/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epu32&expand=3617)
2542#[inline]
2543#[target_feature(enable = "avx512f")]
2544#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2545#[cfg_attr(test, assert_instr(vpmaxud))]
2546pub fn _mm512_maskz_max_epu32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2547 unsafe {
2548 let max: u32x16 = _mm512_max_epu32(a, b).as_u32x16();
2549 transmute(src:simd_select_bitmask(m:k, yes:max, no:u32x16::ZERO))
2550 }
2551}
2552
2553/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2554///
2555/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epu32&expand=3613)
2556#[inline]
2557#[target_feature(enable = "avx512f,avx512vl")]
2558#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2559#[cfg_attr(test, assert_instr(vpmaxud))]
2560pub fn _mm256_mask_max_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2561 unsafe {
2562 let max: u32x8 = _mm256_max_epu32(a, b).as_u32x8();
2563 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_u32x8()))
2564 }
2565}
2566
2567/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2568///
2569/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epu32&expand=3614)
2570#[inline]
2571#[target_feature(enable = "avx512f,avx512vl")]
2572#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2573#[cfg_attr(test, assert_instr(vpmaxud))]
2574pub fn _mm256_maskz_max_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2575 unsafe {
2576 let max: u32x8 = _mm256_max_epu32(a, b).as_u32x8();
2577 transmute(src:simd_select_bitmask(m:k, yes:max, no:u32x8::ZERO))
2578 }
2579}
2580
2581/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2582///
2583/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epu32&expand=3610)
2584#[inline]
2585#[target_feature(enable = "avx512f,avx512vl")]
2586#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2587#[cfg_attr(test, assert_instr(vpmaxud))]
2588pub fn _mm_mask_max_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2589 unsafe {
2590 let max: u32x4 = _mm_max_epu32(a, b).as_u32x4();
2591 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_u32x4()))
2592 }
2593}
2594
2595/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2596///
2597/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epu32&expand=3611)
2598#[inline]
2599#[target_feature(enable = "avx512f,avx512vl")]
2600#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2601#[cfg_attr(test, assert_instr(vpmaxud))]
2602pub fn _mm_maskz_max_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2603 unsafe {
2604 let max: u32x4 = _mm_max_epu32(a, b).as_u32x4();
2605 transmute(src:simd_select_bitmask(m:k, yes:max, no:u32x4::ZERO))
2606 }
2607}
2608
2609/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst.
2610///
2611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epu64&expand=3627)
2612#[inline]
2613#[target_feature(enable = "avx512f")]
2614#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2615#[cfg_attr(test, assert_instr(vpmaxuq))]
2616pub fn _mm512_max_epu64(a: __m512i, b: __m512i) -> __m512i {
2617 unsafe {
2618 let a: u64x8 = a.as_u64x8();
2619 let b: u64x8 = b.as_u64x8();
2620 transmute(src:simd_select::<i64x8, _>(mask:simd_gt(a, b), if_true:a, if_false:b))
2621 }
2622}
2623
2624/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2625///
2626/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epu64&expand=3625)
2627#[inline]
2628#[target_feature(enable = "avx512f")]
2629#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2630#[cfg_attr(test, assert_instr(vpmaxuq))]
2631pub fn _mm512_mask_max_epu64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
2632 unsafe {
2633 let max: u64x8 = _mm512_max_epu64(a, b).as_u64x8();
2634 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_u64x8()))
2635 }
2636}
2637
2638/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2639///
2640/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epu64&expand=3626)
2641#[inline]
2642#[target_feature(enable = "avx512f")]
2643#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2644#[cfg_attr(test, assert_instr(vpmaxuq))]
2645pub fn _mm512_maskz_max_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
2646 unsafe {
2647 let max: u64x8 = _mm512_max_epu64(a, b).as_u64x8();
2648 transmute(src:simd_select_bitmask(m:k, yes:max, no:u64x8::ZERO))
2649 }
2650}
2651
2652/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst.
2653///
2654/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epu64&expand=3624)
2655#[inline]
2656#[target_feature(enable = "avx512f,avx512vl")]
2657#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2658#[cfg_attr(test, assert_instr(vpmaxuq))]
2659pub fn _mm256_max_epu64(a: __m256i, b: __m256i) -> __m256i {
2660 unsafe {
2661 let a: u64x4 = a.as_u64x4();
2662 let b: u64x4 = b.as_u64x4();
2663 transmute(src:simd_select::<i64x4, _>(mask:simd_gt(a, b), if_true:a, if_false:b))
2664 }
2665}
2666
2667/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2668///
2669/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epu64&expand=3622)
2670#[inline]
2671#[target_feature(enable = "avx512f,avx512vl")]
2672#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2673#[cfg_attr(test, assert_instr(vpmaxuq))]
2674pub fn _mm256_mask_max_epu64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2675 unsafe {
2676 let max: u64x4 = _mm256_max_epu64(a, b).as_u64x4();
2677 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_u64x4()))
2678 }
2679}
2680
2681/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2682///
2683/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epu64&expand=3623)
2684#[inline]
2685#[target_feature(enable = "avx512f,avx512vl")]
2686#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2687#[cfg_attr(test, assert_instr(vpmaxuq))]
2688pub fn _mm256_maskz_max_epu64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2689 unsafe {
2690 let max: u64x4 = _mm256_max_epu64(a, b).as_u64x4();
2691 transmute(src:simd_select_bitmask(m:k, yes:max, no:u64x4::ZERO))
2692 }
2693}
2694
2695/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst.
2696///
2697/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epu64&expand=3621)
2698#[inline]
2699#[target_feature(enable = "avx512f,avx512vl")]
2700#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2701#[cfg_attr(test, assert_instr(vpmaxuq))]
2702pub fn _mm_max_epu64(a: __m128i, b: __m128i) -> __m128i {
2703 unsafe {
2704 let a: u64x2 = a.as_u64x2();
2705 let b: u64x2 = b.as_u64x2();
2706 transmute(src:simd_select::<i64x2, _>(mask:simd_gt(a, b), if_true:a, if_false:b))
2707 }
2708}
2709
2710/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2711///
2712/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epu64&expand=3619)
2713#[inline]
2714#[target_feature(enable = "avx512f,avx512vl")]
2715#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2716#[cfg_attr(test, assert_instr(vpmaxuq))]
2717pub fn _mm_mask_max_epu64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2718 unsafe {
2719 let max: u64x2 = _mm_max_epu64(a, b).as_u64x2();
2720 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_u64x2()))
2721 }
2722}
2723
2724/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2725///
2726/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epu64&expand=3620)
2727#[inline]
2728#[target_feature(enable = "avx512f,avx512vl")]
2729#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2730#[cfg_attr(test, assert_instr(vpmaxuq))]
2731pub fn _mm_maskz_max_epu64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2732 unsafe {
2733 let max: u64x2 = _mm_max_epu64(a, b).as_u64x2();
2734 transmute(src:simd_select_bitmask(m:k, yes:max, no:u64x2::ZERO))
2735 }
2736}
2737
2738/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst.
2739///
2740/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epi32&expand=3696)
2741#[inline]
2742#[target_feature(enable = "avx512f")]
2743#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2744#[cfg_attr(test, assert_instr(vpminsd))]
2745pub fn _mm512_min_epi32(a: __m512i, b: __m512i) -> __m512i {
2746 unsafe {
2747 let a: i32x16 = a.as_i32x16();
2748 let b: i32x16 = b.as_i32x16();
2749 transmute(src:simd_select::<i32x16, _>(mask:simd_lt(a, b), if_true:a, if_false:b))
2750 }
2751}
2752
2753/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2754///
2755/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epi32&expand=3694)
2756#[inline]
2757#[target_feature(enable = "avx512f")]
2758#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2759#[cfg_attr(test, assert_instr(vpminsd))]
2760pub fn _mm512_mask_min_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2761 unsafe {
2762 let min: i32x16 = _mm512_min_epi32(a, b).as_i32x16();
2763 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_i32x16()))
2764 }
2765}
2766
2767/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2768///
2769/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epi32&expand=3695)
2770#[inline]
2771#[target_feature(enable = "avx512f")]
2772#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2773#[cfg_attr(test, assert_instr(vpminsd))]
2774pub fn _mm512_maskz_min_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2775 unsafe {
2776 let min: i32x16 = _mm512_min_epi32(a, b).as_i32x16();
2777 transmute(src:simd_select_bitmask(m:k, yes:min, no:i32x16::ZERO))
2778 }
2779}
2780
2781/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2782///
2783/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epi32&expand=3691)
2784#[inline]
2785#[target_feature(enable = "avx512f,avx512vl")]
2786#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2787#[cfg_attr(test, assert_instr(vpminsd))]
2788pub fn _mm256_mask_min_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2789 unsafe {
2790 let min: i32x8 = _mm256_min_epi32(a, b).as_i32x8();
2791 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_i32x8()))
2792 }
2793}
2794
2795/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2796///
2797/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epi32&expand=3692)
2798#[inline]
2799#[target_feature(enable = "avx512f,avx512vl")]
2800#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2801#[cfg_attr(test, assert_instr(vpminsd))]
2802pub fn _mm256_maskz_min_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2803 unsafe {
2804 let min: i32x8 = _mm256_min_epi32(a, b).as_i32x8();
2805 transmute(src:simd_select_bitmask(m:k, yes:min, no:i32x8::ZERO))
2806 }
2807}
2808
2809/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2810///
2811/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epi32&expand=3688)
2812#[inline]
2813#[target_feature(enable = "avx512f,avx512vl")]
2814#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2815#[cfg_attr(test, assert_instr(vpminsd))]
2816pub fn _mm_mask_min_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2817 unsafe {
2818 let min: i32x4 = _mm_min_epi32(a, b).as_i32x4();
2819 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_i32x4()))
2820 }
2821}
2822
2823/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2824///
2825/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epi32&expand=3689)
2826#[inline]
2827#[target_feature(enable = "avx512f,avx512vl")]
2828#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2829#[cfg_attr(test, assert_instr(vpminsd))]
2830pub fn _mm_maskz_min_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2831 unsafe {
2832 let min: i32x4 = _mm_min_epi32(a, b).as_i32x4();
2833 transmute(src:simd_select_bitmask(m:k, yes:min, no:i32x4::ZERO))
2834 }
2835}
2836
2837/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst.
2838///
2839/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epi64&expand=3705)
2840#[inline]
2841#[target_feature(enable = "avx512f")]
2842#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2843#[cfg_attr(test, assert_instr(vpminsq))]
2844pub fn _mm512_min_epi64(a: __m512i, b: __m512i) -> __m512i {
2845 unsafe {
2846 let a: i64x8 = a.as_i64x8();
2847 let b: i64x8 = b.as_i64x8();
2848 transmute(src:simd_select::<i64x8, _>(mask:simd_lt(a, b), if_true:a, if_false:b))
2849 }
2850}
2851
2852/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2853///
2854/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epi64&expand=3703)
2855#[inline]
2856#[target_feature(enable = "avx512f")]
2857#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2858#[cfg_attr(test, assert_instr(vpminsq))]
2859pub fn _mm512_mask_min_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
2860 unsafe {
2861 let min: i64x8 = _mm512_min_epi64(a, b).as_i64x8();
2862 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_i64x8()))
2863 }
2864}
2865
2866/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2867///
2868/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epi64&expand=3704)
2869#[inline]
2870#[target_feature(enable = "avx512f")]
2871#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2872#[cfg_attr(test, assert_instr(vpminsq))]
2873pub fn _mm512_maskz_min_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
2874 unsafe {
2875 let min: i64x8 = _mm512_min_epi64(a, b).as_i64x8();
2876 transmute(src:simd_select_bitmask(m:k, yes:min, no:i64x8::ZERO))
2877 }
2878}
2879
2880/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst.
2881///
2882/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epi64&expand=3702)
2883#[inline]
2884#[target_feature(enable = "avx512f,avx512vl")]
2885#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2886#[cfg_attr(test, assert_instr(vpminsq))]
2887pub fn _mm256_min_epi64(a: __m256i, b: __m256i) -> __m256i {
2888 unsafe {
2889 let a: i64x4 = a.as_i64x4();
2890 let b: i64x4 = b.as_i64x4();
2891 transmute(src:simd_select::<i64x4, _>(mask:simd_lt(a, b), if_true:a, if_false:b))
2892 }
2893}
2894
2895/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2896///
2897/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epi64&expand=3700)
2898#[inline]
2899#[target_feature(enable = "avx512f,avx512vl")]
2900#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2901#[cfg_attr(test, assert_instr(vpminsq))]
2902pub fn _mm256_mask_min_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2903 unsafe {
2904 let min: i64x4 = _mm256_min_epi64(a, b).as_i64x4();
2905 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_i64x4()))
2906 }
2907}
2908
2909/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2910///
2911/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epi64&expand=3701)
2912#[inline]
2913#[target_feature(enable = "avx512f,avx512vl")]
2914#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2915#[cfg_attr(test, assert_instr(vpminsq))]
2916pub fn _mm256_maskz_min_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2917 unsafe {
2918 let min: i64x4 = _mm256_min_epi64(a, b).as_i64x4();
2919 transmute(src:simd_select_bitmask(m:k, yes:min, no:i64x4::ZERO))
2920 }
2921}
2922
2923/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst.
2924///
2925/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epi64)
2926#[inline]
2927#[target_feature(enable = "avx512f,avx512vl")]
2928#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2929#[cfg_attr(test, assert_instr(vpminsq))]
2930pub fn _mm_min_epi64(a: __m128i, b: __m128i) -> __m128i {
2931 unsafe {
2932 let a: i64x2 = a.as_i64x2();
2933 let b: i64x2 = b.as_i64x2();
2934 transmute(src:simd_select::<i64x2, _>(mask:simd_lt(a, b), if_true:a, if_false:b))
2935 }
2936}
2937
2938/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2939///
2940/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epi64)
2941#[inline]
2942#[target_feature(enable = "avx512f,avx512vl")]
2943#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2944#[cfg_attr(test, assert_instr(vpminsq))]
2945pub fn _mm_mask_min_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2946 unsafe {
2947 let min: i64x2 = _mm_min_epi64(a, b).as_i64x2();
2948 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_i64x2()))
2949 }
2950}
2951
2952/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2953///
2954/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epi64)
2955#[inline]
2956#[target_feature(enable = "avx512f,avx512vl")]
2957#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2958#[cfg_attr(test, assert_instr(vpminsq))]
2959pub fn _mm_maskz_min_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2960 unsafe {
2961 let min: i64x2 = _mm_min_epi64(a, b).as_i64x2();
2962 transmute(src:simd_select_bitmask(m:k, yes:min, no:i64x2::ZERO))
2963 }
2964}
2965
2966/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst.
2967///
2968/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_ps&expand=3769)
2969#[inline]
2970#[target_feature(enable = "avx512f")]
2971#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2972#[cfg_attr(test, assert_instr(vminps))]
2973pub fn _mm512_min_ps(a: __m512, b: __m512) -> __m512 {
2974 unsafe {
2975 transmute(src:vminps(
2976 a.as_f32x16(),
2977 b.as_f32x16(),
2978 _MM_FROUND_CUR_DIRECTION,
2979 ))
2980 }
2981}
2982
2983/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2984///
2985/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_ps&expand=3767)
2986#[inline]
2987#[target_feature(enable = "avx512f")]
2988#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
2989#[cfg_attr(test, assert_instr(vminps))]
2990pub fn _mm512_mask_min_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
2991 unsafe {
2992 let min: f32x16 = _mm512_min_ps(a, b).as_f32x16();
2993 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_f32x16()))
2994 }
2995}
2996
2997/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2998///
2999/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_ps&expand=3768)
3000#[inline]
3001#[target_feature(enable = "avx512f")]
3002#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3003#[cfg_attr(test, assert_instr(vminps))]
3004pub fn _mm512_maskz_min_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
3005 unsafe {
3006 let min: f32x16 = _mm512_min_ps(a, b).as_f32x16();
3007 transmute(src:simd_select_bitmask(m:k, yes:min, no:f32x16::ZERO))
3008 }
3009}
3010
3011/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3012///
3013/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_ps&expand=3764)
3014#[inline]
3015#[target_feature(enable = "avx512f,avx512vl")]
3016#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3017#[cfg_attr(test, assert_instr(vminps))]
3018pub fn _mm256_mask_min_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
3019 unsafe {
3020 let min: f32x8 = _mm256_min_ps(a, b).as_f32x8();
3021 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_f32x8()))
3022 }
3023}
3024
3025/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3026///
3027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_ps&expand=3765)
3028#[inline]
3029#[target_feature(enable = "avx512f,avx512vl")]
3030#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3031#[cfg_attr(test, assert_instr(vminps))]
3032pub fn _mm256_maskz_min_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
3033 unsafe {
3034 let min: f32x8 = _mm256_min_ps(a, b).as_f32x8();
3035 transmute(src:simd_select_bitmask(m:k, yes:min, no:f32x8::ZERO))
3036 }
3037}
3038
3039/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3040///
3041/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_ps&expand=3761)
3042#[inline]
3043#[target_feature(enable = "avx512f,avx512vl")]
3044#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3045#[cfg_attr(test, assert_instr(vminps))]
3046pub fn _mm_mask_min_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
3047 unsafe {
3048 let min: f32x4 = _mm_min_ps(a, b).as_f32x4();
3049 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_f32x4()))
3050 }
3051}
3052
3053/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3054///
3055/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_ps&expand=3762)
3056#[inline]
3057#[target_feature(enable = "avx512f,avx512vl")]
3058#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3059#[cfg_attr(test, assert_instr(vminps))]
3060pub fn _mm_maskz_min_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
3061 unsafe {
3062 let min: f32x4 = _mm_min_ps(a, b).as_f32x4();
3063 transmute(src:simd_select_bitmask(m:k, yes:min, no:f32x4::ZERO))
3064 }
3065}
3066
3067/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst.
3068///
3069/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_pd&expand=3759)
3070#[inline]
3071#[target_feature(enable = "avx512f")]
3072#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3073#[cfg_attr(test, assert_instr(vminpd))]
3074pub fn _mm512_min_pd(a: __m512d, b: __m512d) -> __m512d {
3075 unsafe { transmute(src:vminpd(a.as_f64x8(), b.as_f64x8(), _MM_FROUND_CUR_DIRECTION)) }
3076}
3077
3078/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3079///
3080/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_pd&expand=3757)
3081#[inline]
3082#[target_feature(enable = "avx512f")]
3083#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3084#[cfg_attr(test, assert_instr(vminpd))]
3085pub fn _mm512_mask_min_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
3086 unsafe {
3087 let min: f64x8 = _mm512_min_pd(a, b).as_f64x8();
3088 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_f64x8()))
3089 }
3090}
3091
3092/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3093///
3094/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_pd&expand=3758)
3095#[inline]
3096#[target_feature(enable = "avx512f")]
3097#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3098#[cfg_attr(test, assert_instr(vminpd))]
3099pub fn _mm512_maskz_min_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
3100 unsafe {
3101 let min: f64x8 = _mm512_min_pd(a, b).as_f64x8();
3102 transmute(src:simd_select_bitmask(m:k, yes:min, no:f64x8::ZERO))
3103 }
3104}
3105
3106/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3107///
3108/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_pd&expand=3754)
3109#[inline]
3110#[target_feature(enable = "avx512f,avx512vl")]
3111#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3112#[cfg_attr(test, assert_instr(vminpd))]
3113pub fn _mm256_mask_min_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
3114 unsafe {
3115 let min: f64x4 = _mm256_min_pd(a, b).as_f64x4();
3116 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_f64x4()))
3117 }
3118}
3119
3120/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3121///
3122/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_pd&expand=3755)
3123#[inline]
3124#[target_feature(enable = "avx512f,avx512vl")]
3125#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3126#[cfg_attr(test, assert_instr(vminpd))]
3127pub fn _mm256_maskz_min_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
3128 unsafe {
3129 let min: f64x4 = _mm256_min_pd(a, b).as_f64x4();
3130 transmute(src:simd_select_bitmask(m:k, yes:min, no:f64x4::ZERO))
3131 }
3132}
3133
3134/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3135///
3136/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_pd&expand=3751)
3137#[inline]
3138#[target_feature(enable = "avx512f,avx512vl")]
3139#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3140#[cfg_attr(test, assert_instr(vminpd))]
3141pub fn _mm_mask_min_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
3142 unsafe {
3143 let min: f64x2 = _mm_min_pd(a, b).as_f64x2();
3144 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_f64x2()))
3145 }
3146}
3147
3148/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3149///
3150/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_pd&expand=3752)
3151#[inline]
3152#[target_feature(enable = "avx512f,avx512vl")]
3153#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3154#[cfg_attr(test, assert_instr(vminpd))]
3155pub fn _mm_maskz_min_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
3156 unsafe {
3157 let min: f64x2 = _mm_min_pd(a, b).as_f64x2();
3158 transmute(src:simd_select_bitmask(m:k, yes:min, no:f64x2::ZERO))
3159 }
3160}
3161
3162/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst.
3163///
3164/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epu32&expand=3732)
3165#[inline]
3166#[target_feature(enable = "avx512f")]
3167#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3168#[cfg_attr(test, assert_instr(vpminud))]
3169pub fn _mm512_min_epu32(a: __m512i, b: __m512i) -> __m512i {
3170 unsafe {
3171 let a: u32x16 = a.as_u32x16();
3172 let b: u32x16 = b.as_u32x16();
3173 transmute(src:simd_select::<i32x16, _>(mask:simd_lt(a, b), if_true:a, if_false:b))
3174 }
3175}
3176
3177/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3178///
3179/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epu32&expand=3730)
3180#[inline]
3181#[target_feature(enable = "avx512f")]
3182#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3183#[cfg_attr(test, assert_instr(vpminud))]
3184pub fn _mm512_mask_min_epu32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
3185 unsafe {
3186 let min: u32x16 = _mm512_min_epu32(a, b).as_u32x16();
3187 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_u32x16()))
3188 }
3189}
3190
3191/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3192///
3193/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epu32&expand=3731)
3194#[inline]
3195#[target_feature(enable = "avx512f")]
3196#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3197#[cfg_attr(test, assert_instr(vpminud))]
3198pub fn _mm512_maskz_min_epu32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
3199 unsafe {
3200 let min: u32x16 = _mm512_min_epu32(a, b).as_u32x16();
3201 transmute(src:simd_select_bitmask(m:k, yes:min, no:u32x16::ZERO))
3202 }
3203}
3204
3205/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3206///
3207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epu32&expand=3727)
3208#[inline]
3209#[target_feature(enable = "avx512f,avx512vl")]
3210#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3211#[cfg_attr(test, assert_instr(vpminud))]
3212pub fn _mm256_mask_min_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
3213 unsafe {
3214 let min: u32x8 = _mm256_min_epu32(a, b).as_u32x8();
3215 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_u32x8()))
3216 }
3217}
3218
3219/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3220///
3221/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epu32&expand=3728)
3222#[inline]
3223#[target_feature(enable = "avx512f,avx512vl")]
3224#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3225#[cfg_attr(test, assert_instr(vpminud))]
3226pub fn _mm256_maskz_min_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
3227 unsafe {
3228 let min: u32x8 = _mm256_min_epu32(a, b).as_u32x8();
3229 transmute(src:simd_select_bitmask(m:k, yes:min, no:u32x8::ZERO))
3230 }
3231}
3232
3233/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3234///
3235/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epu32&expand=3724)
3236#[inline]
3237#[target_feature(enable = "avx512f,avx512vl")]
3238#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3239#[cfg_attr(test, assert_instr(vpminud))]
3240pub fn _mm_mask_min_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
3241 unsafe {
3242 let min: u32x4 = _mm_min_epu32(a, b).as_u32x4();
        transmute(simd_select_bitmask(k, min, src.as_u32x4()))
3244 }
3245}
3246
3247/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3248///
3249/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epu32&expand=3725)
3250#[inline]
3251#[target_feature(enable = "avx512f,avx512vl")]
3252#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3253#[cfg_attr(test, assert_instr(vpminud))]
3254pub fn _mm_maskz_min_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
3255 unsafe {
3256 let min: u32x4 = _mm_min_epu32(a, b).as_u32x4();
        transmute(simd_select_bitmask(k, min, u32x4::ZERO))
3258 }
3259}
3260
3261/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst.
3262///
3263/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epu64&expand=3741)
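///
/// # Example
///
/// An illustrative sketch (hypothetical values; requires an AVX-512F-capable
/// CPU). The comparison is unsigned, so an all-ones lane is `u64::MAX`, not `-1`:
///
/// ```ignore
/// let a = _mm512_set1_epi64(-1); // all bits set, i.e. u64::MAX when viewed unsigned
/// let b = _mm512_set1_epi64(5);
/// // Every lane of `r` is 5, because 5 < u64::MAX in an unsigned comparison.
/// let r = _mm512_min_epu64(a, b);
/// ```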
3264#[inline]
3265#[target_feature(enable = "avx512f")]
3266#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3267#[cfg_attr(test, assert_instr(vpminuq))]
3268pub fn _mm512_min_epu64(a: __m512i, b: __m512i) -> __m512i {
3269 unsafe {
3270 let a: u64x8 = a.as_u64x8();
3271 let b: u64x8 = b.as_u64x8();
        transmute(simd_select::<i64x8, _>(simd_lt(a, b), a, b))
3273 }
3274}
3275
3276/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3277///
3278/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epu64&expand=3739)
3279#[inline]
3280#[target_feature(enable = "avx512f")]
3281#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3282#[cfg_attr(test, assert_instr(vpminuq))]
3283pub fn _mm512_mask_min_epu64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
3284 unsafe {
3285 let min: u64x8 = _mm512_min_epu64(a, b).as_u64x8();
        transmute(simd_select_bitmask(k, min, src.as_u64x8()))
3287 }
3288}
3289
3290/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3291///
3292/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epu64&expand=3740)
3293#[inline]
3294#[target_feature(enable = "avx512f")]
3295#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3296#[cfg_attr(test, assert_instr(vpminuq))]
3297pub fn _mm512_maskz_min_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
3298 unsafe {
3299 let min: u64x8 = _mm512_min_epu64(a, b).as_u64x8();
        transmute(simd_select_bitmask(k, min, u64x8::ZERO))
3301 }
3302}
3303
3304/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst.
3305///
3306/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epu64&expand=3738)
3307#[inline]
3308#[target_feature(enable = "avx512f,avx512vl")]
3309#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3310#[cfg_attr(test, assert_instr(vpminuq))]
3311pub fn _mm256_min_epu64(a: __m256i, b: __m256i) -> __m256i {
3312 unsafe {
3313 let a: u64x4 = a.as_u64x4();
3314 let b: u64x4 = b.as_u64x4();
        transmute(simd_select::<i64x4, _>(simd_lt(a, b), a, b))
3316 }
3317}
3318
3319/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3320///
3321/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epu64&expand=3736)
3322#[inline]
3323#[target_feature(enable = "avx512f,avx512vl")]
3324#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3325#[cfg_attr(test, assert_instr(vpminuq))]
3326pub fn _mm256_mask_min_epu64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
3327 unsafe {
3328 let min: u64x4 = _mm256_min_epu64(a, b).as_u64x4();
        transmute(simd_select_bitmask(k, min, src.as_u64x4()))
3330 }
3331}
3332
3333/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3334///
3335/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epu64&expand=3737)
3336#[inline]
3337#[target_feature(enable = "avx512f,avx512vl")]
3338#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3339#[cfg_attr(test, assert_instr(vpminuq))]
3340pub fn _mm256_maskz_min_epu64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
3341 unsafe {
3342 let min: u64x4 = _mm256_min_epu64(a, b).as_u64x4();
        transmute(simd_select_bitmask(k, min, u64x4::ZERO))
3344 }
3345}
3346
3347/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst.
3348///
3349/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epu64&expand=3735)
3350#[inline]
3351#[target_feature(enable = "avx512f,avx512vl")]
3352#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3353#[cfg_attr(test, assert_instr(vpminuq))]
3354pub fn _mm_min_epu64(a: __m128i, b: __m128i) -> __m128i {
3355 unsafe {
3356 let a: u64x2 = a.as_u64x2();
3357 let b: u64x2 = b.as_u64x2();
        transmute(simd_select::<i64x2, _>(simd_lt(a, b), a, b))
3359 }
3360}
3361
3362/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3363///
3364/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epu64&expand=3733)
3365#[inline]
3366#[target_feature(enable = "avx512f,avx512vl")]
3367#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3368#[cfg_attr(test, assert_instr(vpminuq))]
3369pub fn _mm_mask_min_epu64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
3370 unsafe {
3371 let min: u64x2 = _mm_min_epu64(a, b).as_u64x2();
        transmute(simd_select_bitmask(k, min, src.as_u64x2()))
3373 }
3374}
3375
3376/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3377///
3378/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epu64&expand=3734)
3379#[inline]
3380#[target_feature(enable = "avx512f,avx512vl")]
3381#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3382#[cfg_attr(test, assert_instr(vpminuq))]
3383pub fn _mm_maskz_min_epu64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
3384 unsafe {
3385 let min: u64x2 = _mm_min_epu64(a, b).as_u64x2();
        transmute(simd_select_bitmask(k, min, u64x2::ZERO))
3387 }
3388}
3389
3390/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst.
3391///
3392/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_ps&expand=5371)
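///
/// # Example
///
/// A minimal sketch (hypothetical values; requires an AVX-512F-capable CPU):
///
/// ```ignore
/// let a = _mm512_set1_ps(9.0);
/// // Every lane of `r` is 3.0.
/// let r = _mm512_sqrt_ps(a);
/// ```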
3393#[inline]
3394#[target_feature(enable = "avx512f")]
3395#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3396#[cfg_attr(test, assert_instr(vsqrtps))]
3397pub fn _mm512_sqrt_ps(a: __m512) -> __m512 {
3398 unsafe { simd_fsqrt(a) }
3399}
3400
3401/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3402///
3403/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_ps&expand=5369)
3404#[inline]
3405#[target_feature(enable = "avx512f")]
3406#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3407#[cfg_attr(test, assert_instr(vsqrtps))]
3408pub fn _mm512_mask_sqrt_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
3410}
3411
3412/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3413///
3414/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_ps&expand=5370)
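///
/// # Example
///
/// A minimal sketch of the zeromask behaviour (hypothetical values; requires an
/// AVX-512F-capable CPU):
///
/// ```ignore
/// let a = _mm512_set1_ps(16.0);
/// // Lanes 0..8 become 4.0; lanes 8..16 are zeroed because their mask bits are clear.
/// let r = _mm512_maskz_sqrt_ps(0x00FF, a);
/// ```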
3415#[inline]
3416#[target_feature(enable = "avx512f")]
3417#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3418#[cfg_attr(test, assert_instr(vsqrtps))]
3419pub fn _mm512_maskz_sqrt_ps(k: __mmask16, a: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm512_setzero_ps()) }
3421}
3422
3423/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3424///
3425/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sqrt_ps&expand=5366)
3426#[inline]
3427#[target_feature(enable = "avx512f,avx512vl")]
3428#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3429#[cfg_attr(test, assert_instr(vsqrtps))]
3430pub fn _mm256_mask_sqrt_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
3432}
3433
3434/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3435///
3436/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sqrt_ps&expand=5367)
3437#[inline]
3438#[target_feature(enable = "avx512f,avx512vl")]
3439#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3440#[cfg_attr(test, assert_instr(vsqrtps))]
3441pub fn _mm256_maskz_sqrt_ps(k: __mmask8, a: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm256_setzero_ps()) }
3443}
3444
3445/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3446///
3447/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sqrt_ps&expand=5363)
3448#[inline]
3449#[target_feature(enable = "avx512f,avx512vl")]
3450#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3451#[cfg_attr(test, assert_instr(vsqrtps))]
3452pub fn _mm_mask_sqrt_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
3454}
3455
3456/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3457///
3458/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sqrt_ps&expand=5364)
3459#[inline]
3460#[target_feature(enable = "avx512f,avx512vl")]
3461#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3462#[cfg_attr(test, assert_instr(vsqrtps))]
3463pub fn _mm_maskz_sqrt_ps(k: __mmask8, a: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm_setzero_ps()) }
3465}
3466
3467/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst.
3468///
3469/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_pd&expand=5362)
3470#[inline]
3471#[target_feature(enable = "avx512f")]
3472#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3473#[cfg_attr(test, assert_instr(vsqrtpd))]
3474pub fn _mm512_sqrt_pd(a: __m512d) -> __m512d {
3475 unsafe { simd_fsqrt(a) }
3476}
3477
3478/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3479///
3480/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_pd&expand=5360)
3481#[inline]
3482#[target_feature(enable = "avx512f")]
3483#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3484#[cfg_attr(test, assert_instr(vsqrtpd))]
3485pub fn _mm512_mask_sqrt_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
3487}
3488
3489/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3490///
3491/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_pd&expand=5361)
3492#[inline]
3493#[target_feature(enable = "avx512f")]
3494#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3495#[cfg_attr(test, assert_instr(vsqrtpd))]
3496pub fn _mm512_maskz_sqrt_pd(k: __mmask8, a: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm512_setzero_pd()) }
3498}
3499
3500/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3501///
3502/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sqrt_pd&expand=5357)
3503#[inline]
3504#[target_feature(enable = "avx512f,avx512vl")]
3505#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3506#[cfg_attr(test, assert_instr(vsqrtpd))]
3507pub fn _mm256_mask_sqrt_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
3509}
3510
3511/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3512///
3513/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sqrt_pd&expand=5358)
3514#[inline]
3515#[target_feature(enable = "avx512f,avx512vl")]
3516#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3517#[cfg_attr(test, assert_instr(vsqrtpd))]
3518pub fn _mm256_maskz_sqrt_pd(k: __mmask8, a: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm256_setzero_pd()) }
3520}
3521
3522/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3523///
3524/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sqrt_pd&expand=5354)
3525#[inline]
3526#[target_feature(enable = "avx512f,avx512vl")]
3527#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3528#[cfg_attr(test, assert_instr(vsqrtpd))]
3529pub fn _mm_mask_sqrt_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
3531}
3532
3533/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3534///
3535/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sqrt_pd&expand=5355)
3536#[inline]
3537#[target_feature(enable = "avx512f,avx512vl")]
3538#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3539#[cfg_attr(test, assert_instr(vsqrtpd))]
3540pub fn _mm_maskz_sqrt_pd(k: __mmask8, a: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm_setzero_pd()) }
3542}
3543
3544/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.
3545///
3546/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_ps&expand=2557)
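///
/// # Example
///
/// A minimal sketch (hypothetical values; requires an AVX-512F-capable CPU).
/// Each lane computes `a * b + c` with a single rounding:
///
/// ```ignore
/// let a = _mm512_set1_ps(2.0);
/// let b = _mm512_set1_ps(3.0);
/// let c = _mm512_set1_ps(1.0);
/// // Every lane of `r` is 2.0 * 3.0 + 1.0 == 7.0.
/// let r = _mm512_fmadd_ps(a, b, c);
/// ```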
3547#[inline]
3548#[target_feature(enable = "avx512f")]
3549#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3550#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3551pub fn _mm512_fmadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_fma(a, b, c) }
3553}
3554
3555/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3556///
3557/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_ps&expand=2558)
3558#[inline]
3559#[target_feature(enable = "avx512f")]
3560#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3561#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3562pub fn _mm512_mask_fmadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fmadd_ps(a, b, c), a) }
3564}
3565
3566/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3567///
3568/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_ps&expand=2560)
3569#[inline]
3570#[target_feature(enable = "avx512f")]
3571#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3572#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3573pub fn _mm512_maskz_fmadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fmadd_ps(a, b, c), _mm512_setzero_ps()) }
3575}
3576
3577/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3578///
3579/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_ps&expand=2559)
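///
/// # Example
///
/// A minimal sketch (hypothetical values; requires an AVX-512F-capable CPU)
/// showing that the `mask3` form takes its fallback elements from `c`, not `a`:
///
/// ```ignore
/// let a = _mm512_set1_ps(2.0);
/// let b = _mm512_set1_ps(3.0);
/// let c = _mm512_set1_ps(10.0);
/// // Lanes 0..8 become 2.0 * 3.0 + 10.0 == 16.0; lanes 8..16 keep 10.0 from `c`.
/// let r = _mm512_mask3_fmadd_ps(a, b, c, 0x00FF);
/// ```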
3580#[inline]
3581#[target_feature(enable = "avx512f")]
3582#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3583#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3584pub fn _mm512_mask3_fmadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fmadd_ps(a, b, c), c) }
3586}
3587
3588/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3589///
3590/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmadd_ps&expand=2554)
3591#[inline]
3592#[target_feature(enable = "avx512f,avx512vl")]
3593#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3594#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3595pub fn _mm256_mask_fmadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fmadd_ps(a, b, c), a) }
3597}
3598
3599/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3600///
3601/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmadd_ps&expand=2556)
3602#[inline]
3603#[target_feature(enable = "avx512f,avx512vl")]
3604#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3605#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3606pub fn _mm256_maskz_fmadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fmadd_ps(a, b, c), _mm256_setzero_ps()) }
3608}
3609
3610/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3611///
3612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmadd_ps&expand=2555)
3613#[inline]
3614#[target_feature(enable = "avx512f,avx512vl")]
3615#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3616#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3617pub fn _mm256_mask3_fmadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fmadd_ps(a, b, c), c) }
3619}
3620
3621/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3622///
3623/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmadd_ps&expand=2550)
3624#[inline]
3625#[target_feature(enable = "avx512f,avx512vl")]
3626#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3627#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3628pub fn _mm_mask_fmadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fmadd_ps(a, b, c), a) }
3630}
3631
3632/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3633///
3634/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmadd_ps&expand=2552)
3635#[inline]
3636#[target_feature(enable = "avx512f,avx512vl")]
3637#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3638#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3639pub fn _mm_maskz_fmadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fmadd_ps(a, b, c), _mm_setzero_ps()) }
3641}
3642
3643/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3644///
3645/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmadd_ps&expand=2551)
3646#[inline]
3647#[target_feature(enable = "avx512f,avx512vl")]
3648#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3649#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3650pub fn _mm_mask3_fmadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fmadd_ps(a, b, c), c) }
3652}
3653
3654/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.
3655///
3656/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_pd&expand=2545)
3657#[inline]
3658#[target_feature(enable = "avx512f")]
3659#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3660#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3661pub fn _mm512_fmadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_fma(a, b, c) }
3663}
3664
3665/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3666///
3667/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_pd&expand=2546)
3668#[inline]
3669#[target_feature(enable = "avx512f")]
3670#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3671#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3672pub fn _mm512_mask_fmadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fmadd_pd(a, b, c), a) }
3674}
3675
3676/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3677///
3678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_pd&expand=2548)
3679#[inline]
3680#[target_feature(enable = "avx512f")]
3681#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3682#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3683pub fn _mm512_maskz_fmadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fmadd_pd(a, b, c), _mm512_setzero_pd()) }
3685}
3686
3687/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3688///
3689/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_pd&expand=2547)
3690#[inline]
3691#[target_feature(enable = "avx512f")]
3692#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3693#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3694pub fn _mm512_mask3_fmadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fmadd_pd(a, b, c), c) }
3696}
3697
3698/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3699///
3700/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmadd_pd&expand=2542)
3701#[inline]
3702#[target_feature(enable = "avx512f,avx512vl")]
3703#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3704#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3705pub fn _mm256_mask_fmadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fmadd_pd(a, b, c), a) }
3707}
3708
3709/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3710///
3711/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmadd_pd&expand=2544)
3712#[inline]
3713#[target_feature(enable = "avx512f,avx512vl")]
3714#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3715#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3716pub fn _mm256_maskz_fmadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fmadd_pd(a, b, c), _mm256_setzero_pd()) }
3718}
3719
3720/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3721///
3722/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmadd_pd&expand=2543)
3723#[inline]
3724#[target_feature(enable = "avx512f,avx512vl")]
3725#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3726#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3727pub fn _mm256_mask3_fmadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fmadd_pd(a, b, c), c) }
3729}
3730
3731/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3732///
3733/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmadd_pd&expand=2538)
3734#[inline]
3735#[target_feature(enable = "avx512f,avx512vl")]
3736#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3737#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3738pub fn _mm_mask_fmadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fmadd_pd(a, b, c), a) }
3740}
3741
3742/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3743///
3744/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmadd_pd&expand=2540)
3745#[inline]
3746#[target_feature(enable = "avx512f,avx512vl")]
3747#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3748#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3749pub fn _mm_maskz_fmadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fmadd_pd(a, b, c), _mm_setzero_pd()) }
3751}
3752
3753/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3754///
3755/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmadd_pd&expand=2539)
3756#[inline]
3757#[target_feature(enable = "avx512f,avx512vl")]
3758#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3759#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3760pub fn _mm_mask3_fmadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fmadd_pd(a, b, c), c) }
3762}
3763
3764/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.
3765///
3766/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_ps&expand=2643)
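///
/// # Example
///
/// A minimal sketch (hypothetical values; requires an AVX-512F-capable CPU).
/// Each lane computes `a * b - c` with a single rounding:
///
/// ```ignore
/// let a = _mm512_set1_ps(2.0);
/// let b = _mm512_set1_ps(3.0);
/// let c = _mm512_set1_ps(1.0);
/// // Every lane of `r` is 2.0 * 3.0 - 1.0 == 5.0.
/// let r = _mm512_fmsub_ps(a, b, c);
/// ```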
3767#[inline]
3768#[target_feature(enable = "avx512f")]
3769#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
3771pub fn _mm512_fmsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_fma(a, b, simd_neg(c)) }
3773}
3774
3775/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3776///
3777/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_ps&expand=2644)
3778#[inline]
3779#[target_feature(enable = "avx512f")]
3780#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
3782pub fn _mm512_mask_fmsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fmsub_ps(a, b, c), a) }
3784}
3785
3786/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3787///
3788/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_ps&expand=2646)
3789#[inline]
3790#[target_feature(enable = "avx512f")]
3791#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
3793pub fn _mm512_maskz_fmsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fmsub_ps(a, b, c), _mm512_setzero_ps()) }
3795}
3796
3797/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3798///
3799/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_ps&expand=2645)
3800#[inline]
3801#[target_feature(enable = "avx512f")]
3802#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
3804pub fn _mm512_mask3_fmsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fmsub_ps(a, b, c), c) }
3806}
3807
3808/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3809///
3810/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsub_ps&expand=2640)
3811#[inline]
3812#[target_feature(enable = "avx512f,avx512vl")]
3813#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
3815pub fn _mm256_mask_fmsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fmsub_ps(a, b, c), a) }
3817}
3818
3819/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3820///
3821/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsub_ps&expand=2642)
3822#[inline]
3823#[target_feature(enable = "avx512f,avx512vl")]
3824#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
3826pub fn _mm256_maskz_fmsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fmsub_ps(a, b, c), _mm256_setzero_ps()) }
3828}
3829
3830/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3831///
3832/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsub_ps&expand=2641)
3833#[inline]
3834#[target_feature(enable = "avx512f,avx512vl")]
3835#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
3837pub fn _mm256_mask3_fmsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fmsub_ps(a, b, c), c) }
3839}
3840
3841/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3842///
3843/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsub_ps&expand=2636)
3844#[inline]
3845#[target_feature(enable = "avx512f,avx512vl")]
3846#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
3848pub fn _mm_mask_fmsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fmsub_ps(a, b, c), a) }
3850}
3851
3852/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3853///
3854/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsub_ps&expand=2638)
3855#[inline]
3856#[target_feature(enable = "avx512f,avx512vl")]
3857#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
3859pub fn _mm_maskz_fmsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fmsub_ps(a, b, c), _mm_setzero_ps()) }
3861}
3862
3863/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3864///
3865/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsub_ps&expand=2637)
3866#[inline]
3867#[target_feature(enable = "avx512f,avx512vl")]
3868#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps; clang generates vfmadd, gcc generates vfmsub
3870pub fn _mm_mask3_fmsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fmsub_ps(a, b, c), c) }
3872}
3873
3874/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.
3875///
3876/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_pd&expand=2631)
3877#[inline]
3878#[target_feature(enable = "avx512f")]
3879#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3880#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3881pub fn _mm512_fmsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_fma(a, b, simd_neg(c)) }
3883}
3884
3885/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3886///
3887/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_pd&expand=2632)
3888#[inline]
3889#[target_feature(enable = "avx512f")]
3890#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3891#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3892pub fn _mm512_mask_fmsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fmsub_pd(a, b, c), a) }
3894}
3895
3896/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3897///
3898/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_pd&expand=2634)
3899#[inline]
3900#[target_feature(enable = "avx512f")]
3901#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3902#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3903pub fn _mm512_maskz_fmsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fmsub_pd(a, b, c), _mm512_setzero_pd()) }
3905}
3906
3907/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3908///
3909/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_pd&expand=2633)
3910#[inline]
3911#[target_feature(enable = "avx512f")]
3912#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3913#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3914pub fn _mm512_mask3_fmsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fmsub_pd(a, b, c), c) }
3916}
3917
3918/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3919///
3920/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsub_pd&expand=2628)
3921#[inline]
3922#[target_feature(enable = "avx512f,avx512vl")]
3923#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3924#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3925pub fn _mm256_mask_fmsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fmsub_pd(a, b, c), a) }
3927}
3928
3929/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3930///
3931/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsub_pd&expand=2630)
3932#[inline]
3933#[target_feature(enable = "avx512f,avx512vl")]
3934#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3935#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3936pub fn _mm256_maskz_fmsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fmsub_pd(a, b, c), _mm256_setzero_pd()) }
3938}
3939
3940/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3941///
3942/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsub_pd&expand=2629)
3943#[inline]
3944#[target_feature(enable = "avx512f,avx512vl")]
3945#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3946#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3947pub fn _mm256_mask3_fmsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fmsub_pd(a, b, c), c) }
3949}
3950
3951/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3952///
3953/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsub_pd&expand=2624)
3954#[inline]
3955#[target_feature(enable = "avx512f,avx512vl")]
3956#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3957#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3958pub fn _mm_mask_fmsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fmsub_pd(a, b, c), a) }
3960}
3961
3962/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3963///
3964/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsub_pd&expand=2626)
3965#[inline]
3966#[target_feature(enable = "avx512f,avx512vl")]
3967#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3968#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3969pub fn _mm_maskz_fmsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fmsub_pd(a, b, c), _mm_setzero_pd()) }
3971}
3972
3973/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3974///
3975/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsub_pd&expand=2625)
3976#[inline]
3977#[target_feature(enable = "avx512f,avx512vl")]
3978#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3979#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3980pub fn _mm_mask3_fmsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fmsub_pd(a, b, c), c) }
3982}
3983
3984/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.
3985///
3986/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_ps&expand=2611)
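///
/// # Example
///
/// A minimal sketch (hypothetical values; requires an AVX-512F-capable CPU).
/// Even-indexed lanes compute `a * b - c`, odd-indexed lanes compute `a * b + c`:
///
/// ```ignore
/// let a = _mm512_set1_ps(2.0);
/// let b = _mm512_set1_ps(3.0);
/// let c = _mm512_set1_ps(1.0);
/// // Lanes 0, 2, 4, ... are 5.0 (subtract); lanes 1, 3, 5, ... are 7.0 (add).
/// let r = _mm512_fmaddsub_ps(a, b, c);
/// ```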
3987#[inline]
3988#[target_feature(enable = "avx512f")]
3989#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
3990#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
3991pub fn _mm512_fmaddsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
3992 unsafe {
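        // Compute both a*b+c and a*b-c for every lane, then interleave the two
        // results below: even-indexed lanes take the subtracted value, odd-indexed
        // lanes take the added value (shuffle indices >= 16 select from `sub`).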
        let add: __m512 = simd_fma(a, b, c);
        let sub: __m512 = simd_fma(a, b, simd_neg(c));
3995 simd_shuffle!(
3996 add,
3997 sub,
3998 [16, 1, 18, 3, 20, 5, 22, 7, 24, 9, 26, 11, 28, 13, 30, 15]
3999 )
4000 }
4001}
4002
4003/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4004///
4005/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_ps&expand=2612)
4006#[inline]
4007#[target_feature(enable = "avx512f")]
4008#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4009#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4010pub fn _mm512_mask_fmaddsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ps(a, b, c), a) }
4012}
4013
4014/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4015///
4016/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_ps&expand=2614)
4017#[inline]
4018#[target_feature(enable = "avx512f")]
4019#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4020#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4021pub fn _mm512_maskz_fmaddsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ps(a, b, c), _mm512_setzero_ps()) }
4023}
4024
4025/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4026///
4027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_ps&expand=2613)
4028#[inline]
4029#[target_feature(enable = "avx512f")]
4030#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4031#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4032pub fn _mm512_mask3_fmaddsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ps(a, b, c), c) }
4034}
4035
4036/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4037///
4038/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmaddsub_ps&expand=2608)
4039#[inline]
4040#[target_feature(enable = "avx512f,avx512vl")]
4041#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4042#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4043pub fn _mm256_mask_fmaddsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ps(a, b, c), a) }
4045}
4046
4047/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4048///
4049/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmaddsub_ps&expand=2610)
4050#[inline]
4051#[target_feature(enable = "avx512f,avx512vl")]
4052#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4053#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4054pub fn _mm256_maskz_fmaddsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ps(a, b, c), _mm256_setzero_ps()) }
4056}
4057
4058/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4059///
4060/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmaddsub_ps&expand=2609)
4061#[inline]
4062#[target_feature(enable = "avx512f,avx512vl")]
4063#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4064#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4065pub fn _mm256_mask3_fmaddsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ps(a, b, c), c) }
4067}
4068
4069/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4070///
4071/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmaddsub_ps&expand=2604)
4072#[inline]
4073#[target_feature(enable = "avx512f,avx512vl")]
4074#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4075#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4076pub fn _mm_mask_fmaddsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fmaddsub_ps(a, b, c), a) }
4078}
4079
4080/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4081///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmaddsub_ps&expand=2606)
4083#[inline]
4084#[target_feature(enable = "avx512f,avx512vl")]
4085#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4086#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4087pub fn _mm_maskz_fmaddsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fmaddsub_ps(a, b, c), _mm_setzero_ps()) }
4089}
4090
4091/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4092///
4093/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmaddsub_ps&expand=2605)
4094#[inline]
4095#[target_feature(enable = "avx512f,avx512vl")]
4096#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4097#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4098pub fn _mm_mask3_fmaddsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fmaddsub_ps(a, b, c), c) }
4100}
4101
4102/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.
4103///
4104/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_pd&expand=2599)
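///
/// # Examples
///
/// A minimal usage sketch added for illustration (not part of the original
/// documentation); it assumes an x86_64 CPU with AVX-512F available at
/// runtime:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     // Assumes AVX-512F is available at runtime.
///     let a = _mm512_set1_pd(2.0);
///     let b = _mm512_set1_pd(3.0);
///     let c = _mm512_set1_pd(1.0);
///     // Even lanes compute a*b - c, odd lanes compute a*b + c.
///     let r = _mm512_fmaddsub_pd(a, b, c);
///     let mut out = [0.0f64; 8];
///     _mm512_storeu_pd(out.as_mut_ptr(), r);
///     assert_eq!(out, [5.0, 7.0, 5.0, 7.0, 5.0, 7.0, 5.0, 7.0]);
/// }
/// ```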
4105#[inline]
4106#[target_feature(enable = "avx512f")]
4107#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4108#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4109pub fn _mm512_fmaddsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4110 unsafe {
        let add: __m512d = simd_fma(a, b, c);
        let sub: __m512d = simd_fma(a, b, simd_neg(c));
4113 simd_shuffle!(add, sub, [8, 1, 10, 3, 12, 5, 14, 7])
4114 }
4115}
4116
4117/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4118///
4119/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_pd&expand=2600)
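///
/// # Examples
///
/// A hedged sketch of the writemask behaviour, added for illustration (not
/// part of the original documentation); it assumes AVX-512F is available at
/// runtime:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     // Assumes AVX-512F is available at runtime.
///     let a = _mm512_set1_pd(2.0);
///     let b = _mm512_set1_pd(3.0);
///     let c = _mm512_set1_pd(1.0);
///     // Mask bits 0, 2, 4 and 6 are set: those lanes take the fmaddsub
///     // result (even lanes compute a*b - c = 5.0); the other lanes copy `a`.
///     let r = _mm512_mask_fmaddsub_pd(a, 0b0101_0101, b, c);
///     let mut out = [0.0f64; 8];
///     _mm512_storeu_pd(out.as_mut_ptr(), r);
///     assert_eq!(out, [5.0, 2.0, 5.0, 2.0, 5.0, 2.0, 5.0, 2.0]);
/// }
/// ```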
4120#[inline]
4121#[target_feature(enable = "avx512f")]
4122#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4123#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4124pub fn _mm512_mask_fmaddsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_pd(a, b, c), a) }
4126}
4127
4128/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4129///
4130/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_pd&expand=2602)
4131#[inline]
4132#[target_feature(enable = "avx512f")]
4133#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4134#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4135pub fn _mm512_maskz_fmaddsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_pd(a, b, c), _mm512_setzero_pd()) }
4137}
4138
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4140///
4141/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_pd&expand=2613)
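///
/// # Examples
///
/// A hedged sketch of the `mask3` behaviour, added for illustration (not part
/// of the original documentation); it assumes AVX-512F is available at
/// runtime:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     // Assumes AVX-512F is available at runtime.
///     let a = _mm512_set1_pd(2.0);
///     let b = _mm512_set1_pd(3.0);
///     let c = _mm512_set1_pd(1.0);
///     // The low 4 mask bits are set: lanes 0..4 take the fmaddsub result
///     // ([5.0, 7.0, 5.0, 7.0]); lanes 4..8 copy the corresponding lane of `c`.
///     let r = _mm512_mask3_fmaddsub_pd(a, b, c, 0b0000_1111);
///     let mut out = [0.0f64; 8];
///     _mm512_storeu_pd(out.as_mut_ptr(), r);
///     assert_eq!(out, [5.0, 7.0, 5.0, 7.0, 1.0, 1.0, 1.0, 1.0]);
/// }
/// ```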
4142#[inline]
4143#[target_feature(enable = "avx512f")]
4144#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4145#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4146pub fn _mm512_mask3_fmaddsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_pd(a, b, c), c) }
4148}
4149
4150/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4151///
4152/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmaddsub_pd&expand=2596)
4153#[inline]
4154#[target_feature(enable = "avx512f,avx512vl")]
4155#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4156#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4157pub fn _mm256_mask_fmaddsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_pd(a, b, c), a) }
4159}
4160
4161/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4162///
4163/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmaddsub_pd&expand=2598)
4164#[inline]
4165#[target_feature(enable = "avx512f,avx512vl")]
4166#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4167#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4168pub fn _mm256_maskz_fmaddsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_pd(a, b, c), _mm256_setzero_pd()) }
4170}
4171
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4173///
4174/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmaddsub_pd&expand=2597)
4175#[inline]
4176#[target_feature(enable = "avx512f,avx512vl")]
4177#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4178#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4179pub fn _mm256_mask3_fmaddsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_pd(a, b, c), c) }
4181}
4182
4183/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4184///
4185/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmaddsub_pd&expand=2592)
4186#[inline]
4187#[target_feature(enable = "avx512f,avx512vl")]
4188#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4189#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4190pub fn _mm_mask_fmaddsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fmaddsub_pd(a, b, c), a) }
4192}
4193
4194/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4195///
4196/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmaddsub_pd&expand=2594)
4197#[inline]
4198#[target_feature(enable = "avx512f,avx512vl")]
4199#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4200#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4201pub fn _mm_maskz_fmaddsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fmaddsub_pd(a, b, c), _mm_setzero_pd()) }
4203}
4204
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4206///
4207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmaddsub_pd&expand=2593)
4208#[inline]
4209#[target_feature(enable = "avx512f,avx512vl")]
4210#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4211#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
4212pub fn _mm_mask3_fmaddsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fmaddsub_pd(a, b, c), c) }
4214}
4215
4216/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.
4217///
4218/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_ps&expand=2691)
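///
/// # Examples
///
/// A minimal usage sketch added for illustration (not part of the original
/// documentation); it assumes an x86_64 CPU with AVX-512F available at
/// runtime:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     // Assumes AVX-512F is available at runtime.
///     let a = _mm512_set1_ps(2.0);
///     let b = _mm512_set1_ps(3.0);
///     let c = _mm512_set1_ps(1.0);
///     // Even lanes compute a*b + c, odd lanes compute a*b - c.
///     let r = _mm512_fmsubadd_ps(a, b, c);
///     let mut out = [0.0f32; 16];
///     _mm512_storeu_ps(out.as_mut_ptr(), r);
///     assert_eq!(out[0], 7.0);
///     assert_eq!(out[1], 5.0);
/// }
/// ```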
4219#[inline]
4220#[target_feature(enable = "avx512f")]
4221#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4222#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4223pub fn _mm512_fmsubadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
4224 unsafe {
        let add: __m512 = simd_fma(a, b, c);
        let sub: __m512 = simd_fma(a, b, simd_neg(c));
4227 simd_shuffle!(
4228 add,
4229 sub,
4230 [0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31]
4231 )
4232 }
4233}
4234
4235/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4236///
4237/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_ps&expand=2692)
4238#[inline]
4239#[target_feature(enable = "avx512f")]
4240#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4241#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4242pub fn _mm512_mask_fmsubadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ps(a, b, c), a) }
4244}
4245
4246/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4247///
4248/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_ps&expand=2694)
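///
/// # Examples
///
/// A hedged sketch of the zeromask behaviour, added for illustration (not
/// part of the original documentation); it assumes AVX-512F is available at
/// runtime:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     // Assumes AVX-512F is available at runtime.
///     let a = _mm512_set1_ps(2.0);
///     let b = _mm512_set1_ps(3.0);
///     let c = _mm512_set1_ps(1.0);
///     // Only the low 8 mask bits are set: lanes 0..8 take the fmsubadd
///     // result (lane 0 computes a*b + c = 7.0); lanes 8..16 are zeroed.
///     let r = _mm512_maskz_fmsubadd_ps(0x00FF, a, b, c);
///     let mut out = [0.0f32; 16];
///     _mm512_storeu_ps(out.as_mut_ptr(), r);
///     assert_eq!(out[0], 7.0);
///     assert_eq!(out[15], 0.0);
/// }
/// ```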
4249#[inline]
4250#[target_feature(enable = "avx512f")]
4251#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4252#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4253pub fn _mm512_maskz_fmsubadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ps(a, b, c), _mm512_setzero_ps()) }
4255}
4256
4257/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4258///
4259/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_ps&expand=2693)
4260#[inline]
4261#[target_feature(enable = "avx512f")]
4262#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4263#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4264pub fn _mm512_mask3_fmsubadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ps(a, b, c), c) }
4266}
4267
4268/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4269///
4270/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsubadd_ps&expand=2688)
4271#[inline]
4272#[target_feature(enable = "avx512f,avx512vl")]
4273#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4274#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4275pub fn _mm256_mask_fmsubadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ps(a, b, c), a) }
4277}
4278
4279/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4280///
4281/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsubadd_ps&expand=2690)
4282#[inline]
4283#[target_feature(enable = "avx512f,avx512vl")]
4284#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4285#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4286pub fn _mm256_maskz_fmsubadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ps(a, b, c), _mm256_setzero_ps()) }
4288}
4289
4290/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4291///
4292/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsubadd_ps&expand=2689)
4293#[inline]
4294#[target_feature(enable = "avx512f,avx512vl")]
4295#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4296#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4297pub fn _mm256_mask3_fmsubadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ps(a, b, c), c) }
4299}
4300
4301/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4302///
4303/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsubadd_ps&expand=2684)
4304#[inline]
4305#[target_feature(enable = "avx512f,avx512vl")]
4306#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4307#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4308pub fn _mm_mask_fmsubadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fmsubadd_ps(a, b, c), a) }
4310}
4311
4312/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4313///
4314/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsubadd_ps&expand=2686)
4315#[inline]
4316#[target_feature(enable = "avx512f,avx512vl")]
4317#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4318#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4319pub fn _mm_maskz_fmsubadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fmsubadd_ps(a, b, c), _mm_setzero_ps()) }
4321}
4322
4323/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4324///
4325/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsubadd_ps&expand=2685)
4326#[inline]
4327#[target_feature(enable = "avx512f,avx512vl")]
4328#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4329#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
4330pub fn _mm_mask3_fmsubadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fmsubadd_ps(a, b, c), c) }
4332}
4333
4334/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.
4335///
4336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_pd&expand=2679)
4337#[inline]
4338#[target_feature(enable = "avx512f")]
4339#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4340#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4341pub fn _mm512_fmsubadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4342 unsafe {
        let add: __m512d = simd_fma(a, b, c);
        let sub: __m512d = simd_fma(a, b, simd_neg(c));
4345 simd_shuffle!(add, sub, [0, 9, 2, 11, 4, 13, 6, 15])
4346 }
4347}
4348
4349/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4350///
4351/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_pd&expand=2680)
4352#[inline]
4353#[target_feature(enable = "avx512f")]
4354#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4355#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4356pub fn _mm512_mask_fmsubadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_pd(a, b, c), a) }
4358}
4359
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4361///
4362/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_pd&expand=2682)
4363#[inline]
4364#[target_feature(enable = "avx512f")]
4365#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4366#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4367pub fn _mm512_maskz_fmsubadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_pd(a, b, c), _mm512_setzero_pd()) }
4369}
4370
4371/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4372///
4373/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_pd&expand=2681)
4374#[inline]
4375#[target_feature(enable = "avx512f")]
4376#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4377#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4378pub fn _mm512_mask3_fmsubadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_pd(a, b, c), c) }
4380}
4381
4382/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4383///
4384/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsubadd_pd&expand=2676)
4385#[inline]
4386#[target_feature(enable = "avx512f,avx512vl")]
4387#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4388#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4389pub fn _mm256_mask_fmsubadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_pd(a, b, c), a) }
4391}
4392
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4394///
4395/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsubadd_pd&expand=2678)
4396#[inline]
4397#[target_feature(enable = "avx512f,avx512vl")]
4398#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4399#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4400pub fn _mm256_maskz_fmsubadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_pd(a, b, c), _mm256_setzero_pd()) }
4402}
4403
4404/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4405///
4406/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsubadd_pd&expand=2677)
4407#[inline]
4408#[target_feature(enable = "avx512f,avx512vl")]
4409#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4410#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4411pub fn _mm256_mask3_fmsubadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_pd(a, b, c), c) }
4413}
4414
4415/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4416///
4417/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsubadd_pd&expand=2672)
4418#[inline]
4419#[target_feature(enable = "avx512f,avx512vl")]
4420#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4421#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4422pub fn _mm_mask_fmsubadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fmsubadd_pd(a, b, c), a) }
4424}
4425
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4427///
4428/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsubadd_pd&expand=2674)
4429#[inline]
4430#[target_feature(enable = "avx512f,avx512vl")]
4431#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4432#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4433pub fn _mm_maskz_fmsubadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fmsubadd_pd(a, b, c), _mm_setzero_pd()) }
4435}
4436
4437/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4438///
4439/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsubadd_pd&expand=2673)
4440#[inline]
4441#[target_feature(enable = "avx512f,avx512vl")]
4442#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4443#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
4444pub fn _mm_mask3_fmsubadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fmsubadd_pd(a, b, c), c) }
4446}
4447
4448/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.
4449///
4450/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_ps&expand=2723)
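///
/// # Examples
///
/// A minimal usage sketch added for illustration (not part of the original
/// documentation); it assumes an x86_64 CPU with AVX-512F available at
/// runtime:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     // Assumes AVX-512F is available at runtime.
///     let a = _mm512_set1_ps(2.0);
///     let b = _mm512_set1_ps(3.0);
///     let c = _mm512_set1_ps(1.0);
///     // Every lane computes -(a * b) + c = -5.0.
///     let r = _mm512_fnmadd_ps(a, b, c);
///     let mut out = [0.0f32; 16];
///     _mm512_storeu_ps(out.as_mut_ptr(), r);
///     assert!(out.iter().all(|&x| x == -5.0));
/// }
/// ```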
4451#[inline]
4452#[target_feature(enable = "avx512f")]
4453#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4454#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4455pub fn _mm512_fnmadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_fma(simd_neg(a), b, c) }
4457}
4458
4459/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4460///
4461/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_ps&expand=2724)
4462#[inline]
4463#[target_feature(enable = "avx512f")]
4464#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4465#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4466pub fn _mm512_mask_fnmadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fnmadd_ps(a, b, c), a) }
4468}
4469
4470/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4471///
4472/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_ps&expand=2726)
4473#[inline]
4474#[target_feature(enable = "avx512f")]
4475#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4476#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4477pub fn _mm512_maskz_fnmadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fnmadd_ps(a, b, c), _mm512_setzero_ps()) }
4479}
4480
4481/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4482///
4483/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_ps&expand=2725)
4484#[inline]
4485#[target_feature(enable = "avx512f")]
4486#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4487#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4488pub fn _mm512_mask3_fnmadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fnmadd_ps(a, b, c), c) }
4490}
4491
4492/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4493///
4494/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmadd_ps&expand=2720)
4495#[inline]
4496#[target_feature(enable = "avx512f,avx512vl")]
4497#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4498#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4499pub fn _mm256_mask_fnmadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fnmadd_ps(a, b, c), a) }
4501}
4502
4503/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4504///
4505/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmadd_ps&expand=2722)
4506#[inline]
4507#[target_feature(enable = "avx512f,avx512vl")]
4508#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4509#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4510pub fn _mm256_maskz_fnmadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fnmadd_ps(a, b, c), _mm256_setzero_ps()) }
4512}
4513
4514/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4515///
4516/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmadd_ps&expand=2721)
4517#[inline]
4518#[target_feature(enable = "avx512f,avx512vl")]
4519#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4520#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4521pub fn _mm256_mask3_fnmadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fnmadd_ps(a, b, c), c) }
4523}
4524
4525/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4526///
4527/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmadd_ps&expand=2716)
4528#[inline]
4529#[target_feature(enable = "avx512f,avx512vl")]
4530#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4531#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4532pub fn _mm_mask_fnmadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fnmadd_ps(a, b, c), a) }
4534}
4535
4536/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4537///
4538/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmadd_ps&expand=2718)
4539#[inline]
4540#[target_feature(enable = "avx512f,avx512vl")]
4541#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4542#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4543pub fn _mm_maskz_fnmadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fnmadd_ps(a, b, c), _mm_setzero_ps()) }
4545}
4546
4547/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4548///
4549/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmadd_ps&expand=2717)
4550#[inline]
4551#[target_feature(enable = "avx512f,avx512vl")]
4552#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4553#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
4554pub fn _mm_mask3_fnmadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fnmadd_ps(a, b, c), c) }
4556}
4557
4558/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.
4559///
4560/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_pd&expand=2711)
4561#[inline]
4562#[target_feature(enable = "avx512f")]
4563#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4564#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4565pub fn _mm512_fnmadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_fma(simd_neg(a), b, c) }
4567}
4568
4569/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4570///
4571/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_pd&expand=2712)
4572#[inline]
4573#[target_feature(enable = "avx512f")]
4574#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4575#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4576pub fn _mm512_mask_fnmadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fnmadd_pd(a, b, c), a) }
4578}
4579
4580/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4581///
4582/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_pd&expand=2714)
4583#[inline]
4584#[target_feature(enable = "avx512f")]
4585#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4586#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4587pub fn _mm512_maskz_fnmadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fnmadd_pd(a, b, c), _mm512_setzero_pd()) }
4589}
4590
4591/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4592///
4593/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_pd&expand=2713)
4594#[inline]
4595#[target_feature(enable = "avx512f")]
4596#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4597#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4598pub fn _mm512_mask3_fnmadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fnmadd_pd(a, b, c), c) }
4600}
4601
4602/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4603///
4604/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmadd_pd&expand=2708)
4605#[inline]
4606#[target_feature(enable = "avx512f,avx512vl")]
4607#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4608#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4609pub fn _mm256_mask_fnmadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fnmadd_pd(a, b, c), a) }
4611}
4612
4613/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4614///
4615/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmadd_pd&expand=2710)
4616#[inline]
4617#[target_feature(enable = "avx512f,avx512vl")]
4618#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4619#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4620pub fn _mm256_maskz_fnmadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fnmadd_pd(a, b, c), _mm256_setzero_pd()) }
4622}
4623
4624/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4625///
4626/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmadd_pd&expand=2709)
4627#[inline]
4628#[target_feature(enable = "avx512f,avx512vl")]
4629#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4630#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4631pub fn _mm256_mask3_fnmadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fnmadd_pd(a, b, c), c) }
4633}
4634
4635/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4636///
4637/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmadd_pd&expand=2704)
4638#[inline]
4639#[target_feature(enable = "avx512f,avx512vl")]
4640#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4641#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4642pub fn _mm_mask_fnmadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fnmadd_pd(a, b, c), a) }
4644}
4645
4646/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4647///
4648/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmadd_pd&expand=2706)
4649#[inline]
4650#[target_feature(enable = "avx512f,avx512vl")]
4651#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4652#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4653pub fn _mm_maskz_fnmadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fnmadd_pd(a, b, c), _mm_setzero_pd()) }
4655}
4656
4657/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4658///
4659/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmadd_pd&expand=2705)
4660#[inline]
4661#[target_feature(enable = "avx512f,avx512vl")]
4662#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4663#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
4664pub fn _mm_mask3_fnmadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fnmadd_pd(a, b, c), c) }
4666}
4667
4668/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.
4669///
4670/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_ps&expand=2771)
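///
/// # Examples
///
/// A minimal usage sketch added for illustration (not part of the original
/// documentation); it assumes an x86_64 CPU with AVX-512F available at
/// runtime:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     // Assumes AVX-512F is available at runtime.
///     let a = _mm512_set1_ps(2.0);
///     let b = _mm512_set1_ps(3.0);
///     let c = _mm512_set1_ps(1.0);
///     // Every lane computes -(a * b) - c = -7.0.
///     let r = _mm512_fnmsub_ps(a, b, c);
///     let mut out = [0.0f32; 16];
///     _mm512_storeu_ps(out.as_mut_ptr(), r);
///     assert!(out.iter().all(|&x| x == -7.0));
/// }
/// ```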
4671#[inline]
4672#[target_feature(enable = "avx512f")]
4673#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4674#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4675pub fn _mm512_fnmsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
4677}
4678
4679/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4680///
4681/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_ps&expand=2772)
4682#[inline]
4683#[target_feature(enable = "avx512f")]
4684#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4685#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4686pub fn _mm512_mask_fnmsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fnmsub_ps(a, b, c), a) }
4688}
4689
4690/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4691///
4692/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_ps&expand=2774)
4693#[inline]
4694#[target_feature(enable = "avx512f")]
4695#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4696#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4697pub fn _mm512_maskz_fnmsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fnmsub_ps(a, b, c), _mm512_setzero_ps()) }
4699}
4700
4701/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4702///
4703/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_ps&expand=2773)
4704#[inline]
4705#[target_feature(enable = "avx512f")]
4706#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4707#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4708pub fn _mm512_mask3_fnmsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fnmsub_ps(a, b, c), c) }
4710}
4711
4712/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4713///
4714/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmsub_ps&expand=2768)
4715#[inline]
4716#[target_feature(enable = "avx512f,avx512vl")]
4717#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4718#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4719pub fn _mm256_mask_fnmsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fnmsub_ps(a, b, c), a) }
4721}
4722
4723/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4724///
4725/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmsub_ps&expand=2770)
4726#[inline]
4727#[target_feature(enable = "avx512f,avx512vl")]
4728#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4729#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4730pub fn _mm256_maskz_fnmsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fnmsub_ps(a, b, c), _mm256_setzero_ps()) }
4732}
4733
4734/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4735///
4736/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmsub_ps&expand=2769)
4737#[inline]
4738#[target_feature(enable = "avx512f,avx512vl")]
4739#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4740#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4741pub fn _mm256_mask3_fnmsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fnmsub_ps(a, b, c), c) }
4743}
4744
4745/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4746///
4747/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmsub_ps&expand=2764)
4748#[inline]
4749#[target_feature(enable = "avx512f,avx512vl")]
4750#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4751#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4752pub fn _mm_mask_fnmsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fnmsub_ps(a, b, c), a) }
4754}
4755
4756/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4757///
4758/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmsub_ps&expand=2766)
4759#[inline]
4760#[target_feature(enable = "avx512f,avx512vl")]
4761#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4762#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4763pub fn _mm_maskz_fnmsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fnmsub_ps(a, b, c), _mm_setzero_ps()) }
4765}
4766
4767/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4768///
4769/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmsub_ps&expand=2765)
4770#[inline]
4771#[target_feature(enable = "avx512f,avx512vl")]
4772#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4773#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4774pub fn _mm_mask3_fnmsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fnmsub_ps(a, b, c), c) }
4776}
4777
4778/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.
4779///
4780/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_pd&expand=2759)
4781#[inline]
4782#[target_feature(enable = "avx512f")]
4783#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4784#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4785pub fn _mm512_fnmsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_fma(simd_neg(a), b, simd_neg(c)) }
4787}
4788
4789/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4790///
4791/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_pd&expand=2760)
4792#[inline]
4793#[target_feature(enable = "avx512f")]
4794#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4795#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4796pub fn _mm512_mask_fnmsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fnmsub_pd(a, b, c), a) }
4798}
4799
4800/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4801///
4802/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_pd&expand=2762)
4803#[inline]
4804#[target_feature(enable = "avx512f")]
4805#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4806#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4807pub fn _mm512_maskz_fnmsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fnmsub_pd(a, b, c), _mm512_setzero_pd()) }
4809}
4810
4811/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4812///
4813/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_pd&expand=2761)
4814#[inline]
4815#[target_feature(enable = "avx512f")]
4816#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4817#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4818pub fn _mm512_mask3_fnmsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fnmsub_pd(a, b, c), c) }
4820}
4821
4822/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4823///
4824/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmsub_pd&expand=2756)
4825#[inline]
4826#[target_feature(enable = "avx512f,avx512vl")]
4827#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4828#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4829pub fn _mm256_mask_fnmsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fnmsub_pd(a, b, c), a) }
4831}
4832
4833/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4834///
4835/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmsub_pd&expand=2758)
4836#[inline]
4837#[target_feature(enable = "avx512f,avx512vl")]
4838#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4839#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4840pub fn _mm256_maskz_fnmsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fnmsub_pd(a, b, c), _mm256_setzero_pd()) }
4842}
4843
4844/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4845///
4846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmsub_pd&expand=2757)
4847#[inline]
4848#[target_feature(enable = "avx512f,avx512vl")]
4849#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4850#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4851pub fn _mm256_mask3_fnmsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fnmsub_pd(a, b, c), c) }
4853}
4854
4855/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4856///
4857/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmsub_pd&expand=2752)
4858#[inline]
4859#[target_feature(enable = "avx512f,avx512vl")]
4860#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4861#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4862pub fn _mm_mask_fnmsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fnmsub_pd(a, b, c), a) }
4864}
4865
4866/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4867///
4868/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmsub_pd&expand=2754)
4869#[inline]
4870#[target_feature(enable = "avx512f,avx512vl")]
4871#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4872#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4873pub fn _mm_maskz_fnmsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fnmsub_pd(a, b, c), _mm_setzero_pd()) }
4875}
4876
4877/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4878///
4879/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmsub_pd&expand=2753)
4880#[inline]
4881#[target_feature(enable = "avx512f,avx512vl")]
4882#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4883#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4884pub fn _mm_mask3_fnmsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fnmsub_pd(a, b, c), c) }
4886}
4887
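// Usage sketch for the fnmsub family above (illustrative only, not part of this
// crate's tests). Each lane computes `-(a * b) - c`; the `mask`, `maskz` and `mask3`
// variants differ only in what they write to lanes whose mask bit is clear (`a`,
// zero, or `c` respectively). Assumes it runs in an `avx512f`-enabled context:
//
//     let a = _mm512_set1_pd(2.0);
//     let b = _mm512_set1_pd(3.0);
//     let c = _mm512_set1_pd(1.0);
//     // every selected lane becomes -(2.0 * 3.0) - 1.0 == -7.0
//     let merged = _mm512_mask_fnmsub_pd(a, 0b0000_1111, b, c); // unselected lanes keep `a`
//     let zeroed = _mm512_maskz_fnmsub_pd(0b0000_1111, a, b, c); // unselected lanes become 0.0
//     let from_c = _mm512_mask3_fnmsub_pd(a, b, c, 0b0000_1111); // unselected lanes keep `c`
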
4888/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
4889///
4890/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rcp14_ps&expand=4502)
4891#[inline]
4892#[target_feature(enable = "avx512f")]
4893#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4894#[cfg_attr(test, assert_instr(vrcp14ps))]
4895pub fn _mm512_rcp14_ps(a: __m512) -> __m512 {
    unsafe { transmute(vrcp14ps(a.as_f32x16(), f32x16::ZERO, 0b11111111_11111111)) }
4897}
4898
4899/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4900///
4901/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rcp14_ps&expand=4500)
4902#[inline]
4903#[target_feature(enable = "avx512f")]
4904#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4905#[cfg_attr(test, assert_instr(vrcp14ps))]
4906pub fn _mm512_mask_rcp14_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    unsafe { transmute(vrcp14ps(a.as_f32x16(), src.as_f32x16(), k)) }
4908}
4909
4910/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4911///
4912/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rcp14_ps&expand=4501)
4913#[inline]
4914#[target_feature(enable = "avx512f")]
4915#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4916#[cfg_attr(test, assert_instr(vrcp14ps))]
4917pub fn _mm512_maskz_rcp14_ps(k: __mmask16, a: __m512) -> __m512 {
    unsafe { transmute(vrcp14ps(a.as_f32x16(), f32x16::ZERO, k)) }
4919}
4920
4921/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
4922///
4923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rcp14_ps&expand=4499)
4924#[inline]
4925#[target_feature(enable = "avx512f,avx512vl")]
4926#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4927#[cfg_attr(test, assert_instr(vrcp14ps))]
4928pub fn _mm256_rcp14_ps(a: __m256) -> __m256 {
    unsafe { transmute(vrcp14ps256(a.as_f32x8(), f32x8::ZERO, 0b11111111)) }
4930}
4931
4932/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4933///
4934/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rcp14_ps&expand=4497)
4935#[inline]
4936#[target_feature(enable = "avx512f,avx512vl")]
4937#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4938#[cfg_attr(test, assert_instr(vrcp14ps))]
4939pub fn _mm256_mask_rcp14_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
    unsafe { transmute(vrcp14ps256(a.as_f32x8(), src.as_f32x8(), k)) }
4941}
4942
4943/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4944///
4945/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rcp14_ps&expand=4498)
4946#[inline]
4947#[target_feature(enable = "avx512f,avx512vl")]
4948#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4949#[cfg_attr(test, assert_instr(vrcp14ps))]
4950pub fn _mm256_maskz_rcp14_ps(k: __mmask8, a: __m256) -> __m256 {
    unsafe { transmute(vrcp14ps256(a.as_f32x8(), f32x8::ZERO, k)) }
4952}
4953
4954/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
4955///
4956/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rcp14_ps&expand=4496)
4957#[inline]
4958#[target_feature(enable = "avx512f,avx512vl")]
4959#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4960#[cfg_attr(test, assert_instr(vrcp14ps))]
4961pub fn _mm_rcp14_ps(a: __m128) -> __m128 {
    unsafe { transmute(vrcp14ps128(a.as_f32x4(), f32x4::ZERO, 0b00001111)) }
4963}
4964
4965/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4966///
4967/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rcp14_ps&expand=4494)
4968#[inline]
4969#[target_feature(enable = "avx512f,avx512vl")]
4970#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4971#[cfg_attr(test, assert_instr(vrcp14ps))]
4972pub fn _mm_mask_rcp14_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    unsafe { transmute(vrcp14ps128(a.as_f32x4(), src.as_f32x4(), k)) }
4974}
4975
4976/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4977///
4978/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rcp14_ps&expand=4495)
4979#[inline]
4980#[target_feature(enable = "avx512f,avx512vl")]
4981#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4982#[cfg_attr(test, assert_instr(vrcp14ps))]
4983pub fn _mm_maskz_rcp14_ps(k: __mmask8, a: __m128) -> __m128 {
    unsafe { transmute(vrcp14ps128(a.as_f32x4(), f32x4::ZERO, k)) }
4985}
4986
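// Usage sketch for the rcp14 family above (illustrative only). `_mm512_rcp14_ps`
// returns an approximation of `1.0 / a` with relative error below 2^-14; when full
// single-precision accuracy is needed, a common follow-up (an add-on shown here as an
// assumption, not something these intrinsics do for you) is one Newton-Raphson step
// `y * (2 - a * y)`:
//
//     let a = _mm512_set1_ps(3.0);
//     let y = _mm512_rcp14_ps(a);                                   // ~0.333...
//     let two = _mm512_set1_ps(2.0);
//     let refined = _mm512_mul_ps(y, _mm512_fnmadd_ps(a, y, two));  // y * (2 - a*y)
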
4987/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
4988///
4989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rcp14_pd&expand=4493)
4990#[inline]
4991#[target_feature(enable = "avx512f")]
4992#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
4993#[cfg_attr(test, assert_instr(vrcp14pd))]
4994pub fn _mm512_rcp14_pd(a: __m512d) -> __m512d {
    unsafe { transmute(vrcp14pd(a.as_f64x8(), f64x8::ZERO, 0b11111111)) }
4996}
4997
4998/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4999///
5000/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rcp14_pd&expand=4491)
5001#[inline]
5002#[target_feature(enable = "avx512f")]
5003#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5004#[cfg_attr(test, assert_instr(vrcp14pd))]
5005pub fn _mm512_mask_rcp14_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    unsafe { transmute(vrcp14pd(a.as_f64x8(), src.as_f64x8(), k)) }
5007}
5008
5009/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5010///
5011/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rcp14_pd&expand=4492)
5012#[inline]
5013#[target_feature(enable = "avx512f")]
5014#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5015#[cfg_attr(test, assert_instr(vrcp14pd))]
5016pub fn _mm512_maskz_rcp14_pd(k: __mmask8, a: __m512d) -> __m512d {
    unsafe { transmute(vrcp14pd(a.as_f64x8(), f64x8::ZERO, k)) }
5018}
5019
5020/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5021///
5022/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rcp14_pd&expand=4490)
5023#[inline]
5024#[target_feature(enable = "avx512f,avx512vl")]
5025#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5026#[cfg_attr(test, assert_instr(vrcp14pd))]
5027pub fn _mm256_rcp14_pd(a: __m256d) -> __m256d {
    unsafe { transmute(vrcp14pd256(a.as_f64x4(), f64x4::ZERO, 0b00001111)) }
5029}
5030
5031/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5032///
5033/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rcp14_pd&expand=4488)
5034#[inline]
5035#[target_feature(enable = "avx512f,avx512vl")]
5036#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5037#[cfg_attr(test, assert_instr(vrcp14pd))]
5038pub fn _mm256_mask_rcp14_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
    unsafe { transmute(vrcp14pd256(a.as_f64x4(), src.as_f64x4(), k)) }
5040}
5041
5042/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5043///
5044/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rcp14_pd&expand=4489)
5045#[inline]
5046#[target_feature(enable = "avx512f,avx512vl")]
5047#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5048#[cfg_attr(test, assert_instr(vrcp14pd))]
5049pub fn _mm256_maskz_rcp14_pd(k: __mmask8, a: __m256d) -> __m256d {
    unsafe { transmute(vrcp14pd256(a.as_f64x4(), f64x4::ZERO, k)) }
5051}
5052
5053/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5054///
5055/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rcp14_pd&expand=4487)
5056#[inline]
5057#[target_feature(enable = "avx512f,avx512vl")]
5058#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5059#[cfg_attr(test, assert_instr(vrcp14pd))]
5060pub fn _mm_rcp14_pd(a: __m128d) -> __m128d {
    unsafe { transmute(vrcp14pd128(a.as_f64x2(), f64x2::ZERO, 0b00000011)) }
5062}
5063
5064/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5065///
5066/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rcp14_pd&expand=4485)
5067#[inline]
5068#[target_feature(enable = "avx512f,avx512vl")]
5069#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5070#[cfg_attr(test, assert_instr(vrcp14pd))]
5071pub fn _mm_mask_rcp14_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
    unsafe { transmute(vrcp14pd128(a.as_f64x2(), src.as_f64x2(), k)) }
5073}
5074
5075/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5076///
5077/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rcp14_pd&expand=4486)
5078#[inline]
5079#[target_feature(enable = "avx512f,avx512vl")]
5080#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5081#[cfg_attr(test, assert_instr(vrcp14pd))]
5082pub fn _mm_maskz_rcp14_pd(k: __mmask8, a: __m128d) -> __m128d {
    unsafe { transmute(vrcp14pd128(a.as_f64x2(), f64x2::ZERO, k)) }
5084}
5085
5086/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5087///
5088/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rsqrt14_ps&expand=4819)
5089#[inline]
5090#[target_feature(enable = "avx512f")]
5091#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5092#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5093pub fn _mm512_rsqrt14_ps(a: __m512) -> __m512 {
    unsafe { transmute(vrsqrt14ps(a.as_f32x16(), f32x16::ZERO, 0b11111111_11111111)) }
5095}
5096
5097/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5098///
5099/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rsqrt14_ps&expand=4817)
5100#[inline]
5101#[target_feature(enable = "avx512f")]
5102#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5103#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5104pub fn _mm512_mask_rsqrt14_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    unsafe { transmute(vrsqrt14ps(a.as_f32x16(), src.as_f32x16(), k)) }
5106}
5107
5108/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5109///
5110/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rsqrt14_ps&expand=4818)
5111#[inline]
5112#[target_feature(enable = "avx512f")]
5113#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5114#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5115pub fn _mm512_maskz_rsqrt14_ps(k: __mmask16, a: __m512) -> __m512 {
    unsafe { transmute(vrsqrt14ps(a.as_f32x16(), f32x16::ZERO, k)) }
5117}
5118
5119/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5120///
5121/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rsqrt14_ps)
5122#[inline]
5123#[target_feature(enable = "avx512f,avx512vl")]
5124#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5125#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5126pub fn _mm256_rsqrt14_ps(a: __m256) -> __m256 {
    unsafe { transmute(vrsqrt14ps256(a.as_f32x8(), f32x8::ZERO, 0b11111111)) }
5128}
5129
5130/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5131///
5132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rsqrt14_ps&expand=4815)
5133#[inline]
5134#[target_feature(enable = "avx512f,avx512vl")]
5135#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5136#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5137pub fn _mm256_mask_rsqrt14_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
    unsafe { transmute(vrsqrt14ps256(a.as_f32x8(), src.as_f32x8(), k)) }
5139}
5140
5141/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5142///
5143/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rsqrt14_ps&expand=4816)
5144#[inline]
5145#[target_feature(enable = "avx512f,avx512vl")]
5146#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5147#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5148pub fn _mm256_maskz_rsqrt14_ps(k: __mmask8, a: __m256) -> __m256 {
    unsafe { transmute(vrsqrt14ps256(a.as_f32x8(), f32x8::ZERO, k)) }
5150}
5151
5152/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5153///
5154/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rsqrt14_ps)
5155#[inline]
5156#[target_feature(enable = "avx512f,avx512vl")]
5157#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5158#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5159pub fn _mm_rsqrt14_ps(a: __m128) -> __m128 {
    unsafe { transmute(vrsqrt14ps128(a.as_f32x4(), f32x4::ZERO, 0b00001111)) }
5161}
5162
5163/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5164///
5165/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rsqrt14_ps&expand=4813)
5166#[inline]
5167#[target_feature(enable = "avx512f,avx512vl")]
5168#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5169#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5170pub fn _mm_mask_rsqrt14_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    unsafe { transmute(vrsqrt14ps128(a.as_f32x4(), src.as_f32x4(), k)) }
5172}
5173
5174/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5175///
5176/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rsqrt14_ps&expand=4814)
5177#[inline]
5178#[target_feature(enable = "avx512f,avx512vl")]
5179#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5180#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5181pub fn _mm_maskz_rsqrt14_ps(k: __mmask8, a: __m128) -> __m128 {
    unsafe { transmute(vrsqrt14ps128(a.as_f32x4(), f32x4::ZERO, k)) }
5183}
5184
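// Usage sketch for the rsqrt14 family above (illustrative only). `_mm512_rsqrt14_ps`
// approximates `1.0 / sqrt(a)` to better than 2^-14 relative error; a single
// Newton-Raphson step `y * (1.5 - 0.5 * a * y * y)` (again an add-on, not part of the
// intrinsic itself) roughly doubles the number of correct bits:
//
//     let a = _mm512_set1_ps(4.0);
//     let y = _mm512_rsqrt14_ps(a);                                    // ~0.5
//     let half_ay2 = _mm512_mul_ps(
//         _mm512_set1_ps(0.5),
//         _mm512_mul_ps(a, _mm512_mul_ps(y, y)),
//     );
//     let refined = _mm512_mul_ps(y, _mm512_sub_ps(_mm512_set1_ps(1.5), half_ay2));
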
5185/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5186///
5187/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rsqrt14_pd&expand=4812)
5188#[inline]
5189#[target_feature(enable = "avx512f")]
5190#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5191#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5192pub fn _mm512_rsqrt14_pd(a: __m512d) -> __m512d {
    unsafe { transmute(vrsqrt14pd(a.as_f64x8(), f64x8::ZERO, 0b11111111)) }
5194}
5195
5196/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5197///
5198/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rsqrt14_pd&expand=4810)
5199#[inline]
5200#[target_feature(enable = "avx512f")]
5201#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5202#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5203pub fn _mm512_mask_rsqrt14_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    unsafe { transmute(vrsqrt14pd(a.as_f64x8(), src.as_f64x8(), k)) }
5205}
5206
5207/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5208///
5209/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rsqrt14_pd&expand=4811)
5210#[inline]
5211#[target_feature(enable = "avx512f")]
5212#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5213#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5214pub fn _mm512_maskz_rsqrt14_pd(k: __mmask8, a: __m512d) -> __m512d {
    unsafe { transmute(vrsqrt14pd(a.as_f64x8(), f64x8::ZERO, k)) }
5216}
5217
5218/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5219///
5220/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rsqrt14_pd)
5221#[inline]
5222#[target_feature(enable = "avx512f,avx512vl")]
5223#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5224#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5225pub fn _mm256_rsqrt14_pd(a: __m256d) -> __m256d {
    unsafe { transmute(vrsqrt14pd256(a.as_f64x4(), f64x4::ZERO, 0b00001111)) }
5227}
5228
5229/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5230///
5231/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rsqrt14_pd&expand=4808)
5232#[inline]
5233#[target_feature(enable = "avx512f,avx512vl")]
5234#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5235#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5236pub fn _mm256_mask_rsqrt14_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
    unsafe { transmute(vrsqrt14pd256(a.as_f64x4(), src.as_f64x4(), k)) }
5238}
5239
5240/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5241///
5242/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rsqrt14_pd&expand=4809)
5243#[inline]
5244#[target_feature(enable = "avx512f,avx512vl")]
5245#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5246#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5247pub fn _mm256_maskz_rsqrt14_pd(k: __mmask8, a: __m256d) -> __m256d {
    unsafe { transmute(vrsqrt14pd256(a.as_f64x4(), f64x4::ZERO, k)) }
5249}
5250
5251/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5252///
5253/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rsqrt14_pd)
5254#[inline]
5255#[target_feature(enable = "avx512f,avx512vl")]
5256#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5257#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5258pub fn _mm_rsqrt14_pd(a: __m128d) -> __m128d {
    unsafe { transmute(vrsqrt14pd128(a.as_f64x2(), f64x2::ZERO, 0b00000011)) }
5260}
5261
5262/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5263///
5264/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rsqrt14_pd&expand=4806)
5265#[inline]
5266#[target_feature(enable = "avx512f,avx512vl")]
5267#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5268#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5269pub fn _mm_mask_rsqrt14_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
    unsafe { transmute(vrsqrt14pd128(a.as_f64x2(), src.as_f64x2(), k)) }
5271}
5272
5273/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5274///
5275/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rsqrt14_pd&expand=4807)
5276#[inline]
5277#[target_feature(enable = "avx512f,avx512vl")]
5278#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5279#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5280pub fn _mm_maskz_rsqrt14_pd(k: __mmask8, a: __m128d) -> __m128d {
    unsafe { transmute(vrsqrt14pd128(a.as_f64x2(), f64x2::ZERO, k)) }
5282}
5283
5284/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5285///
5286/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_ps&expand=2844)
5287#[inline]
5288#[target_feature(enable = "avx512f")]
5289#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5290#[cfg_attr(test, assert_instr(vgetexpps))]
5291pub fn _mm512_getexp_ps(a: __m512) -> __m512 {
5292 unsafe {
        transmute(vgetexpps(
            a.as_f32x16(),
            f32x16::ZERO,
            0b11111111_11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
5299 }
5300}
5301
5302/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5303///
5304/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_ps&expand=2845)
5305#[inline]
5306#[target_feature(enable = "avx512f")]
5307#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5308#[cfg_attr(test, assert_instr(vgetexpps))]
5309pub fn _mm512_mask_getexp_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
5310 unsafe {
        transmute(vgetexpps(
            a.as_f32x16(),
            src.as_f32x16(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
5317 }
5318}
5319
5320/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5321///
5322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_ps&expand=2846)
5323#[inline]
5324#[target_feature(enable = "avx512f")]
5325#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5326#[cfg_attr(test, assert_instr(vgetexpps))]
5327pub fn _mm512_maskz_getexp_ps(k: __mmask16, a: __m512) -> __m512 {
5328 unsafe {
        transmute(vgetexpps(
            a.as_f32x16(),
            f32x16::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
5335 }
5336}
5337
5338/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5339///
5340/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getexp_ps&expand=2841)
5341#[inline]
5342#[target_feature(enable = "avx512f,avx512vl")]
5343#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5344#[cfg_attr(test, assert_instr(vgetexpps))]
5345pub fn _mm256_getexp_ps(a: __m256) -> __m256 {
    unsafe { transmute(vgetexpps256(a.as_f32x8(), f32x8::ZERO, 0b11111111)) }
5347}
5348
5349/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5350///
5351/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getexp_ps&expand=2842)
5352#[inline]
5353#[target_feature(enable = "avx512f,avx512vl")]
5354#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5355#[cfg_attr(test, assert_instr(vgetexpps))]
5356pub fn _mm256_mask_getexp_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
    unsafe { transmute(vgetexpps256(a.as_f32x8(), src.as_f32x8(), k)) }
5358}
5359
5360/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5361///
5362/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getexp_ps&expand=2843)
5363#[inline]
5364#[target_feature(enable = "avx512f,avx512vl")]
5365#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5366#[cfg_attr(test, assert_instr(vgetexpps))]
5367pub fn _mm256_maskz_getexp_ps(k: __mmask8, a: __m256) -> __m256 {
    unsafe { transmute(vgetexpps256(a.as_f32x8(), f32x8::ZERO, k)) }
5369}
5370
5371/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5372///
5373/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getexp_ps&expand=2838)
5374#[inline]
5375#[target_feature(enable = "avx512f,avx512vl")]
5376#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5377#[cfg_attr(test, assert_instr(vgetexpps))]
5378pub fn _mm_getexp_ps(a: __m128) -> __m128 {
    unsafe { transmute(vgetexpps128(a.as_f32x4(), f32x4::ZERO, 0b00001111)) }
5380}
5381
5382/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5383///
5384/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getexp_ps&expand=2839)
5385#[inline]
5386#[target_feature(enable = "avx512f,avx512vl")]
5387#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5388#[cfg_attr(test, assert_instr(vgetexpps))]
5389pub fn _mm_mask_getexp_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    unsafe { transmute(vgetexpps128(a.as_f32x4(), src.as_f32x4(), k)) }
5391}
5392
5393/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5394///
5395/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getexp_ps&expand=2840)
5396#[inline]
5397#[target_feature(enable = "avx512f,avx512vl")]
5398#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5399#[cfg_attr(test, assert_instr(vgetexpps))]
5400pub fn _mm_maskz_getexp_ps(k: __mmask8, a: __m128) -> __m128 {
    unsafe { transmute(vgetexpps128(a.as_f32x4(), f32x4::ZERO, k)) }
5402}
5403
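// Usage sketch for the getexp family (illustrative only; the `_pd` variants below
// behave identically for doubles). Each result lane holds `floor(log2(|a|))` as a
// floating-point value, i.e. the unbiased exponent of the input:
//
//     let x = _mm512_set1_ps(8.0);
//     let e = _mm512_getexp_ps(x);   // every lane is 3.0, since 8.0 = 2^3
//     let y = _mm512_set1_ps(0.375);
//     let f = _mm512_getexp_ps(y);   // every lane is -2.0 (0.375 lies in [2^-2, 2^-1))
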
5404/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5405///
5406/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_pd&expand=2835)
5407#[inline]
5408#[target_feature(enable = "avx512f")]
5409#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5410#[cfg_attr(test, assert_instr(vgetexppd))]
5411pub fn _mm512_getexp_pd(a: __m512d) -> __m512d {
5412 unsafe {
        transmute(vgetexppd(
            a.as_f64x8(),
            f64x8::ZERO,
            0b11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
5419 }
5420}
5421
5422/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5423///
5424/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_pd&expand=2836)
5425#[inline]
5426#[target_feature(enable = "avx512f")]
5427#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5428#[cfg_attr(test, assert_instr(vgetexppd))]
5429pub fn _mm512_mask_getexp_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
5430 unsafe {
        transmute(vgetexppd(
            a.as_f64x8(),
            src.as_f64x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
5437 }
5438}
5439
5440/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5441///
5442/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_pd&expand=2837)
5443#[inline]
5444#[target_feature(enable = "avx512f")]
5445#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5446#[cfg_attr(test, assert_instr(vgetexppd))]
5447pub fn _mm512_maskz_getexp_pd(k: __mmask8, a: __m512d) -> __m512d {
5448 unsafe {
        transmute(vgetexppd(
            a.as_f64x8(),
            f64x8::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
5455 }
5456}
5457
5458/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5459///
5460/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getexp_pd&expand=2832)
5461#[inline]
5462#[target_feature(enable = "avx512f,avx512vl")]
5463#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5464#[cfg_attr(test, assert_instr(vgetexppd))]
5465pub fn _mm256_getexp_pd(a: __m256d) -> __m256d {
    unsafe { transmute(vgetexppd256(a.as_f64x4(), f64x4::ZERO, 0b00001111)) }
5467}
5468
5469/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5470///
5471/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getexp_pd&expand=2833)
5472#[inline]
5473#[target_feature(enable = "avx512f,avx512vl")]
5474#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5475#[cfg_attr(test, assert_instr(vgetexppd))]
5476pub fn _mm256_mask_getexp_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
    unsafe { transmute(vgetexppd256(a.as_f64x4(), src.as_f64x4(), k)) }
5478}
5479
5480/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5481///
5482/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getexp_pd&expand=2834)
5483#[inline]
5484#[target_feature(enable = "avx512f,avx512vl")]
5485#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5486#[cfg_attr(test, assert_instr(vgetexppd))]
5487pub fn _mm256_maskz_getexp_pd(k: __mmask8, a: __m256d) -> __m256d {
    unsafe { transmute(vgetexppd256(a.as_f64x4(), f64x4::ZERO, k)) }
5489}
5490
5491/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5492///
5493/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getexp_pd&expand=2829)
5494#[inline]
5495#[target_feature(enable = "avx512f,avx512vl")]
5496#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5497#[cfg_attr(test, assert_instr(vgetexppd))]
5498pub fn _mm_getexp_pd(a: __m128d) -> __m128d {
    unsafe { transmute(vgetexppd128(a.as_f64x2(), f64x2::ZERO, 0b00000011)) }
5500}
5501
5502/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5503///
5504/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getexp_pd&expand=2830)
5505#[inline]
5506#[target_feature(enable = "avx512f,avx512vl")]
5507#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5508#[cfg_attr(test, assert_instr(vgetexppd))]
5509pub fn _mm_mask_getexp_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
    unsafe { transmute(vgetexppd128(a.as_f64x2(), src.as_f64x2(), k)) }
5511}
5512
5513/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5514///
5515/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getexp_pd&expand=2831)
5516#[inline]
5517#[target_feature(enable = "avx512f,avx512vl")]
5518#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5519#[cfg_attr(test, assert_instr(vgetexppd))]
5520pub fn _mm_maskz_getexp_pd(k: __mmask8, a: __m128d) -> __m128d {
    unsafe { transmute(vgetexppd128(a.as_f64x2(), f64x2::ZERO, k)) }
5522}
5523
5524/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5525/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5526/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5527/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5528/// * [`_MM_FROUND_TO_POS_INF`] : round up
5529/// * [`_MM_FROUND_TO_ZERO`] : truncate
5530/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5531///
5532/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_ps&expand=4784)
5533#[inline]
5534#[target_feature(enable = "avx512f")]
5535#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5536#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5537#[rustc_legacy_const_generics(1)]
5538pub fn _mm512_roundscale_ps<const IMM8: i32>(a: __m512) -> __m512 {
5539 unsafe {
5540 static_assert_uimm_bits!(IMM8, 8);
5541 let a: f32x16 = a.as_f32x16();
        let r: f32x16 = vrndscaleps(
            a,
            IMM8,
            f32x16::ZERO,
            0b11111111_11111111,
            _MM_FROUND_CUR_DIRECTION,
        );
        transmute(r)
5550 }
5551}
5552
5553/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5554/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5555/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5556/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5557/// * [`_MM_FROUND_TO_POS_INF`] : round up
5558/// * [`_MM_FROUND_TO_ZERO`] : truncate
5559/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5560///
5561/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_ps&expand=4782)
5562#[inline]
5563#[target_feature(enable = "avx512f")]
5564#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5565#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5566#[rustc_legacy_const_generics(3)]
5567pub fn _mm512_mask_roundscale_ps<const IMM8: i32>(src: __m512, k: __mmask16, a: __m512) -> __m512 {
5568 unsafe {
5569 static_assert_uimm_bits!(IMM8, 8);
5570 let a: f32x16 = a.as_f32x16();
5571 let src: f32x16 = src.as_f32x16();
        let r: f32x16 = vrndscaleps(a, IMM8, src, k, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
5574 }
5575}
5576
5577/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5578/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5579/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5580/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5581/// * [`_MM_FROUND_TO_POS_INF`] : round up
5582/// * [`_MM_FROUND_TO_ZERO`] : truncate
5583/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5584///
5585/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_ps&expand=4783)
5586#[inline]
5587#[target_feature(enable = "avx512f")]
5588#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5589#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5590#[rustc_legacy_const_generics(2)]
5591pub fn _mm512_maskz_roundscale_ps<const IMM8: i32>(k: __mmask16, a: __m512) -> __m512 {
5592 unsafe {
5593 static_assert_uimm_bits!(IMM8, 8);
5594 let a: f32x16 = a.as_f32x16();
        let r: f32x16 = vrndscaleps(a, IMM8, f32x16::ZERO, k, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
5597 }
5598}
5599
5600/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5601/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5602/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5603/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5604/// * [`_MM_FROUND_TO_POS_INF`] : round up
5605/// * [`_MM_FROUND_TO_ZERO`] : truncate
5606/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5607///
5608/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_roundscale_ps&expand=4781)
5609#[inline]
5610#[target_feature(enable = "avx512f,avx512vl")]
5611#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5612#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 250))]
5613#[rustc_legacy_const_generics(1)]
5614pub fn _mm256_roundscale_ps<const IMM8: i32>(a: __m256) -> __m256 {
5615 unsafe {
5616 static_assert_uimm_bits!(IMM8, 8);
5617 let a: f32x8 = a.as_f32x8();
        let r: f32x8 = vrndscaleps256(a, IMM8, f32x8::ZERO, 0b11111111);
        transmute(r)
5620 }
5621}
5622
5623/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5624/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5625/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5626/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5627/// * [`_MM_FROUND_TO_POS_INF`] : round up
5628/// * [`_MM_FROUND_TO_ZERO`] : truncate
5629/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5630///
5631/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_roundscale_ps&expand=4779)
5632#[inline]
5633#[target_feature(enable = "avx512f,avx512vl")]
5634#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5635#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5636#[rustc_legacy_const_generics(3)]
5637pub fn _mm256_mask_roundscale_ps<const IMM8: i32>(src: __m256, k: __mmask8, a: __m256) -> __m256 {
5638 unsafe {
5639 static_assert_uimm_bits!(IMM8, 8);
5640 let a: f32x8 = a.as_f32x8();
5641 let src: f32x8 = src.as_f32x8();
        let r: f32x8 = vrndscaleps256(a, IMM8, src, k);
        transmute(r)
5644 }
5645}
5646
5647/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5648/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5649/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5650/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5651/// * [`_MM_FROUND_TO_POS_INF`] : round up
5652/// * [`_MM_FROUND_TO_ZERO`] : truncate
5653/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5654///
5655/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_roundscale_ps&expand=4780)
5656#[inline]
5657#[target_feature(enable = "avx512f,avx512vl")]
5658#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5659#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5660#[rustc_legacy_const_generics(2)]
5661pub fn _mm256_maskz_roundscale_ps<const IMM8: i32>(k: __mmask8, a: __m256) -> __m256 {
5662 unsafe {
5663 static_assert_uimm_bits!(IMM8, 8);
5664 let a: f32x8 = a.as_f32x8();
        let r: f32x8 = vrndscaleps256(a, IMM8, f32x8::ZERO, k);
        transmute(r)
5667 }
5668}
5669
5670/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5671/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5672/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5673/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5674/// * [`_MM_FROUND_TO_POS_INF`] : round up
5675/// * [`_MM_FROUND_TO_ZERO`] : truncate
5676/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5677///
5678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_roundscale_ps&expand=4778)
5679#[inline]
5680#[target_feature(enable = "avx512f,avx512vl")]
5681#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5682#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 250))]
5683#[rustc_legacy_const_generics(1)]
5684pub fn _mm_roundscale_ps<const IMM8: i32>(a: __m128) -> __m128 {
5685 unsafe {
5686 static_assert_uimm_bits!(IMM8, 8);
5687 let a: f32x4 = a.as_f32x4();
        let r: f32x4 = vrndscaleps128(a, IMM8, f32x4::ZERO, 0b00001111);
        transmute(r)
5690 }
5691}
5692
5693/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5694/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5695/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5696/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5697/// * [`_MM_FROUND_TO_POS_INF`] : round up
5698/// * [`_MM_FROUND_TO_ZERO`] : truncate
5699/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5700///
5701/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_roundscale_ps&expand=4776)
5702#[inline]
5703#[target_feature(enable = "avx512f,avx512vl")]
5704#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5705#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5706#[rustc_legacy_const_generics(3)]
5707pub fn _mm_mask_roundscale_ps<const IMM8: i32>(src: __m128, k: __mmask8, a: __m128) -> __m128 {
5708 unsafe {
5709 static_assert_uimm_bits!(IMM8, 8);
5710 let a: f32x4 = a.as_f32x4();
5711 let src: f32x4 = src.as_f32x4();
        let r: f32x4 = vrndscaleps128(a, IMM8, src, k);
        transmute(r)
5714 }
5715}
5716
5717/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5718/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5719/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5720/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5721/// * [`_MM_FROUND_TO_POS_INF`] : round up
5722/// * [`_MM_FROUND_TO_ZERO`] : truncate
5723/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5724///
5725/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_roundscale_ps&expand=4777)
5726#[inline]
5727#[target_feature(enable = "avx512f,avx512vl")]
5728#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5729#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5730#[rustc_legacy_const_generics(2)]
5731pub fn _mm_maskz_roundscale_ps<const IMM8: i32>(k: __mmask8, a: __m128) -> __m128 {
5732 unsafe {
5733 static_assert_uimm_bits!(IMM8, 8);
5734 let a: f32x4 = a.as_f32x4();
        let r: f32x4 = vrndscaleps128(a, IMM8, f32x4::ZERO, k);
        transmute(r)
5737 }
5738}
5739
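// Usage sketch for the single-precision roundscale family above (illustrative only).
// The low bits of IMM8 select the rounding mode listed in the doc comments; per
// Intel's documentation the high nibble selects how many fraction bits to keep
// (0 in these examples, i.e. rounding to whole numbers):
//
//     let x = _mm512_set1_ps(2.7);
//     let nearest = _mm512_roundscale_ps::<0>(x);                          // 3.0
//     let truncated = _mm512_roundscale_ps::<{ _MM_FROUND_TO_ZERO }>(x);   // 2.0
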
5740/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5741/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5742/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5743/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5744/// * [`_MM_FROUND_TO_POS_INF`] : round up
5745/// * [`_MM_FROUND_TO_ZERO`] : truncate
5746/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5747///
5748/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_pd&expand=4775)
5749#[inline]
5750#[target_feature(enable = "avx512f")]
5751#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5752#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5753#[rustc_legacy_const_generics(1)]
5754pub fn _mm512_roundscale_pd<const IMM8: i32>(a: __m512d) -> __m512d {
5755 unsafe {
5756 static_assert_uimm_bits!(IMM8, 8);
5757 let a: f64x8 = a.as_f64x8();
        let r: f64x8 = vrndscalepd(a, IMM8, f64x8::ZERO, 0b11111111, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
5760 }
5761}
5762
5763/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5764/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5765/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5766/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5767/// * [`_MM_FROUND_TO_POS_INF`] : round up
5768/// * [`_MM_FROUND_TO_ZERO`] : truncate
5769/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5770///
5771/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_pd&expand=4773)
5772#[inline]
5773#[target_feature(enable = "avx512f")]
5774#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5775#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5776#[rustc_legacy_const_generics(3)]
5777pub fn _mm512_mask_roundscale_pd<const IMM8: i32>(
5778 src: __m512d,
5779 k: __mmask8,
5780 a: __m512d,
5781) -> __m512d {
5782 unsafe {
5783 static_assert_uimm_bits!(IMM8, 8);
5784 let a: f64x8 = a.as_f64x8();
5785 let src: f64x8 = src.as_f64x8();
        let r: f64x8 = vrndscalepd(a, IMM8, src, k, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
5788 }
5789}
5790
5791/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5792/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5793/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5794/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5795/// * [`_MM_FROUND_TO_POS_INF`] : round up
5796/// * [`_MM_FROUND_TO_ZERO`] : truncate
5797/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5798///
5799/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_pd&expand=4774)
5800#[inline]
5801#[target_feature(enable = "avx512f")]
5802#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5803#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5804#[rustc_legacy_const_generics(2)]
5805pub fn _mm512_maskz_roundscale_pd<const IMM8: i32>(k: __mmask8, a: __m512d) -> __m512d {
5806 unsafe {
5807 static_assert_uimm_bits!(IMM8, 8);
5808 let a: f64x8 = a.as_f64x8();
        let r: f64x8 = vrndscalepd(a, IMM8, f64x8::ZERO, k, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
5811 }
5812}
5813
5814/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5815/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5816/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5817/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5818/// * [`_MM_FROUND_TO_POS_INF`] : round up
5819/// * [`_MM_FROUND_TO_ZERO`] : truncate
5820/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5821///
5822/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_roundscale_pd&expand=4772)
5823#[inline]
5824#[target_feature(enable = "avx512f,avx512vl")]
5825#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5826#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5827#[rustc_legacy_const_generics(1)]
5828pub fn _mm256_roundscale_pd<const IMM8: i32>(a: __m256d) -> __m256d {
5829 unsafe {
5830 static_assert_uimm_bits!(IMM8, 8);
5831 let a: f64x4 = a.as_f64x4();
        let r: f64x4 = vrndscalepd256(a, IMM8, f64x4::ZERO, 0b00001111);
        transmute(r)
5834 }
5835}
5836
5837/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5838/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5839/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5840/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5841/// * [`_MM_FROUND_TO_POS_INF`] : round up
5842/// * [`_MM_FROUND_TO_ZERO`] : truncate
5843/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5844///
5845/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_roundscale_pd&expand=4770)
5846#[inline]
5847#[target_feature(enable = "avx512f,avx512vl")]
5848#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5849#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5850#[rustc_legacy_const_generics(3)]
5851pub fn _mm256_mask_roundscale_pd<const IMM8: i32>(
5852 src: __m256d,
5853 k: __mmask8,
5854 a: __m256d,
5855) -> __m256d {
5856 unsafe {
5857 static_assert_uimm_bits!(IMM8, 8);
5858 let a: f64x4 = a.as_f64x4();
5859 let src: f64x4 = src.as_f64x4();
        let r: f64x4 = vrndscalepd256(a, IMM8, src, k);
        transmute(r)
5862 }
5863}
5864
5865/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5866/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5867/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5868/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5869/// * [`_MM_FROUND_TO_POS_INF`] : round up
5870/// * [`_MM_FROUND_TO_ZERO`] : truncate
5871/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5872///
5873/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_roundscale_pd&expand=4771)
5874#[inline]
5875#[target_feature(enable = "avx512f,avx512vl")]
5876#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5877#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5878#[rustc_legacy_const_generics(2)]
5879pub fn _mm256_maskz_roundscale_pd<const IMM8: i32>(k: __mmask8, a: __m256d) -> __m256d {
5880 unsafe {
5881 static_assert_uimm_bits!(IMM8, 8);
5882 let a: f64x4 = a.as_f64x4();
        let r: f64x4 = vrndscalepd256(a, IMM8, f64x4::ZERO, k);
        transmute(r)
5885 }
5886}
5887
5888/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5889/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5890/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5891/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5892/// * [`_MM_FROUND_TO_POS_INF`] : round up
5893/// * [`_MM_FROUND_TO_ZERO`] : truncate
5894/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5895///
5896/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_roundscale_pd&expand=4769)
5897#[inline]
5898#[target_feature(enable = "avx512f,avx512vl")]
5899#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5900#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5901#[rustc_legacy_const_generics(1)]
5902pub fn _mm_roundscale_pd<const IMM8: i32>(a: __m128d) -> __m128d {
5903 unsafe {
5904 static_assert_uimm_bits!(IMM8, 8);
5905 let a: f64x2 = a.as_f64x2();
        let r: f64x2 = vrndscalepd128(a, IMM8, f64x2::ZERO, 0b00000011);
        transmute(r)
5908 }
5909}
5910
5911/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5912/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5913/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5914/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5915/// * [`_MM_FROUND_TO_POS_INF`] : round up
5916/// * [`_MM_FROUND_TO_ZERO`] : truncate
5917/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5918///
5919/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_roundscale_pd&expand=4767)
5920#[inline]
5921#[target_feature(enable = "avx512f,avx512vl")]
5922#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5923#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5924#[rustc_legacy_const_generics(3)]
5925pub fn _mm_mask_roundscale_pd<const IMM8: i32>(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
5926 unsafe {
5927 static_assert_uimm_bits!(IMM8, 8);
5928 let a: f64x2 = a.as_f64x2();
5929 let src: f64x2 = src.as_f64x2();
        let r: f64x2 = vrndscalepd128(a, IMM8, src, k);
        transmute(r)
5932 }
5933}
5934
5935/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5936/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5937/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5938/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5939/// * [`_MM_FROUND_TO_POS_INF`] : round up
5940/// * [`_MM_FROUND_TO_ZERO`] : truncate
5941/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5942///
5943/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_roundscale_pd&expand=4768)
5944#[inline]
5945#[target_feature(enable = "avx512f,avx512vl")]
5946#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5947#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5948#[rustc_legacy_const_generics(2)]
5949pub fn _mm_maskz_roundscale_pd<const IMM8: i32>(k: __mmask8, a: __m128d) -> __m128d {
5950 unsafe {
5951 static_assert_uimm_bits!(IMM8, 8);
5952 let a: f64x2 = a.as_f64x2();
        let r: f64x2 = vrndscalepd128(a, IMM8, f64x2::ZERO, k);
        transmute(r)
5955 }
5956}
5957
5958/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.
5959///
5960/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_ps&expand=4883)
5961#[inline]
5962#[target_feature(enable = "avx512f")]
5963#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5964#[cfg_attr(test, assert_instr(vscalefps))]
5965pub fn _mm512_scalef_ps(a: __m512, b: __m512) -> __m512 {
5966 unsafe {
        transmute(vscalefps(
            a.as_f32x16(),
            b.as_f32x16(),
            f32x16::ZERO,
            0b11111111_11111111,
5972 _MM_FROUND_CUR_DIRECTION,
5973 ))
5974 }
5975}
5976
5977/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5978///
5979/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_ps&expand=4881)
5980#[inline]
5981#[target_feature(enable = "avx512f")]
5982#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
5983#[cfg_attr(test, assert_instr(vscalefps))]
5984pub fn _mm512_mask_scalef_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
5985 unsafe {
        transmute(vscalefps(
            a.as_f32x16(),
            b.as_f32x16(),
            src.as_f32x16(),
            k,
5991 _MM_FROUND_CUR_DIRECTION,
5992 ))
5993 }
5994}
5995
5996/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5997///
5998/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_ps&expand=4882)
5999#[inline]
6000#[target_feature(enable = "avx512f")]
6001#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6002#[cfg_attr(test, assert_instr(vscalefps))]
6003pub fn _mm512_maskz_scalef_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
6004 unsafe {
        transmute(vscalefps(
            a.as_f32x16(),
            b.as_f32x16(),
            f32x16::ZERO,
            k,
6010 _MM_FROUND_CUR_DIRECTION,
6011 ))
6012 }
6013}
6014
6015/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.
6016///
6017/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_scalef_ps&expand=4880)
6018#[inline]
6019#[target_feature(enable = "avx512f,avx512vl")]
6020#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6021#[cfg_attr(test, assert_instr(vscalefps))]
6022pub fn _mm256_scalef_ps(a: __m256, b: __m256) -> __m256 {
6023 unsafe {
        transmute(vscalefps256(
            a.as_f32x8(),
            b.as_f32x8(),
            f32x8::ZERO,
            0b11111111,
6029 ))
6030 }
6031}
6032
6033/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6034///
6035/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_scalef_ps&expand=4878)
6036#[inline]
6037#[target_feature(enable = "avx512f,avx512vl")]
6038#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6039#[cfg_attr(test, assert_instr(vscalefps))]
6040pub fn _mm256_mask_scalef_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe { transmute(vscalefps256(a.as_f32x8(), b.as_f32x8(), src.as_f32x8(), k)) }
6042}
6043
6044/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6045///
6046/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_scalef_ps&expand=4879)
6047#[inline]
6048#[target_feature(enable = "avx512f,avx512vl")]
6049#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6050#[cfg_attr(test, assert_instr(vscalefps))]
6051pub fn _mm256_maskz_scalef_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe { transmute(vscalefps256(a.as_f32x8(), b.as_f32x8(), f32x8::ZERO, k)) }
6053}
6054
6055/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.
6056///
6057/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_scalef_ps&expand=4877)
6058#[inline]
6059#[target_feature(enable = "avx512f,avx512vl")]
6060#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6061#[cfg_attr(test, assert_instr(vscalefps))]
6062pub fn _mm_scalef_ps(a: __m128, b: __m128) -> __m128 {
6063 unsafe {
        transmute(vscalefps128(
            a.as_f32x4(),
            b.as_f32x4(),
            f32x4::ZERO,
            0b00001111,
6069 ))
6070 }
6071}
6072
6073/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6074///
6075/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_scalef_ps&expand=4875)
6076#[inline]
6077#[target_feature(enable = "avx512f,avx512vl")]
6078#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6079#[cfg_attr(test, assert_instr(vscalefps))]
6080pub fn _mm_mask_scalef_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe { transmute(vscalefps128(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k)) }
6082}
6083
6084/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6085///
6086/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_scalef_ps&expand=4876)
6087#[inline]
6088#[target_feature(enable = "avx512f,avx512vl")]
6089#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6090#[cfg_attr(test, assert_instr(vscalefps))]
6091pub fn _mm_maskz_scalef_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe { transmute(vscalefps128(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, k)) }
6093}
6094
6095/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.
6096///
6097/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_pd&expand=4874)
6098#[inline]
6099#[target_feature(enable = "avx512f")]
6100#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6101#[cfg_attr(test, assert_instr(vscalefpd))]
6102pub fn _mm512_scalef_pd(a: __m512d, b: __m512d) -> __m512d {
6103 unsafe {
        transmute(vscalefpd(
            a.as_f64x8(),
            b.as_f64x8(),
            f64x8::ZERO,
            0b11111111,
6109 _MM_FROUND_CUR_DIRECTION,
6110 ))
6111 }
6112}
6113
6114/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6115///
6116/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_pd&expand=4872)
6117#[inline]
6118#[target_feature(enable = "avx512f")]
6119#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6120#[cfg_attr(test, assert_instr(vscalefpd))]
6121pub fn _mm512_mask_scalef_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
6122 unsafe {
        transmute(vscalefpd(
            a.as_f64x8(),
            b.as_f64x8(),
            src.as_f64x8(),
            k,
6128 _MM_FROUND_CUR_DIRECTION,
6129 ))
6130 }
6131}
6132
6133/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6134///
6135/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_pd&expand=4873)
6136#[inline]
6137#[target_feature(enable = "avx512f")]
6138#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6139#[cfg_attr(test, assert_instr(vscalefpd))]
6140pub fn _mm512_maskz_scalef_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
6141 unsafe {
        transmute(vscalefpd(
            a.as_f64x8(),
            b.as_f64x8(),
            f64x8::ZERO,
            k,
6147 _MM_FROUND_CUR_DIRECTION,
6148 ))
6149 }
6150}
6151
6152/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.
6153///
6154/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_scalef_pd&expand=4871)
6155#[inline]
6156#[target_feature(enable = "avx512f,avx512vl")]
6157#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6158#[cfg_attr(test, assert_instr(vscalefpd))]
6159pub fn _mm256_scalef_pd(a: __m256d, b: __m256d) -> __m256d {
6160 unsafe {
        transmute(vscalefpd256(
            a.as_f64x4(),
            b.as_f64x4(),
            f64x4::ZERO,
            0b00001111,
6166 ))
6167 }
6168}
6169
6170/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6171///
6172/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_scalef_pd&expand=4869)
6173#[inline]
6174#[target_feature(enable = "avx512f,avx512vl")]
6175#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6176#[cfg_attr(test, assert_instr(vscalefpd))]
6177pub fn _mm256_mask_scalef_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe { transmute(vscalefpd256(a.as_f64x4(), b.as_f64x4(), src.as_f64x4(), k)) }
6179}
6180
6181/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6182///
6183/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_scalef_pd&expand=4870)
6184#[inline]
6185#[target_feature(enable = "avx512f,avx512vl")]
6186#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6187#[cfg_attr(test, assert_instr(vscalefpd))]
6188pub fn _mm256_maskz_scalef_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe { transmute(vscalefpd256(a.as_f64x4(), b.as_f64x4(), f64x4::ZERO, k)) }
6190}
6191
6192/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.
6193///
6194/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_scalef_pd&expand=4868)
6195#[inline]
6196#[target_feature(enable = "avx512f,avx512vl")]
6197#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6198#[cfg_attr(test, assert_instr(vscalefpd))]
6199pub fn _mm_scalef_pd(a: __m128d, b: __m128d) -> __m128d {
6200 unsafe {
        transmute(vscalefpd128(
            a.as_f64x2(),
            b.as_f64x2(),
            f64x2::ZERO,
            0b00000011,
6206 ))
6207 }
6208}
6209
6210/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6211///
6212/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_scalef_pd&expand=4866)
6213#[inline]
6214#[target_feature(enable = "avx512f,avx512vl")]
6215#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6216#[cfg_attr(test, assert_instr(vscalefpd))]
6217pub fn _mm_mask_scalef_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe { transmute(vscalefpd128(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k)) }
6219}
6220
6221/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6222///
6223/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_scalef_pd&expand=4867)
6224#[inline]
6225#[target_feature(enable = "avx512f,avx512vl")]
6226#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6227#[cfg_attr(test, assert_instr(vscalefpd))]
6228pub fn _mm_maskz_scalef_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe { transmute(vscalefpd128(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, k)) }
6230}
6231
6232/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6233///
6234/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_ps&expand=2499)
6235#[inline]
6236#[target_feature(enable = "avx512f")]
6237#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6238#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6239#[rustc_legacy_const_generics(3)]
6240pub fn _mm512_fixupimm_ps<const IMM8: i32>(a: __m512, b: __m512, c: __m512i) -> __m512 {
6241 unsafe {
6242 static_assert_uimm_bits!(IMM8, 8);
6243 let a: f32x16 = a.as_f32x16();
6244 let b: f32x16 = b.as_f32x16();
6245 let c: i32x16 = c.as_i32x16();
        let r: f32x16 = vfixupimmps(a, b, c, IMM8, 0b11111111_11111111, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
6248 }
6249}
6250
6251/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6252///
6253/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_ps&expand=2500)
6254#[inline]
6255#[target_feature(enable = "avx512f")]
6256#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6257#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6258#[rustc_legacy_const_generics(4)]
6259pub fn _mm512_mask_fixupimm_ps<const IMM8: i32>(
6260 a: __m512,
6261 k: __mmask16,
6262 b: __m512,
6263 c: __m512i,
6264) -> __m512 {
6265 unsafe {
6266 static_assert_uimm_bits!(IMM8, 8);
6267 let a: f32x16 = a.as_f32x16();
6268 let b: f32x16 = b.as_f32x16();
6269 let c: i32x16 = c.as_i32x16();
        let r: f32x16 = vfixupimmps(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
6272 }
6273}
6274
6275/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6276///
6277/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_ps&expand=2501)
6278#[inline]
6279#[target_feature(enable = "avx512f")]
6280#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6281#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6282#[rustc_legacy_const_generics(4)]
6283pub fn _mm512_maskz_fixupimm_ps<const IMM8: i32>(
6284 k: __mmask16,
6285 a: __m512,
6286 b: __m512,
6287 c: __m512i,
6288) -> __m512 {
6289 unsafe {
6290 static_assert_uimm_bits!(IMM8, 8);
6291 let a: f32x16 = a.as_f32x16();
6292 let b: f32x16 = b.as_f32x16();
6293 let c: i32x16 = c.as_i32x16();
        let r: f32x16 = vfixupimmpsz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
6296 }
6297}
6298
6299/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6300///
6301/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fixupimm_ps&expand=2496)
6302#[inline]
6303#[target_feature(enable = "avx512f,avx512vl")]
6304#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6305#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6306#[rustc_legacy_const_generics(3)]
6307pub fn _mm256_fixupimm_ps<const IMM8: i32>(a: __m256, b: __m256, c: __m256i) -> __m256 {
6308 unsafe {
6309 static_assert_uimm_bits!(IMM8, 8);
6310 let a: f32x8 = a.as_f32x8();
6311 let b: f32x8 = b.as_f32x8();
6312 let c: i32x8 = c.as_i32x8();
        let r: f32x8 = vfixupimmps256(a, b, c, IMM8, 0b11111111);
        transmute(r)
6315 }
6316}
6317
6318/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6319///
6320/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fixupimm_ps&expand=2497)
6321#[inline]
6322#[target_feature(enable = "avx512f,avx512vl")]
6323#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6324#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6325#[rustc_legacy_const_generics(4)]
6326pub fn _mm256_mask_fixupimm_ps<const IMM8: i32>(
6327 a: __m256,
6328 k: __mmask8,
6329 b: __m256,
6330 c: __m256i,
6331) -> __m256 {
6332 unsafe {
6333 static_assert_uimm_bits!(IMM8, 8);
6334 let a: f32x8 = a.as_f32x8();
6335 let b: f32x8 = b.as_f32x8();
6336 let c: i32x8 = c.as_i32x8();
        let r: f32x8 = vfixupimmps256(a, b, c, IMM8, k);
        transmute(r)
6339 }
6340}
6341
6342/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6343///
6344/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fixupimm_ps&expand=2498)
6345#[inline]
6346#[target_feature(enable = "avx512f,avx512vl")]
6347#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6348#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6349#[rustc_legacy_const_generics(4)]
6350pub fn _mm256_maskz_fixupimm_ps<const IMM8: i32>(
6351 k: __mmask8,
6352 a: __m256,
6353 b: __m256,
6354 c: __m256i,
6355) -> __m256 {
6356 unsafe {
6357 static_assert_uimm_bits!(IMM8, 8);
6358 let a: f32x8 = a.as_f32x8();
6359 let b: f32x8 = b.as_f32x8();
6360 let c: i32x8 = c.as_i32x8();
        let r: f32x8 = vfixupimmpsz256(a, b, c, IMM8, k);
        transmute(r)
6363 }
6364}
6365
6366/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6367///
6368/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fixupimm_ps&expand=2493)
6369#[inline]
6370#[target_feature(enable = "avx512f,avx512vl")]
6371#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6372#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6373#[rustc_legacy_const_generics(3)]
6374pub fn _mm_fixupimm_ps<const IMM8: i32>(a: __m128, b: __m128, c: __m128i) -> __m128 {
6375 unsafe {
6376 static_assert_uimm_bits!(IMM8, 8);
6377 let a: f32x4 = a.as_f32x4();
6378 let b: f32x4 = b.as_f32x4();
6379 let c: i32x4 = c.as_i32x4();
        let r: f32x4 = vfixupimmps128(a, b, c, IMM8, 0b00001111);
        transmute(r)
6382 }
6383}
6384
6385/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6386///
6387/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fixupimm_ps&expand=2494)
6388#[inline]
6389#[target_feature(enable = "avx512f,avx512vl")]
6390#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6391#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6392#[rustc_legacy_const_generics(4)]
6393pub fn _mm_mask_fixupimm_ps<const IMM8: i32>(
6394 a: __m128,
6395 k: __mmask8,
6396 b: __m128,
6397 c: __m128i,
6398) -> __m128 {
6399 unsafe {
6400 static_assert_uimm_bits!(IMM8, 8);
6401 let a: f32x4 = a.as_f32x4();
6402 let b: f32x4 = b.as_f32x4();
6403 let c: i32x4 = c.as_i32x4();
        let r: f32x4 = vfixupimmps128(a, b, c, IMM8, k);
        transmute(r)
6406 }
6407}
6408
6409/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6410///
6411/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fixupimm_ps&expand=2495)
6412#[inline]
6413#[target_feature(enable = "avx512f,avx512vl")]
6414#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6415#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6416#[rustc_legacy_const_generics(4)]
6417pub fn _mm_maskz_fixupimm_ps<const IMM8: i32>(
6418 k: __mmask8,
6419 a: __m128,
6420 b: __m128,
6421 c: __m128i,
6422) -> __m128 {
6423 unsafe {
6424 static_assert_uimm_bits!(IMM8, 8);
6425 let a: f32x4 = a.as_f32x4();
6426 let b: f32x4 = b.as_f32x4();
6427 let c: i32x4 = c.as_i32x4();
        let r: f32x4 = vfixupimmpsz128(a, b, c, IMM8, k);
        transmute(r)
6430 }
6431}
6432
6433/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6434///
6435/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_pd&expand=2490)
6436#[inline]
6437#[target_feature(enable = "avx512f")]
6438#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6439#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6440#[rustc_legacy_const_generics(3)]
6441pub fn _mm512_fixupimm_pd<const IMM8: i32>(a: __m512d, b: __m512d, c: __m512i) -> __m512d {
6442 unsafe {
6443 static_assert_uimm_bits!(IMM8, 8);
6444 let a: f64x8 = a.as_f64x8();
6445 let b: f64x8 = b.as_f64x8();
6446 let c: i64x8 = c.as_i64x8();
        let r: f64x8 = vfixupimmpd(a, b, c, IMM8, 0b11111111, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
6449 }
6450}
6451
6452/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6453///
6454/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_pd&expand=2491)
6455#[inline]
6456#[target_feature(enable = "avx512f")]
6457#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6458#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6459#[rustc_legacy_const_generics(4)]
6460pub fn _mm512_mask_fixupimm_pd<const IMM8: i32>(
6461 a: __m512d,
6462 k: __mmask8,
6463 b: __m512d,
6464 c: __m512i,
6465) -> __m512d {
6466 unsafe {
6467 static_assert_uimm_bits!(IMM8, 8);
6468 let a: f64x8 = a.as_f64x8();
6469 let b: f64x8 = b.as_f64x8();
6470 let c: i64x8 = c.as_i64x8();
        let r: f64x8 = vfixupimmpd(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
6473 }
6474}
6475
6476/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6477///
6478/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_pd&expand=2492)
6479#[inline]
6480#[target_feature(enable = "avx512f")]
6481#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6482#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6483#[rustc_legacy_const_generics(4)]
6484pub fn _mm512_maskz_fixupimm_pd<const IMM8: i32>(
6485 k: __mmask8,
6486 a: __m512d,
6487 b: __m512d,
6488 c: __m512i,
6489) -> __m512d {
6490 unsafe {
6491 static_assert_uimm_bits!(IMM8, 8);
6492 let a: f64x8 = a.as_f64x8();
6493 let b: f64x8 = b.as_f64x8();
6494 let c: i64x8 = c.as_i64x8();
        let r: f64x8 = vfixupimmpdz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
6497 }
6498}
6499
6500/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6501///
6502/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fixupimm_pd&expand=2487)
6503#[inline]
6504#[target_feature(enable = "avx512f,avx512vl")]
6505#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6506#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6507#[rustc_legacy_const_generics(3)]
6508pub fn _mm256_fixupimm_pd<const IMM8: i32>(a: __m256d, b: __m256d, c: __m256i) -> __m256d {
6509 unsafe {
6510 static_assert_uimm_bits!(IMM8, 8);
6511 let a: f64x4 = a.as_f64x4();
6512 let b: f64x4 = b.as_f64x4();
6513 let c: i64x4 = c.as_i64x4();
        let r: f64x4 = vfixupimmpd256(a, b, c, IMM8, 0b00001111);
        transmute(r)
6516 }
6517}
6518
6519/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6520///
6521/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fixupimm_pd&expand=2488)
6522#[inline]
6523#[target_feature(enable = "avx512f,avx512vl")]
6524#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6525#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6526#[rustc_legacy_const_generics(4)]
6527pub fn _mm256_mask_fixupimm_pd<const IMM8: i32>(
6528 a: __m256d,
6529 k: __mmask8,
6530 b: __m256d,
6531 c: __m256i,
6532) -> __m256d {
6533 unsafe {
6534 static_assert_uimm_bits!(IMM8, 8);
6535 let a: f64x4 = a.as_f64x4();
6536 let b: f64x4 = b.as_f64x4();
6537 let c: i64x4 = c.as_i64x4();
        let r: f64x4 = vfixupimmpd256(a, b, c, IMM8, k);
        transmute(r)
6540 }
6541}
6542
6543/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6544///
6545/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fixupimm_pd&expand=2489)
6546#[inline]
6547#[target_feature(enable = "avx512f,avx512vl")]
6548#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6549#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6550#[rustc_legacy_const_generics(4)]
6551pub fn _mm256_maskz_fixupimm_pd<const IMM8: i32>(
6552 k: __mmask8,
6553 a: __m256d,
6554 b: __m256d,
6555 c: __m256i,
6556) -> __m256d {
6557 unsafe {
6558 static_assert_uimm_bits!(IMM8, 8);
6559 let a: f64x4 = a.as_f64x4();
6560 let b: f64x4 = b.as_f64x4();
6561 let c: i64x4 = c.as_i64x4();
        let r: f64x4 = vfixupimmpdz256(a, b, c, IMM8, k);
        transmute(r)
6564 }
6565}
6566
6567/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6568///
6569/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fixupimm_pd&expand=2484)
6570#[inline]
6571#[target_feature(enable = "avx512f,avx512vl")]
6572#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6573#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6574#[rustc_legacy_const_generics(3)]
6575pub fn _mm_fixupimm_pd<const IMM8: i32>(a: __m128d, b: __m128d, c: __m128i) -> __m128d {
6576 unsafe {
6577 static_assert_uimm_bits!(IMM8, 8);
6578 let a: f64x2 = a.as_f64x2();
6579 let b: f64x2 = b.as_f64x2();
6580 let c: i64x2 = c.as_i64x2();
        let r: f64x2 = vfixupimmpd128(a, b, c, IMM8, 0b00000011);
        transmute(r)
6583 }
6584}
6585
6586/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6587///
6588/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fixupimm_pd&expand=2485)
6589#[inline]
6590#[target_feature(enable = "avx512f,avx512vl")]
6591#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6592#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6593#[rustc_legacy_const_generics(4)]
6594pub fn _mm_mask_fixupimm_pd<const IMM8: i32>(
6595 a: __m128d,
6596 k: __mmask8,
6597 b: __m128d,
6598 c: __m128i,
6599) -> __m128d {
6600 unsafe {
6601 static_assert_uimm_bits!(IMM8, 8);
6602 let a: f64x2 = a.as_f64x2();
6603 let b: f64x2 = b.as_f64x2();
6604 let c: i64x2 = c.as_i64x2();
        let r: f64x2 = vfixupimmpd128(a, b, c, IMM8, k);
        transmute(r)
6607 }
6608}
6609
6610/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6611///
6612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fixupimm_pd&expand=2486)
6613#[inline]
6614#[target_feature(enable = "avx512f,avx512vl")]
6615#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6616#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6617#[rustc_legacy_const_generics(4)]
6618pub fn _mm_maskz_fixupimm_pd<const IMM8: i32>(
6619 k: __mmask8,
6620 a: __m128d,
6621 b: __m128d,
6622 c: __m128i,
6623) -> __m128d {
6624 unsafe {
6625 static_assert_uimm_bits!(IMM8, 8);
6626 let a: f64x2 = a.as_f64x2();
6627 let b: f64x2 = b.as_f64x2();
6628 let c: i64x2 = c.as_i64x2();
        let r: f64x2 = vfixupimmpdz128(a, b, c, IMM8, k);
        transmute(r)
6631 }
6632}
6633
6634/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
6635///
6636/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ternarylogic_epi32&expand=5867)
6637#[inline]
6638#[target_feature(enable = "avx512f")]
6639#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6640#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6641#[rustc_legacy_const_generics(3)]
6642pub fn _mm512_ternarylogic_epi32<const IMM8: i32>(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
6643 unsafe {
6644 static_assert_uimm_bits!(IMM8, 8);
6645 let a: i32x16 = a.as_i32x16();
6646 let b: i32x16 = b.as_i32x16();
6647 let c: i32x16 = c.as_i32x16();
6648 let r: i32x16 = vpternlogd(a, b, c, IMM8);
        transmute(r)
6650 }
6651}
6652
6653/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 32-bit granularity (32-bit elements are copied from src when the corresponding mask bit is not set).
6654///
6655/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ternarylogic_epi32&expand=5865)
6656#[inline]
6657#[target_feature(enable = "avx512f")]
6658#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6659#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6660#[rustc_legacy_const_generics(4)]
6661pub fn _mm512_mask_ternarylogic_epi32<const IMM8: i32>(
6662 src: __m512i,
6663 k: __mmask16,
6664 a: __m512i,
6665 b: __m512i,
6666) -> __m512i {
6667 unsafe {
6668 static_assert_uimm_bits!(IMM8, 8);
6669 let src: i32x16 = src.as_i32x16();
6670 let a: i32x16 = a.as_i32x16();
6671 let b: i32x16 = b.as_i32x16();
        let r: i32x16 = vpternlogd(src, a, b, IMM8);
        transmute(simd_select_bitmask(k, r, src))
6674 }
6675}
6676
6677/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set).
6678///
6679/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ternarylogic_epi32&expand=5866)
6680#[inline]
6681#[target_feature(enable = "avx512f")]
6682#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6683#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6684#[rustc_legacy_const_generics(4)]
6685pub fn _mm512_maskz_ternarylogic_epi32<const IMM8: i32>(
6686 k: __mmask16,
6687 a: __m512i,
6688 b: __m512i,
6689 c: __m512i,
6690) -> __m512i {
6691 unsafe {
6692 static_assert_uimm_bits!(IMM8, 8);
6693 let a: i32x16 = a.as_i32x16();
6694 let b: i32x16 = b.as_i32x16();
6695 let c: i32x16 = c.as_i32x16();
6696 let r: i32x16 = vpternlogd(a, b, c, IMM8);
        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
6698 }
6699}
6700
6701/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
6702///
6703/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ternarylogic_epi32&expand=5864)
6704#[inline]
6705#[target_feature(enable = "avx512f,avx512vl")]
6706#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6707#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6708#[rustc_legacy_const_generics(3)]
6709pub fn _mm256_ternarylogic_epi32<const IMM8: i32>(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
6710 unsafe {
6711 static_assert_uimm_bits!(IMM8, 8);
6712 let a: i32x8 = a.as_i32x8();
6713 let b: i32x8 = b.as_i32x8();
6714 let c: i32x8 = c.as_i32x8();
6715 let r: i32x8 = vpternlogd256(a, b, c, IMM8);
        transmute(r)
6717 }
6718}
6719
6720/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 32-bit granularity (32-bit elements are copied from src when the corresponding mask bit is not set).
6721///
6722/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ternarylogic_epi32&expand=5862)
6723#[inline]
6724#[target_feature(enable = "avx512f,avx512vl")]
6725#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6726#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6727#[rustc_legacy_const_generics(4)]
6728pub fn _mm256_mask_ternarylogic_epi32<const IMM8: i32>(
6729 src: __m256i,
6730 k: __mmask8,
6731 a: __m256i,
6732 b: __m256i,
6733) -> __m256i {
6734 unsafe {
6735 static_assert_uimm_bits!(IMM8, 8);
6736 let src: i32x8 = src.as_i32x8();
6737 let a: i32x8 = a.as_i32x8();
6738 let b: i32x8 = b.as_i32x8();
        let r: i32x8 = vpternlogd256(src, a, b, IMM8);
        transmute(simd_select_bitmask(k, r, src))
6741 }
6742}
6743
6744/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set).
6745///
6746/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ternarylogic_epi32&expand=5863)
6747#[inline]
6748#[target_feature(enable = "avx512f,avx512vl")]
6749#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6750#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6751#[rustc_legacy_const_generics(4)]
6752pub fn _mm256_maskz_ternarylogic_epi32<const IMM8: i32>(
6753 k: __mmask8,
6754 a: __m256i,
6755 b: __m256i,
6756 c: __m256i,
6757) -> __m256i {
6758 unsafe {
6759 static_assert_uimm_bits!(IMM8, 8);
6760 let a: i32x8 = a.as_i32x8();
6761 let b: i32x8 = b.as_i32x8();
6762 let c: i32x8 = c.as_i32x8();
6763 let r: i32x8 = vpternlogd256(a, b, c, IMM8);
        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
6765 }
6766}
6767
6768/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
6769///
6770/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ternarylogic_epi32&expand=5861)
6771#[inline]
6772#[target_feature(enable = "avx512f,avx512vl")]
6773#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6774#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6775#[rustc_legacy_const_generics(3)]
6776pub fn _mm_ternarylogic_epi32<const IMM8: i32>(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
6777 unsafe {
6778 static_assert_uimm_bits!(IMM8, 8);
6779 let a: i32x4 = a.as_i32x4();
6780 let b: i32x4 = b.as_i32x4();
6781 let c: i32x4 = c.as_i32x4();
6782 let r: i32x4 = vpternlogd128(a, b, c, IMM8);
        transmute(r)
6784 }
6785}
6786
6787/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 32-bit granularity (32-bit elements are copied from src when the corresponding mask bit is not set).
6788///
6789/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ternarylogic_epi32&expand=5859)
6790#[inline]
6791#[target_feature(enable = "avx512f,avx512vl")]
6792#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6793#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6794#[rustc_legacy_const_generics(4)]
6795pub fn _mm_mask_ternarylogic_epi32<const IMM8: i32>(
6796 src: __m128i,
6797 k: __mmask8,
6798 a: __m128i,
6799 b: __m128i,
6800) -> __m128i {
6801 unsafe {
6802 static_assert_uimm_bits!(IMM8, 8);
6803 let src: i32x4 = src.as_i32x4();
6804 let a: i32x4 = a.as_i32x4();
6805 let b: i32x4 = b.as_i32x4();
        let r: i32x4 = vpternlogd128(src, a, b, IMM8);
        transmute(simd_select_bitmask(k, r, src))
6808 }
6809}
6810
6811/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set).
6812///
6813/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ternarylogic_epi32&expand=5860)
6814#[inline]
6815#[target_feature(enable = "avx512f,avx512vl")]
6816#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6817#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6818#[rustc_legacy_const_generics(4)]
6819pub fn _mm_maskz_ternarylogic_epi32<const IMM8: i32>(
6820 k: __mmask8,
6821 a: __m128i,
6822 b: __m128i,
6823 c: __m128i,
6824) -> __m128i {
6825 unsafe {
6826 static_assert_uimm_bits!(IMM8, 8);
6827 let a: i32x4 = a.as_i32x4();
6828 let b: i32x4 = b.as_i32x4();
6829 let c: i32x4 = c.as_i32x4();
6830 let r: i32x4 = vpternlogd128(a, b, c, IMM8);
        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
6832 }
6833}
6834
6835/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
6836///
6837/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ternarylogic_epi64&expand=5876)
6838#[inline]
6839#[target_feature(enable = "avx512f")]
6840#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6841#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6842#[rustc_legacy_const_generics(3)]
6843pub fn _mm512_ternarylogic_epi64<const IMM8: i32>(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
6844 unsafe {
6845 static_assert_uimm_bits!(IMM8, 8);
6846 let a: i64x8 = a.as_i64x8();
6847 let b: i64x8 = b.as_i64x8();
6848 let c: i64x8 = c.as_i64x8();
6849 let r: i64x8 = vpternlogq(a, b, c, IMM8);
        transmute(r)
6851 }
6852}
6853
6854/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 64-bit granularity (64-bit elements are copied from src when the corresponding mask bit is not set).
6855///
6856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ternarylogic_epi64&expand=5874)
6857#[inline]
6858#[target_feature(enable = "avx512f")]
6859#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6860#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6861#[rustc_legacy_const_generics(4)]
6862pub fn _mm512_mask_ternarylogic_epi64<const IMM8: i32>(
6863 src: __m512i,
6864 k: __mmask8,
6865 a: __m512i,
6866 b: __m512i,
6867) -> __m512i {
6868 unsafe {
6869 static_assert_uimm_bits!(IMM8, 8);
6870 let src: i64x8 = src.as_i64x8();
6871 let a: i64x8 = a.as_i64x8();
6872 let b: i64x8 = b.as_i64x8();
        let r: i64x8 = vpternlogq(src, a, b, IMM8);
        transmute(simd_select_bitmask(k, r, src))
6875 }
6876}
6877
6878/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set).
6879///
6880/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ternarylogic_epi64&expand=5875)
6881#[inline]
6882#[target_feature(enable = "avx512f")]
6883#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6884#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6885#[rustc_legacy_const_generics(4)]
6886pub fn _mm512_maskz_ternarylogic_epi64<const IMM8: i32>(
6887 k: __mmask8,
6888 a: __m512i,
6889 b: __m512i,
6890 c: __m512i,
6891) -> __m512i {
6892 unsafe {
6893 static_assert_uimm_bits!(IMM8, 8);
6894 let a: i64x8 = a.as_i64x8();
6895 let b: i64x8 = b.as_i64x8();
6896 let c: i64x8 = c.as_i64x8();
6897 let r: i64x8 = vpternlogq(a, b, c, IMM8);
        transmute(simd_select_bitmask(k, r, i64x8::ZERO))
6899 }
6900}
6901
6902/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
6903///
6904/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ternarylogic_epi64&expand=5873)
6905#[inline]
6906#[target_feature(enable = "avx512f,avx512vl")]
6907#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6908#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6909#[rustc_legacy_const_generics(3)]
6910pub fn _mm256_ternarylogic_epi64<const IMM8: i32>(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
6911 unsafe {
6912 static_assert_uimm_bits!(IMM8, 8);
6913 let a: i64x4 = a.as_i64x4();
6914 let b: i64x4 = b.as_i64x4();
6915 let c: i64x4 = c.as_i64x4();
6916 let r: i64x4 = vpternlogq256(a, b, c, IMM8);
        transmute(r)
6918 }
6919}
6920
6921/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 64-bit granularity (64-bit elements are copied from src when the corresponding mask bit is not set).
6922///
6923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ternarylogic_epi64&expand=5871)
6924#[inline]
6925#[target_feature(enable = "avx512f,avx512vl")]
6926#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6927#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6928#[rustc_legacy_const_generics(4)]
6929pub fn _mm256_mask_ternarylogic_epi64<const IMM8: i32>(
6930 src: __m256i,
6931 k: __mmask8,
6932 a: __m256i,
6933 b: __m256i,
6934) -> __m256i {
6935 unsafe {
6936 static_assert_uimm_bits!(IMM8, 8);
6937 let src: i64x4 = src.as_i64x4();
6938 let a: i64x4 = a.as_i64x4();
6939 let b: i64x4 = b.as_i64x4();
        let r: i64x4 = vpternlogq256(src, a, b, IMM8);
        transmute(simd_select_bitmask(k, r, src))
6942 }
6943}
6944
6945/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set).
6946///
6947/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ternarylogic_epi64&expand=5872)
6948#[inline]
6949#[target_feature(enable = "avx512f,avx512vl")]
6950#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6951#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6952#[rustc_legacy_const_generics(4)]
6953pub fn _mm256_maskz_ternarylogic_epi64<const IMM8: i32>(
6954 k: __mmask8,
6955 a: __m256i,
6956 b: __m256i,
6957 c: __m256i,
6958) -> __m256i {
6959 unsafe {
6960 static_assert_uimm_bits!(IMM8, 8);
6961 let a: i64x4 = a.as_i64x4();
6962 let b: i64x4 = b.as_i64x4();
6963 let c: i64x4 = c.as_i64x4();
6964 let r: i64x4 = vpternlogq256(a, b, c, IMM8);
        transmute(simd_select_bitmask(k, r, i64x4::ZERO))
6966 }
6967}
6968
6969/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
6970///
6971/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ternarylogic_epi64&expand=5870)
6972#[inline]
6973#[target_feature(enable = "avx512f,avx512vl")]
6974#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6975#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6976#[rustc_legacy_const_generics(3)]
6977pub fn _mm_ternarylogic_epi64<const IMM8: i32>(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
6978 unsafe {
6979 static_assert_uimm_bits!(IMM8, 8);
6980 let a: i64x2 = a.as_i64x2();
6981 let b: i64x2 = b.as_i64x2();
6982 let c: i64x2 = c.as_i64x2();
6983 let r: i64x2 = vpternlogq128(a, b, c, IMM8);
        transmute(r)
6985 }
6986}
6987
6988/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 64-bit granularity (64-bit elements are copied from src when the corresponding mask bit is not set).
6989///
6990/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ternarylogic_epi64&expand=5868)
6991#[inline]
6992#[target_feature(enable = "avx512f,avx512vl")]
6993#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
6994#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6995#[rustc_legacy_const_generics(4)]
6996pub fn _mm_mask_ternarylogic_epi64<const IMM8: i32>(
6997 src: __m128i,
6998 k: __mmask8,
6999 a: __m128i,
7000 b: __m128i,
7001) -> __m128i {
7002 unsafe {
7003 static_assert_uimm_bits!(IMM8, 8);
7004 let src: i64x2 = src.as_i64x2();
7005 let a: i64x2 = a.as_i64x2();
7006 let b: i64x2 = b.as_i64x2();
        let r: i64x2 = vpternlogq128(src, a, b, IMM8);
        transmute(simd_select_bitmask(k, r, src))
7009 }
7010}
7011
7012/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set).
7013///
7014/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ternarylogic_epi64&expand=5869)
7015#[inline]
7016#[target_feature(enable = "avx512f,avx512vl")]
7017#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7018#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
7019#[rustc_legacy_const_generics(4)]
7020pub fn _mm_maskz_ternarylogic_epi64<const IMM8: i32>(
7021 k: __mmask8,
7022 a: __m128i,
7023 b: __m128i,
7024 c: __m128i,
7025) -> __m128i {
7026 unsafe {
7027 static_assert_uimm_bits!(IMM8, 8);
7028 let a: i64x2 = a.as_i64x2();
7029 let b: i64x2 = b.as_i64x2();
7030 let c: i64x2 = c.as_i64x2();
7031 let r: i64x2 = vpternlogq128(a, b, c, IMM8);
        transmute(simd_select_bitmask(k, r, i64x2::ZERO))
7033 }
7034}
7035
/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
/// _MM_MANT_NORM_1_2 // interval [1, 2)\
/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
/// _MM_MANT_SIGN_src // sign = sign(src)\
/// _MM_MANT_SIGN_zero // sign = 0\
/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7046///
7047/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_ps&expand=2880)
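///
/// As a plain-Rust scalar sketch (not this intrinsic; it ignores zeros, NaNs
/// and denormals, and the helper name is illustrative only), the
/// `_MM_MANT_NORM_1_2` / `_MM_MANT_SIGN_src` case keeps the sign and the
/// 23-bit fraction and forces the exponent back to the bias:
///
/// ```
/// // Remap |x| into [1.0, 2.0) while keeping the source sign.
/// fn mantissa_1_2_src_sign(x: f32) -> f32 {
///     f32::from_bits((x.to_bits() & 0x807f_ffff) | 0x3f80_0000)
/// }
/// assert_eq!(mantissa_1_2_src_sign(12.0), 1.5); // 12.0 = 1.5 * 2^3
/// assert_eq!(mantissa_1_2_src_sign(-0.375), -1.5); // -0.375 = -1.5 * 2^-2
/// ```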
7048#[inline]
7049#[target_feature(enable = "avx512f")]
7050#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7051#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7052#[rustc_legacy_const_generics(1, 2)]
7053pub fn _mm512_getmant_ps<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7054 a: __m512,
7055) -> __m512 {
7056 unsafe {
7057 static_assert_uimm_bits!(NORM, 4);
7058 static_assert_uimm_bits!(SIGN, 2);
7059 let a: f32x16 = a.as_f32x16();
7060 let zero: f32x16 = f32x16::ZERO;
7061 let r: f32x16 = vgetmantps(
7062 a,
7063 SIGN << 2 | NORM,
            zero,
            0b11111111_11111111,
7066 _MM_FROUND_CUR_DIRECTION,
7067 );
        transmute(r)
7069 }
7070}
7071
7072/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7073/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7074/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7075/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7076/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7077/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7078/// The sign is determined by sc which can take the following values:\
7079/// _MM_MANT_SIGN_src // sign = sign(src)\
7080/// _MM_MANT_SIGN_zero // sign = 0\
7081/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7082///
7083/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_ps&expand=2881)
7084#[inline]
7085#[target_feature(enable = "avx512f")]
7086#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7087#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7088#[rustc_legacy_const_generics(3, 4)]
7089pub fn _mm512_mask_getmant_ps<
7090 const NORM: _MM_MANTISSA_NORM_ENUM,
7091 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7092>(
7093 src: __m512,
7094 k: __mmask16,
7095 a: __m512,
7096) -> __m512 {
7097 unsafe {
7098 static_assert_uimm_bits!(NORM, 4);
7099 static_assert_uimm_bits!(SIGN, 2);
7100 let a: f32x16 = a.as_f32x16();
7101 let src: f32x16 = src.as_f32x16();
        let r: f32x16 = vgetmantps(a, SIGN << 2 | NORM, src, k, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
7104 }
7105}
7106
7107/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7108/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7109/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7110/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7111/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7112/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7113/// The sign is determined by sc which can take the following values:\
7114/// _MM_MANT_SIGN_src // sign = sign(src)\
7115/// _MM_MANT_SIGN_zero // sign = 0\
7116/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7117///
7118/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_ps&expand=2882)
7119#[inline]
7120#[target_feature(enable = "avx512f")]
7121#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7122#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7123#[rustc_legacy_const_generics(2, 3)]
7124pub fn _mm512_maskz_getmant_ps<
7125 const NORM: _MM_MANTISSA_NORM_ENUM,
7126 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7127>(
7128 k: __mmask16,
7129 a: __m512,
7130) -> __m512 {
7131 unsafe {
7132 static_assert_uimm_bits!(NORM, 4);
7133 static_assert_uimm_bits!(SIGN, 2);
7134 let a: f32x16 = a.as_f32x16();
7135 let r: f32x16 = vgetmantps(
7136 a,
7137 SIGN << 2 | NORM,
            f32x16::ZERO,
            k,
7140 _MM_FROUND_CUR_DIRECTION,
7141 );
        transmute(r)
7143 }
7144}
7145
/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
/// _MM_MANT_NORM_1_2 // interval [1, 2)\
/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
/// _MM_MANT_SIGN_src // sign = sign(src)\
/// _MM_MANT_SIGN_zero // sign = 0\
/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7156///
7157/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getmant_ps&expand=2877)
7158#[inline]
7159#[target_feature(enable = "avx512f,avx512vl")]
7160#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7161#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7162#[rustc_legacy_const_generics(1, 2)]
7163pub fn _mm256_getmant_ps<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7164 a: __m256,
7165) -> __m256 {
7166 unsafe {
7167 static_assert_uimm_bits!(NORM, 4);
7168 static_assert_uimm_bits!(SIGN, 2);
7169 let a: f32x8 = a.as_f32x8();
        let r: f32x8 = vgetmantps256(a, SIGN << 2 | NORM, f32x8::ZERO, 0b11111111);
        transmute(r)
7172 }
7173}
7174
7175/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7176/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7177/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7178/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7179/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7180/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7181/// The sign is determined by sc which can take the following values:\
7182/// _MM_MANT_SIGN_src // sign = sign(src)\
7183/// _MM_MANT_SIGN_zero // sign = 0\
7184/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7185///
7186/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getmant_ps&expand=2878)
7187#[inline]
7188#[target_feature(enable = "avx512f,avx512vl")]
7189#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7190#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7191#[rustc_legacy_const_generics(3, 4)]
7192pub fn _mm256_mask_getmant_ps<
7193 const NORM: _MM_MANTISSA_NORM_ENUM,
7194 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7195>(
7196 src: __m256,
7197 k: __mmask8,
7198 a: __m256,
7199) -> __m256 {
7200 unsafe {
7201 static_assert_uimm_bits!(NORM, 4);
7202 static_assert_uimm_bits!(SIGN, 2);
7203 let a: f32x8 = a.as_f32x8();
7204 let src: f32x8 = src.as_f32x8();
        let r: f32x8 = vgetmantps256(a, SIGN << 2 | NORM, src, k);
        transmute(r)
7207 }
7208}
7209
7210/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7211/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7212/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7213/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7214/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7215/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7216/// The sign is determined by sc which can take the following values:\
7217/// _MM_MANT_SIGN_src // sign = sign(src)\
7218/// _MM_MANT_SIGN_zero // sign = 0\
7219/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7220///
7221/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getmant_ps&expand=2879)
7222#[inline]
7223#[target_feature(enable = "avx512f,avx512vl")]
7224#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7225#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7226#[rustc_legacy_const_generics(2, 3)]
7227pub fn _mm256_maskz_getmant_ps<
7228 const NORM: _MM_MANTISSA_NORM_ENUM,
7229 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7230>(
7231 k: __mmask8,
7232 a: __m256,
7233) -> __m256 {
7234 unsafe {
7235 static_assert_uimm_bits!(NORM, 4);
7236 static_assert_uimm_bits!(SIGN, 2);
7237 let a: f32x8 = a.as_f32x8();
        let r: f32x8 = vgetmantps256(a, SIGN << 2 | NORM, f32x8::ZERO, k);
        transmute(r)
7240 }
7241}
7242
/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
/// _MM_MANT_NORM_1_2 // interval [1, 2)\
/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
/// The sign is determined by sc which can take the following values:\
/// _MM_MANT_SIGN_src // sign = sign(src)\
/// _MM_MANT_SIGN_zero // sign = 0\
/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7253///
7254/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getmant_ps&expand=2874)
7255#[inline]
7256#[target_feature(enable = "avx512f,avx512vl")]
7257#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7258#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7259#[rustc_legacy_const_generics(1, 2)]
7260pub fn _mm_getmant_ps<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7261 a: __m128,
7262) -> __m128 {
7263 unsafe {
7264 static_assert_uimm_bits!(NORM, 4);
7265 static_assert_uimm_bits!(SIGN, 2);
7266 let a: f32x4 = a.as_f32x4();
        let r: f32x4 = vgetmantps128(a, SIGN << 2 | NORM, f32x4::ZERO, 0b00001111);
        transmute(r)
7269 }
7270}
7271
7272/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7273/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7274/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7275/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7276/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7277/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7278/// The sign is determined by sc which can take the following values:\
7279/// _MM_MANT_SIGN_src // sign = sign(src)\
7280/// _MM_MANT_SIGN_zero // sign = 0\
7281/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7282///
7283/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getmant_ps&expand=2875)
7284#[inline]
7285#[target_feature(enable = "avx512f,avx512vl")]
7286#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7287#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7288#[rustc_legacy_const_generics(3, 4)]
7289pub fn _mm_mask_getmant_ps<
7290 const NORM: _MM_MANTISSA_NORM_ENUM,
7291 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7292>(
7293 src: __m128,
7294 k: __mmask8,
7295 a: __m128,
7296) -> __m128 {
7297 unsafe {
7298 static_assert_uimm_bits!(NORM, 4);
7299 static_assert_uimm_bits!(SIGN, 2);
7300 let a: f32x4 = a.as_f32x4();
7301 let src: f32x4 = src.as_f32x4();
        let r: f32x4 = vgetmantps128(a, SIGN << 2 | NORM, src, k);
        transmute(r)
7304 }
7305}
7306
7307/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7308/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7309/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7310/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7311/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7312/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7313/// The sign is determined by sc which can take the following values:\
7314/// _MM_MANT_SIGN_src // sign = sign(src)\
7315/// _MM_MANT_SIGN_zero // sign = 0\
7316/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7317///
7318/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getmant_ps&expand=2876)
7319#[inline]
7320#[target_feature(enable = "avx512f,avx512vl")]
7321#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7322#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7323#[rustc_legacy_const_generics(2, 3)]
7324pub fn _mm_maskz_getmant_ps<
7325 const NORM: _MM_MANTISSA_NORM_ENUM,
7326 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7327>(
7328 k: __mmask8,
7329 a: __m128,
7330) -> __m128 {
7331 unsafe {
7332 static_assert_uimm_bits!(NORM, 4);
7333 static_assert_uimm_bits!(SIGN, 2);
7334 let a: f32x4 = a.as_f32x4();
        let r: f32x4 = vgetmantps128(a, SIGN << 2 | NORM, f32x4::ZERO, k);
        transmute(r)
7337 }
7338}
7339
7340/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7341/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7342/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7343/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7344/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7345/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7346/// The sign is determined by sc which can take the following values:\
7347/// _MM_MANT_SIGN_src // sign = sign(src)\
7348/// _MM_MANT_SIGN_zero // sign = 0\
7349/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7350///
7351/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_pd&expand=2871)
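///
/// As a plain-Rust scalar sketch (not this intrinsic; it ignores zeros, NaNs
/// and denormals, and the helper name is illustrative only), the
/// `_MM_MANT_NORM_p5_1` / `_MM_MANT_SIGN_src` case keeps the sign and the
/// 52-bit fraction and forces the exponent to one below the bias:
///
/// ```
/// // Remap |x| into [0.5, 1.0) while keeping the source sign.
/// fn mantissa_p5_1_src_sign(x: f64) -> f64 {
///     f64::from_bits((x.to_bits() & 0x800f_ffff_ffff_ffff) | 0x3fe0_0000_0000_0000)
/// }
/// assert_eq!(mantissa_p5_1_src_sign(12.0), 0.75); // 12.0 = 0.75 * 2^4
/// ```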
7352#[inline]
7353#[target_feature(enable = "avx512f")]
7354#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7355#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7356#[rustc_legacy_const_generics(1, 2)]
7357pub fn _mm512_getmant_pd<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7358 a: __m512d,
7359) -> __m512d {
7360 unsafe {
7361 static_assert_uimm_bits!(NORM, 4);
7362 static_assert_uimm_bits!(SIGN, 2);
7363 let a: f64x8 = a.as_f64x8();
7364 let zero: f64x8 = f64x8::ZERO;
7365 let r: f64x8 = vgetmantpd(
7366 a,
7367 SIGN << 2 | NORM,
            zero,
            0b11111111,
7370 _MM_FROUND_CUR_DIRECTION,
7371 );
        transmute(r)
7373 }
7374}
7375
7376/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7377/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7378/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7379/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7380/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7381/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7382/// The sign is determined by sc which can take the following values:\
7383/// _MM_MANT_SIGN_src // sign = sign(src)\
7384/// _MM_MANT_SIGN_zero // sign = 0\
7385/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7386///
7387/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_pd&expand=2872)
7388#[inline]
7389#[target_feature(enable = "avx512f")]
7390#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7391#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7392#[rustc_legacy_const_generics(3, 4)]
7393pub fn _mm512_mask_getmant_pd<
7394 const NORM: _MM_MANTISSA_NORM_ENUM,
7395 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7396>(
7397 src: __m512d,
7398 k: __mmask8,
7399 a: __m512d,
7400) -> __m512d {
7401 unsafe {
7402 static_assert_uimm_bits!(NORM, 4);
7403 static_assert_uimm_bits!(SIGN, 2);
7404 let a: f64x8 = a.as_f64x8();
7405 let src: f64x8 = src.as_f64x8();
        let r: f64x8 = vgetmantpd(a, SIGN << 2 | NORM, src, k, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
7408 }
7409}
7410
7411/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7412/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7413/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7414/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7415/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7416/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7417/// The sign is determined by sc which can take the following values:\
7418/// _MM_MANT_SIGN_src // sign = sign(src)\
7419/// _MM_MANT_SIGN_zero // sign = 0\
7420/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7421///
7422/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_pd&expand=2873)
7423#[inline]
7424#[target_feature(enable = "avx512f")]
7425#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7426#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7427#[rustc_legacy_const_generics(2, 3)]
7428pub fn _mm512_maskz_getmant_pd<
7429 const NORM: _MM_MANTISSA_NORM_ENUM,
7430 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7431>(
7432 k: __mmask8,
7433 a: __m512d,
7434) -> __m512d {
7435 unsafe {
7436 static_assert_uimm_bits!(NORM, 4);
7437 static_assert_uimm_bits!(SIGN, 2);
7438 let a: f64x8 = a.as_f64x8();
7439 let r: f64x8 = vgetmantpd(
7440 a,
7441 SIGN << 2 | NORM,
            f64x8::ZERO,
            k,
7444 _MM_FROUND_CUR_DIRECTION,
7445 );
        transmute(r)
7447 }
7448}
7449
7450/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7451/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7452/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7453/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7454/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7455/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7456/// The sign is determined by sc which can take the following values:\
7457/// _MM_MANT_SIGN_src // sign = sign(src)\
7458/// _MM_MANT_SIGN_zero // sign = 0\
7459/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7460///
7461/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getmant_pd&expand=2868)
7462#[inline]
7463#[target_feature(enable = "avx512f,avx512vl")]
7464#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7465#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7466#[rustc_legacy_const_generics(1, 2)]
7467pub fn _mm256_getmant_pd<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7468 a: __m256d,
7469) -> __m256d {
7470 unsafe {
7471 static_assert_uimm_bits!(NORM, 4);
7472 static_assert_uimm_bits!(SIGN, 2);
7473 let a: f64x4 = a.as_f64x4();
        let r: f64x4 = vgetmantpd256(a, SIGN << 2 | NORM, f64x4::ZERO, 0b00001111);
        transmute(r)
7476 }
7477}
7478
7479/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7480/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7481/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7482/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7483/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7484/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7485/// The sign is determined by sc which can take the following values:\
7486/// _MM_MANT_SIGN_src // sign = sign(src)\
7487/// _MM_MANT_SIGN_zero // sign = 0\
7488/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7489///
7490/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getmant_pd&expand=2869)
7491#[inline]
7492#[target_feature(enable = "avx512f,avx512vl")]
7493#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7494#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7495#[rustc_legacy_const_generics(3, 4)]
7496pub fn _mm256_mask_getmant_pd<
7497 const NORM: _MM_MANTISSA_NORM_ENUM,
7498 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7499>(
7500 src: __m256d,
7501 k: __mmask8,
7502 a: __m256d,
7503) -> __m256d {
7504 unsafe {
7505 static_assert_uimm_bits!(NORM, 4);
7506 static_assert_uimm_bits!(SIGN, 2);
7507 let a: f64x4 = a.as_f64x4();
7508 let src: f64x4 = src.as_f64x4();
        let r: f64x4 = vgetmantpd256(a, SIGN << 2 | NORM, src, k);
        transmute(r)
7511 }
7512}
7513
7514/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7515/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7516/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7517/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7518/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7519/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7520/// The sign is determined by sc which can take the following values:\
7521/// _MM_MANT_SIGN_src // sign = sign(src)\
7522/// _MM_MANT_SIGN_zero // sign = 0\
7523/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7524///
7525/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getmant_pd&expand=2870)
7526#[inline]
7527#[target_feature(enable = "avx512f,avx512vl")]
7528#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7529#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7530#[rustc_legacy_const_generics(2, 3)]
7531pub fn _mm256_maskz_getmant_pd<
7532 const NORM: _MM_MANTISSA_NORM_ENUM,
7533 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7534>(
7535 k: __mmask8,
7536 a: __m256d,
7537) -> __m256d {
7538 unsafe {
7539 static_assert_uimm_bits!(NORM, 4);
7540 static_assert_uimm_bits!(SIGN, 2);
7541 let a: f64x4 = a.as_f64x4();
        let r: f64x4 = vgetmantpd256(a, SIGN << 2 | NORM, f64x4::ZERO, k);
        transmute(r)
7544 }
7545}
7546
7547/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7548/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7549/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7550/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7551/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7552/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7553/// The sign is determined by sc which can take the following values:\
7554/// _MM_MANT_SIGN_src // sign = sign(src)\
7555/// _MM_MANT_SIGN_zero // sign = 0\
7556/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7557///
7558/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getmant_pd&expand=2865)
7559#[inline]
7560#[target_feature(enable = "avx512f,avx512vl")]
7561#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7562#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7563#[rustc_legacy_const_generics(1, 2)]
7564pub fn _mm_getmant_pd<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7565 a: __m128d,
7566) -> __m128d {
7567 unsafe {
7568 static_assert_uimm_bits!(NORM, 4);
7569 static_assert_uimm_bits!(SIGN, 2);
7570 let a: f64x2 = a.as_f64x2();
        let r: f64x2 = vgetmantpd128(a, SIGN << 2 | NORM, f64x2::ZERO, 0b00000011);
        transmute(r)
7573 }
7574}
7575
7576/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7577/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7578/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7579/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7580/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7581/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7582/// The sign is determined by sc which can take the following values:\
7583/// _MM_MANT_SIGN_src // sign = sign(src)\
7584/// _MM_MANT_SIGN_zero // sign = 0\
7585/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7586///
7587/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getmant_pd&expand=2866)
7588#[inline]
7589#[target_feature(enable = "avx512f,avx512vl")]
7590#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7591#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7592#[rustc_legacy_const_generics(3, 4)]
7593pub fn _mm_mask_getmant_pd<
7594 const NORM: _MM_MANTISSA_NORM_ENUM,
7595 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7596>(
7597 src: __m128d,
7598 k: __mmask8,
7599 a: __m128d,
7600) -> __m128d {
7601 unsafe {
7602 static_assert_uimm_bits!(NORM, 4);
7603 static_assert_uimm_bits!(SIGN, 2);
7604 let a: f64x2 = a.as_f64x2();
7605 let src: f64x2 = src.as_f64x2();
        let r: f64x2 = vgetmantpd128(a, SIGN << 2 | NORM, src, k);
        transmute(r)
7608 }
7609}
7610
7611/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7612/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7613/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7614/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7615/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7616/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7617/// The sign is determined by sc which can take the following values:\
7618/// _MM_MANT_SIGN_src // sign = sign(src)\
7619/// _MM_MANT_SIGN_zero // sign = 0\
7620/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7621///
7622/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getmant_pd&expand=2867)
7623#[inline]
7624#[target_feature(enable = "avx512f,avx512vl")]
7625#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7626#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7627#[rustc_legacy_const_generics(2, 3)]
7628pub fn _mm_maskz_getmant_pd<
7629 const NORM: _MM_MANTISSA_NORM_ENUM,
7630 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7631>(
7632 k: __mmask8,
7633 a: __m128d,
7634) -> __m128d {
7635 unsafe {
7636 static_assert_uimm_bits!(NORM, 4);
7637 static_assert_uimm_bits!(SIGN, 2);
7638 let a: f64x2 = a.as_f64x2();
        let r: f64x2 = vgetmantpd128(a, SIGN << 2 | NORM, f64x2::ZERO, k);
        transmute(r)
7641 }
7642}
7643
7644/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.\
7645///
7646/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7647/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7648/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7649/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7650/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7651/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7652///
7653/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_round_ps&expand=145)
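///
/// A minimal usage sketch, assuming `std` and runtime AVX512F detection are
/// available; the lane values and the expected result below are illustrative
/// only:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use core::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         unsafe {
///             let a = _mm512_set1_ps(1.0);
///             let b = _mm512_set1_ps(1.0e-8);
///             // Round the per-lane sums toward +infinity with exceptions suppressed.
///             const RU: i32 = _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC;
///             let r = _mm512_add_round_ps::<RU>(a, b);
///             // With round-up, 1.0 + 1.0e-8 lands on the next f32 above 1.0
///             // (round-to-nearest would have returned exactly 1.0).
///             let expected = _mm512_set1_ps(f32::from_bits(1.0f32.to_bits() + 1));
///             assert_eq!(_mm512_cmpeq_ps_mask(r, expected), 0xffff);
///         }
///     }
/// }
/// ```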
7654#[inline]
7655#[target_feature(enable = "avx512f")]
7656#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7657#[cfg_attr(test, assert_instr(vaddps, ROUNDING = 8))]
7658#[rustc_legacy_const_generics(2)]
7659pub fn _mm512_add_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
7660 unsafe {
7661 static_assert_rounding!(ROUNDING);
7662 let a: f32x16 = a.as_f32x16();
7663 let b: f32x16 = b.as_f32x16();
7664 let r: f32x16 = vaddps(a, b, ROUNDING);
        transmute(r)
7666 }
7667}
7668
7669/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
7670///
7671/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7672/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7673/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7674/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7675/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7676/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7677///
7678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_round_ps&expand=146)
7679#[inline]
7680#[target_feature(enable = "avx512f")]
7681#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7682#[cfg_attr(test, assert_instr(vaddps, ROUNDING = 8))]
7683#[rustc_legacy_const_generics(4)]
7684pub fn _mm512_mask_add_round_ps<const ROUNDING: i32>(
7685 src: __m512,
7686 k: __mmask16,
7687 a: __m512,
7688 b: __m512,
7689) -> __m512 {
7690 unsafe {
7691 static_assert_rounding!(ROUNDING);
7692 let a: f32x16 = a.as_f32x16();
7693 let b: f32x16 = b.as_f32x16();
7694 let r: f32x16 = vaddps(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
7696 }
7697}
7698
7699/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
7700///
7701/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7702/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7703/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7704/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7705/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7706/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7707///
7708/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_round_ps&expand=147)
7709#[inline]
7710#[target_feature(enable = "avx512f")]
7711#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7712#[cfg_attr(test, assert_instr(vaddps, ROUNDING = 8))]
7713#[rustc_legacy_const_generics(3)]
7714pub fn _mm512_maskz_add_round_ps<const ROUNDING: i32>(
7715 k: __mmask16,
7716 a: __m512,
7717 b: __m512,
7718) -> __m512 {
7719 unsafe {
7720 static_assert_rounding!(ROUNDING);
7721 let a: f32x16 = a.as_f32x16();
7722 let b: f32x16 = b.as_f32x16();
7723 let r: f32x16 = vaddps(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
7725 }
7726}
7727
7728/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.\
7729///
7730/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7731/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7732/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7733/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7734/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7735/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7736///
7737/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_round_pd&expand=142)
7738#[inline]
7739#[target_feature(enable = "avx512f")]
7740#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7741#[cfg_attr(test, assert_instr(vaddpd, ROUNDING = 8))]
7742#[rustc_legacy_const_generics(2)]
7743pub fn _mm512_add_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
7744 unsafe {
7745 static_assert_rounding!(ROUNDING);
7746 let a: f64x8 = a.as_f64x8();
7747 let b: f64x8 = b.as_f64x8();
7748 let r: f64x8 = vaddpd(a, b, ROUNDING);
        transmute(r)
7750 }
7751}
7752
7753/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
7754///
7755/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7756/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7757/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7758/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7759/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7760/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7761///
7762/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_round_pd&expand=143)
7763#[inline]
7764#[target_feature(enable = "avx512f")]
7765#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7766#[cfg_attr(test, assert_instr(vaddpd, ROUNDING = 8))]
7767#[rustc_legacy_const_generics(4)]
7768pub fn _mm512_mask_add_round_pd<const ROUNDING: i32>(
7769 src: __m512d,
7770 k: __mmask8,
7771 a: __m512d,
7772 b: __m512d,
7773) -> __m512d {
7774 unsafe {
7775 static_assert_rounding!(ROUNDING);
7776 let a: f64x8 = a.as_f64x8();
7777 let b: f64x8 = b.as_f64x8();
7778 let r: f64x8 = vaddpd(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
7780 }
7781}
7782
7783/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
7784///
7785/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7786/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7787/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7788/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7789/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7790/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7791///
7792/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_round_pd&expand=144)
7793#[inline]
7794#[target_feature(enable = "avx512f")]
7795#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7796#[cfg_attr(test, assert_instr(vaddpd, ROUNDING = 8))]
7797#[rustc_legacy_const_generics(3)]
7798pub fn _mm512_maskz_add_round_pd<const ROUNDING: i32>(
7799 k: __mmask8,
7800 a: __m512d,
7801 b: __m512d,
7802) -> __m512d {
7803 unsafe {
7804 static_assert_rounding!(ROUNDING);
7805 let a: f64x8 = a.as_f64x8();
7806 let b: f64x8 = b.as_f64x8();
7807 let r: f64x8 = vaddpd(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
7809 }
7810}
7811
7812/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst.\
7813///
7814/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7815/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7816/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7817/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7818/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7819/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7820///
7821/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_round_ps&expand=5739)
7822#[inline]
7823#[target_feature(enable = "avx512f")]
7824#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7825#[cfg_attr(test, assert_instr(vsubps, ROUNDING = 8))]
7826#[rustc_legacy_const_generics(2)]
7827pub fn _mm512_sub_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
7828 unsafe {
7829 static_assert_rounding!(ROUNDING);
7830 let a: f32x16 = a.as_f32x16();
7831 let b: f32x16 = b.as_f32x16();
7832 let r: f32x16 = vsubps(a, b, ROUNDING);
        transmute(r)
7834 }
7835}
7836
7837/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
7838///
7839/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7840/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7841/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7842/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7843/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7844/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7845///
7846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_round_ps&expand=5737)
7847#[inline]
7848#[target_feature(enable = "avx512f")]
7849#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7850#[cfg_attr(test, assert_instr(vsubps, ROUNDING = 8))]
7851#[rustc_legacy_const_generics(4)]
7852pub fn _mm512_mask_sub_round_ps<const ROUNDING: i32>(
7853 src: __m512,
7854 k: __mmask16,
7855 a: __m512,
7856 b: __m512,
7857) -> __m512 {
7858 unsafe {
7859 static_assert_rounding!(ROUNDING);
7860 let a: f32x16 = a.as_f32x16();
7861 let b: f32x16 = b.as_f32x16();
7862 let r: f32x16 = vsubps(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
7864 }
7865}
7866
7867/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
7868///
7869/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7870/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7871/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7872/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7873/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7874/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7875///
7876/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_round_ps&expand=5738)
7877#[inline]
7878#[target_feature(enable = "avx512f")]
7879#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7880#[cfg_attr(test, assert_instr(vsubps, ROUNDING = 8))]
7881#[rustc_legacy_const_generics(3)]
7882pub fn _mm512_maskz_sub_round_ps<const ROUNDING: i32>(
7883 k: __mmask16,
7884 a: __m512,
7885 b: __m512,
7886) -> __m512 {
7887 unsafe {
7888 static_assert_rounding!(ROUNDING);
7889 let a: f32x16 = a.as_f32x16();
7890 let b: f32x16 = b.as_f32x16();
7891 let r: f32x16 = vsubps(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
7893 }
7894}
7895
7896/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst.\
7897///
7898/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7899/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7900/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7901/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7902/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7903/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7904///
7905/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_round_pd&expand=5736)
7906#[inline]
7907#[target_feature(enable = "avx512f")]
7908#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7909#[cfg_attr(test, assert_instr(vsubpd, ROUNDING = 8))]
7910#[rustc_legacy_const_generics(2)]
7911pub fn _mm512_sub_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
7912 unsafe {
7913 static_assert_rounding!(ROUNDING);
7914 let a: f64x8 = a.as_f64x8();
7915 let b: f64x8 = b.as_f64x8();
7916 let r: f64x8 = vsubpd(a, b, ROUNDING);
        transmute(r)
7918 }
7919}
7920
7921/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
7922///
7923/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7924/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7925/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7926/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7927/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7928/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7929///
7930/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_round_pd&expand=5734)
7931#[inline]
7932#[target_feature(enable = "avx512f")]
7933#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7934#[cfg_attr(test, assert_instr(vsubpd, ROUNDING = 8))]
7935#[rustc_legacy_const_generics(4)]
7936pub fn _mm512_mask_sub_round_pd<const ROUNDING: i32>(
7937 src: __m512d,
7938 k: __mmask8,
7939 a: __m512d,
7940 b: __m512d,
7941) -> __m512d {
7942 unsafe {
7943 static_assert_rounding!(ROUNDING);
7944 let a: f64x8 = a.as_f64x8();
7945 let b: f64x8 = b.as_f64x8();
7946 let r: f64x8 = vsubpd(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
7948 }
7949}
7950
7951/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
7952///
7953/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7954/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7955/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7956/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7957/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7958/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7959///
7960/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_round_pd&expand=5735)
7961#[inline]
7962#[target_feature(enable = "avx512f")]
7963#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7964#[cfg_attr(test, assert_instr(vsubpd, ROUNDING = 8))]
7965#[rustc_legacy_const_generics(3)]
7966pub fn _mm512_maskz_sub_round_pd<const ROUNDING: i32>(
7967 k: __mmask8,
7968 a: __m512d,
7969 b: __m512d,
7970) -> __m512d {
7971 unsafe {
7972 static_assert_rounding!(ROUNDING);
7973 let a: f64x8 = a.as_f64x8();
7974 let b: f64x8 = b.as_f64x8();
7975 let r: f64x8 = vsubpd(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
7977 }
7978}
7979
7980/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.\
7981///
7982/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7983/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7984/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7985/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7986/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7987/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7988///
7989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_round_ps&expand=3940)
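///
/// A minimal sketch (values are illustrative; AVX-512F support is assumed to have
/// been verified by the caller, hence the `unsafe` block):
///
/// ```ignore
/// let r = unsafe {
///     let a = _mm512_set1_ps(1.5);
///     let b = _mm512_set1_ps(3.0);
///     // Defer to whatever rounding mode MXCSR.RC currently selects.
///     _mm512_mul_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, b)
/// };
/// ```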
7990#[inline]
7991#[target_feature(enable = "avx512f")]
7992#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
7993#[cfg_attr(test, assert_instr(vmulps, ROUNDING = 8))]
7994#[rustc_legacy_const_generics(2)]
7995pub fn _mm512_mul_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
7996 unsafe {
7997 static_assert_rounding!(ROUNDING);
7998 let a: f32x16 = a.as_f32x16();
7999 let b: f32x16 = b.as_f32x16();
8000 let r: f32x16 = vmulps(a, b, ROUNDING);
        transmute(r)
8002 }
8003}
8004
8005/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8006///
8007/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8008/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8009/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8010/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8011/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8012/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8013///
8014/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_round_ps&expand=3938)
8015#[inline]
8016#[target_feature(enable = "avx512f")]
8017#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8018#[cfg_attr(test, assert_instr(vmulps, ROUNDING = 8))]
8019#[rustc_legacy_const_generics(4)]
8020pub fn _mm512_mask_mul_round_ps<const ROUNDING: i32>(
8021 src: __m512,
8022 k: __mmask16,
8023 a: __m512,
8024 b: __m512,
8025) -> __m512 {
8026 unsafe {
8027 static_assert_rounding!(ROUNDING);
8028 let a: f32x16 = a.as_f32x16();
8029 let b: f32x16 = b.as_f32x16();
8030 let r: f32x16 = vmulps(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
8032 }
8033}
8034
8035/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8036///
8037/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8038/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8039/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8040/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8041/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8042/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8043///
8044/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_round_ps&expand=3939)
8045#[inline]
8046#[target_feature(enable = "avx512f")]
8047#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8048#[cfg_attr(test, assert_instr(vmulps, ROUNDING = 8))]
8049#[rustc_legacy_const_generics(3)]
8050pub fn _mm512_maskz_mul_round_ps<const ROUNDING: i32>(
8051 k: __mmask16,
8052 a: __m512,
8053 b: __m512,
8054) -> __m512 {
8055 unsafe {
8056 static_assert_rounding!(ROUNDING);
8057 let a: f32x16 = a.as_f32x16();
8058 let b: f32x16 = b.as_f32x16();
8059 let r: f32x16 = vmulps(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
8061 }
8062}
8063
8064/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.\
8065///
8066/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8067/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8068/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8069/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8070/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8071/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8072///
8073/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_round_pd&expand=3937)
8074#[inline]
8075#[target_feature(enable = "avx512f")]
8076#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8077#[cfg_attr(test, assert_instr(vmulpd, ROUNDING = 8))]
8078#[rustc_legacy_const_generics(2)]
8079pub fn _mm512_mul_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
8080 unsafe {
8081 static_assert_rounding!(ROUNDING);
8082 let a: f64x8 = a.as_f64x8();
8083 let b: f64x8 = b.as_f64x8();
8084 let r: f64x8 = vmulpd(a, b, ROUNDING);
        transmute(r)
8086 }
8087}
8088
8089/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8090///
8091/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8092/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8093/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8094/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8095/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8096/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8097///
8098/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_round_pd&expand=3935)
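///
/// A sketch of the writemask behaviour (illustrative values; AVX-512F support is
/// assumed to have been checked by the caller):
///
/// ```ignore
/// let r = unsafe {
///     let src = _mm512_set1_pd(-1.0);
///     let a = _mm512_set1_pd(2.0);
///     let b = _mm512_set1_pd(4.0);
///     // Lanes 0..=3 receive a * b (round to nearest); lanes 4..=7 keep the values from `src`.
///     _mm512_mask_mul_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(src, 0b0000_1111, a, b)
/// };
/// ```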
8099#[inline]
8100#[target_feature(enable = "avx512f")]
8101#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8102#[cfg_attr(test, assert_instr(vmulpd, ROUNDING = 8))]
8103#[rustc_legacy_const_generics(4)]
8104pub fn _mm512_mask_mul_round_pd<const ROUNDING: i32>(
8105 src: __m512d,
8106 k: __mmask8,
8107 a: __m512d,
8108 b: __m512d,
8109) -> __m512d {
8110 unsafe {
8111 static_assert_rounding!(ROUNDING);
8112 let a: f64x8 = a.as_f64x8();
8113 let b: f64x8 = b.as_f64x8();
8114 let r: f64x8 = vmulpd(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
8116 }
8117}
8118
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8120///
8121/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8122/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8123/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8124/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8125/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8126/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8127///
8128/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_round_pd&expand=3939)
8129#[inline]
8130#[target_feature(enable = "avx512f")]
8131#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8132#[cfg_attr(test, assert_instr(vmulpd, ROUNDING = 8))]
8133#[rustc_legacy_const_generics(3)]
8134pub fn _mm512_maskz_mul_round_pd<const ROUNDING: i32>(
8135 k: __mmask8,
8136 a: __m512d,
8137 b: __m512d,
8138) -> __m512d {
8139 unsafe {
8140 static_assert_rounding!(ROUNDING);
8141 let a: f64x8 = a.as_f64x8();
8142 let b: f64x8 = b.as_f64x8();
8143 let r: f64x8 = vmulpd(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
8145 }
8146}
8147
8148/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst.\
8149///
8150/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8151/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8152/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8153/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8154/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8155/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8156///
8157/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_round_ps&expand=2168)
8158#[inline]
8159#[target_feature(enable = "avx512f")]
8160#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8161#[cfg_attr(test, assert_instr(vdivps, ROUNDING = 8))]
8162#[rustc_legacy_const_generics(2)]
8163pub fn _mm512_div_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
8164 unsafe {
8165 static_assert_rounding!(ROUNDING);
8166 let a: f32x16 = a.as_f32x16();
8167 let b: f32x16 = b.as_f32x16();
8168 let r: f32x16 = vdivps(a, b, ROUNDING);
        transmute(r)
8170 }
8171}
8172
8173/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8174///
8175/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8176/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8177/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8178/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8179/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8180/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8181///
8182/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_round_ps&expand=2169)
8183#[inline]
8184#[target_feature(enable = "avx512f")]
8185#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8186#[cfg_attr(test, assert_instr(vdivps, ROUNDING = 8))]
8187#[rustc_legacy_const_generics(4)]
8188pub fn _mm512_mask_div_round_ps<const ROUNDING: i32>(
8189 src: __m512,
8190 k: __mmask16,
8191 a: __m512,
8192 b: __m512,
8193) -> __m512 {
8194 unsafe {
8195 static_assert_rounding!(ROUNDING);
8196 let a: f32x16 = a.as_f32x16();
8197 let b: f32x16 = b.as_f32x16();
8198 let r: f32x16 = vdivps(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
8200 }
8201}
8202
8203/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8204///
8205/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8206/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8207/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8208/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8209/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8210/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8211///
8212/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_round_ps&expand=2170)
8213#[inline]
8214#[target_feature(enable = "avx512f")]
8215#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8216#[cfg_attr(test, assert_instr(vdivps, ROUNDING = 8))]
8217#[rustc_legacy_const_generics(3)]
8218pub fn _mm512_maskz_div_round_ps<const ROUNDING: i32>(
8219 k: __mmask16,
8220 a: __m512,
8221 b: __m512,
8222) -> __m512 {
8223 unsafe {
8224 static_assert_rounding!(ROUNDING);
8225 let a: f32x16 = a.as_f32x16();
8226 let b: f32x16 = b.as_f32x16();
8227 let r: f32x16 = vdivps(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
8229 }
8230}
8231
/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst.\
8233///
8234/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8235/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8236/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8237/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8238/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8239/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8240///
8241/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_round_pd&expand=2165)
8242#[inline]
8243#[target_feature(enable = "avx512f")]
8244#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8245#[cfg_attr(test, assert_instr(vdivpd, ROUNDING = 8))]
8246#[rustc_legacy_const_generics(2)]
8247pub fn _mm512_div_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
8248 unsafe {
8249 static_assert_rounding!(ROUNDING);
8250 let a: f64x8 = a.as_f64x8();
8251 let b: f64x8 = b.as_f64x8();
8252 let r: f64x8 = vdivpd(a, b, ROUNDING);
        transmute(r)
8254 }
8255}
8256
8257/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8258///
8259/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8260/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8261/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8262/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8263/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8264/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8265///
8266/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_round_pd&expand=2166)
8267#[inline]
8268#[target_feature(enable = "avx512f")]
8269#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8270#[cfg_attr(test, assert_instr(vdivpd, ROUNDING = 8))]
8271#[rustc_legacy_const_generics(4)]
8272pub fn _mm512_mask_div_round_pd<const ROUNDING: i32>(
8273 src: __m512d,
8274 k: __mmask8,
8275 a: __m512d,
8276 b: __m512d,
8277) -> __m512d {
8278 unsafe {
8279 static_assert_rounding!(ROUNDING);
8280 let a: f64x8 = a.as_f64x8();
8281 let b: f64x8 = b.as_f64x8();
8282 let r: f64x8 = vdivpd(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
8284 }
8285}
8286
8287/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8288///
8289/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8290/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8291/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8292/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8293/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8294/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8295///
8296/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_round_pd&expand=2167)
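///
/// A sketch of the zeromask behaviour (illustrative values; AVX-512F support is
/// assumed to have been checked by the caller):
///
/// ```ignore
/// let r = unsafe {
///     let a = _mm512_set1_pd(1.0);
///     let b = _mm512_set1_pd(8.0);
///     // Lanes 0..=3 hold a / b rounded toward zero; lanes 4..=7 are zeroed.
///     _mm512_maskz_div_round_pd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b0000_1111, a, b)
/// };
/// ```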
8297#[inline]
8298#[target_feature(enable = "avx512f")]
8299#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8300#[cfg_attr(test, assert_instr(vdivpd, ROUNDING = 8))]
8301#[rustc_legacy_const_generics(3)]
8302pub fn _mm512_maskz_div_round_pd<const ROUNDING: i32>(
8303 k: __mmask8,
8304 a: __m512d,
8305 b: __m512d,
8306) -> __m512d {
8307 unsafe {
8308 static_assert_rounding!(ROUNDING);
8309 let a: f64x8 = a.as_f64x8();
8310 let b: f64x8 = b.as_f64x8();
8311 let r: f64x8 = vdivpd(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
8313 }
8314}
8315
8316/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst.\
8317///
8318/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8319/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8320/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8321/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8322/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8323/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8324///
8325/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_round_ps&expand=5377)
8326#[inline]
8327#[target_feature(enable = "avx512f")]
8328#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8329#[cfg_attr(test, assert_instr(vsqrtps, ROUNDING = 8))]
8330#[rustc_legacy_const_generics(1)]
8331pub fn _mm512_sqrt_round_ps<const ROUNDING: i32>(a: __m512) -> __m512 {
8332 unsafe {
8333 static_assert_rounding!(ROUNDING);
8334 let a: f32x16 = a.as_f32x16();
8335 let r: f32x16 = vsqrtps(a, ROUNDING);
        transmute(r)
8337 }
8338}
8339
8340/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8341///
8342/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8343/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8344/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8345/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8346/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8347/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8348///
8349/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_round_ps&expand=5375)
8350#[inline]
8351#[target_feature(enable = "avx512f")]
8352#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8353#[cfg_attr(test, assert_instr(vsqrtps, ROUNDING = 8))]
8354#[rustc_legacy_const_generics(3)]
8355pub fn _mm512_mask_sqrt_round_ps<const ROUNDING: i32>(
8356 src: __m512,
8357 k: __mmask16,
8358 a: __m512,
8359) -> __m512 {
8360 unsafe {
8361 static_assert_rounding!(ROUNDING);
8362 let a: f32x16 = a.as_f32x16();
8363 let r: f32x16 = vsqrtps(a, ROUNDING);
        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
8365 }
8366}
8367
8368/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8369///
8370/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8371/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8372/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8373/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8374/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8375/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8376///
8377/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_round_ps&expand=5376)
8378#[inline]
8379#[target_feature(enable = "avx512f")]
8380#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8381#[cfg_attr(test, assert_instr(vsqrtps, ROUNDING = 8))]
8382#[rustc_legacy_const_generics(2)]
8383pub fn _mm512_maskz_sqrt_round_ps<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m512 {
8384 unsafe {
8385 static_assert_rounding!(ROUNDING);
8386 let a: f32x16 = a.as_f32x16();
8387 let r: f32x16 = vsqrtps(a, ROUNDING);
        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
8389 }
8390}
8391
8392/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst.\
8393///
8394/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8395/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8396/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8397/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8398/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8399/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8400///
8401/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_round_pd&expand=5374)
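///
/// A brief usage sketch (illustrative value; the caller is assumed to have verified
/// AVX-512F support):
///
/// ```ignore
/// let r = unsafe {
///     let a = _mm512_set1_pd(2.0);
///     // Square root of each lane, rounded toward positive infinity, exceptions suppressed.
///     _mm512_sqrt_round_pd::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a)
/// };
/// ```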
8402#[inline]
8403#[target_feature(enable = "avx512f")]
8404#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8405#[cfg_attr(test, assert_instr(vsqrtpd, ROUNDING = 8))]
8406#[rustc_legacy_const_generics(1)]
8407pub fn _mm512_sqrt_round_pd<const ROUNDING: i32>(a: __m512d) -> __m512d {
8408 unsafe {
8409 static_assert_rounding!(ROUNDING);
8410 let a: f64x8 = a.as_f64x8();
8411 let r: f64x8 = vsqrtpd(a, ROUNDING);
        transmute(r)
8413 }
8414}
8415
8416/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8417///
8418/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8419/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8420/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8421/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8422/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8423/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8424///
8425/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_round_pd&expand=5372)
8426#[inline]
8427#[target_feature(enable = "avx512f")]
8428#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8429#[cfg_attr(test, assert_instr(vsqrtpd, ROUNDING = 8))]
8430#[rustc_legacy_const_generics(3)]
8431pub fn _mm512_mask_sqrt_round_pd<const ROUNDING: i32>(
8432 src: __m512d,
8433 k: __mmask8,
8434 a: __m512d,
8435) -> __m512d {
8436 unsafe {
8437 static_assert_rounding!(ROUNDING);
8438 let a: f64x8 = a.as_f64x8();
8439 let r: f64x8 = vsqrtpd(a, ROUNDING);
        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
8441 }
8442}
8443
8444/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8445///
8446/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8447/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8448/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8449/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8450/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8451/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8452///
8453/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_round_pd&expand=5373)
8454#[inline]
8455#[target_feature(enable = "avx512f")]
8456#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8457#[cfg_attr(test, assert_instr(vsqrtpd, ROUNDING = 8))]
8458#[rustc_legacy_const_generics(2)]
8459pub fn _mm512_maskz_sqrt_round_pd<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m512d {
8460 unsafe {
8461 static_assert_rounding!(ROUNDING);
8462 let a: f64x8 = a.as_f64x8();
8463 let r: f64x8 = vsqrtpd(a, ROUNDING);
        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
8465 }
8466}
8467
8468/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.\
8469///
8470/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8471/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8472/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8473/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8474/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8475/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8476///
8477/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_round_ps&expand=2565)
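///
/// A minimal sketch (illustrative values; AVX-512F support is assumed to have been
/// verified by the caller):
///
/// ```ignore
/// let r = unsafe {
///     let a = _mm512_set1_ps(2.0);
///     let b = _mm512_set1_ps(3.0);
///     let c = _mm512_set1_ps(1.0);
///     // Each lane computes a * b + c with a single rounding step (round to nearest here).
///     _mm512_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c)
/// };
/// ```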
8478#[inline]
8479#[target_feature(enable = "avx512f")]
8480#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8481#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
8482#[rustc_legacy_const_generics(3)]
8483pub fn _mm512_fmadd_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
8484 unsafe {
8485 static_assert_rounding!(ROUNDING);
8486 vfmadd132psround(a, b, c, ROUNDING)
8487 }
8488}
8489
8490/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8491///
8492/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8493/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8494/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8495/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8496/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8497/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8498///
8499/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_round_ps&expand=2566)
8500#[inline]
8501#[target_feature(enable = "avx512f")]
8502#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8503#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
8504#[rustc_legacy_const_generics(4)]
8505pub fn _mm512_mask_fmadd_round_ps<const ROUNDING: i32>(
8506 a: __m512,
8507 k: __mmask16,
8508 b: __m512,
8509 c: __m512,
8510) -> __m512 {
8511 unsafe {
8512 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmadd132psround(a, b, c, ROUNDING), a)
8514 }
8515}
8516
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8518///
8519/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8520/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8521/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8522/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8523/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8524/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8525///
8526/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_round_ps&expand=2568)
8527#[inline]
8528#[target_feature(enable = "avx512f")]
8529#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8530#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
8531#[rustc_legacy_const_generics(4)]
8532pub fn _mm512_maskz_fmadd_round_ps<const ROUNDING: i32>(
8533 k: __mmask16,
8534 a: __m512,
8535 b: __m512,
8536 c: __m512,
8537) -> __m512 {
8538 unsafe {
8539 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmadd132psround(a, b, c, ROUNDING), _mm512_setzero_ps())
8541 }
8542}
8543
8544/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
8545///
8546/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8547/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8548/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8549/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8550/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8551/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8552///
8553/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_round_ps&expand=2567)
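///
/// A sketch of the `mask3` variant, where unselected lanes come from `c`
/// (illustrative values; AVX-512F support is assumed):
///
/// ```ignore
/// let r = unsafe {
///     let a = _mm512_set1_ps(2.0);
///     let b = _mm512_set1_ps(3.0);
///     let c = _mm512_set1_ps(10.0);
///     // The low 8 lanes receive a * b + c; the high 8 lanes keep the values from `c`.
///     _mm512_mask3_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c, 0x00FF)
/// };
/// ```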
8554#[inline]
8555#[target_feature(enable = "avx512f")]
8556#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8557#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
8558#[rustc_legacy_const_generics(4)]
8559pub fn _mm512_mask3_fmadd_round_ps<const ROUNDING: i32>(
8560 a: __m512,
8561 b: __m512,
8562 c: __m512,
8563 k: __mmask16,
8564) -> __m512 {
8565 unsafe {
8566 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmadd132psround(a, b, c, ROUNDING), c)
8568 }
8569}
8570
8571/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.\
8572///
8573/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8574/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8575/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8576/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8577/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8578/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8579///
8580/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_round_pd&expand=2561)
8581#[inline]
8582#[target_feature(enable = "avx512f")]
8583#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8584#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
8585#[rustc_legacy_const_generics(3)]
8586pub fn _mm512_fmadd_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
8587 unsafe {
8588 static_assert_rounding!(ROUNDING);
8589 vfmadd132pdround(a, b, c, ROUNDING)
8590 }
8591}
8592
8593/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8594///
8595/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8596/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8597/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8598/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8599/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8600/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8601///
8602/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_round_pd&expand=2562)
8603#[inline]
8604#[target_feature(enable = "avx512f")]
8605#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8606#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
8607#[rustc_legacy_const_generics(4)]
8608pub fn _mm512_mask_fmadd_round_pd<const ROUNDING: i32>(
8609 a: __m512d,
8610 k: __mmask8,
8611 b: __m512d,
8612 c: __m512d,
8613) -> __m512d {
8614 unsafe {
8615 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmadd132pdround(a, b, c, ROUNDING), a)
8617 }
8618}
8619
8620/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8621///
8622/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8623/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8624/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8625/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8626/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8627/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8628///
8629/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_round_pd&expand=2564)
8630#[inline]
8631#[target_feature(enable = "avx512f")]
8632#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8633#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
8634#[rustc_legacy_const_generics(4)]
8635pub fn _mm512_maskz_fmadd_round_pd<const ROUNDING: i32>(
8636 k: __mmask8,
8637 a: __m512d,
8638 b: __m512d,
8639 c: __m512d,
8640) -> __m512d {
8641 unsafe {
8642 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmadd132pdround(a, b, c, ROUNDING), _mm512_setzero_pd())
8644 }
8645}
8646
8647/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
8648///
8649/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8650/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8651/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8652/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8653/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8654/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8655///
8656/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_round_pd&expand=2563)
8657#[inline]
8658#[target_feature(enable = "avx512f")]
8659#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8660#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
8661#[rustc_legacy_const_generics(4)]
8662pub fn _mm512_mask3_fmadd_round_pd<const ROUNDING: i32>(
8663 a: __m512d,
8664 b: __m512d,
8665 c: __m512d,
8666 k: __mmask8,
8667) -> __m512d {
8668 unsafe {
8669 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmadd132pdround(a, b, c, ROUNDING), c)
8671 }
8672}
8673
8674/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.\
8675///
8676/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8677/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8678/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8679/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8680/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8681/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8682///
8683/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_round_ps&expand=2651)
8684#[inline]
8685#[target_feature(enable = "avx512f")]
8686#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8687#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
8688#[rustc_legacy_const_generics(3)]
8689pub fn _mm512_fmsub_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
8690 unsafe {
8691 static_assert_rounding!(ROUNDING);
        vfmadd132psround(a, b, simd_neg(c), ROUNDING)
8693 }
8694}
8695
8696/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8697///
8698/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8699/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8700/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8701/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8702/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8703/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8704///
8705/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_round_ps&expand=2652)
8706#[inline]
8707#[target_feature(enable = "avx512f")]
8708#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8709#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
8710#[rustc_legacy_const_generics(4)]
8711pub fn _mm512_mask_fmsub_round_ps<const ROUNDING: i32>(
8712 a: __m512,
8713 k: __mmask16,
8714 b: __m512,
8715 c: __m512,
8716) -> __m512 {
8717 unsafe {
8718 static_assert_rounding!(ROUNDING);
        let r: __m512 = vfmadd132psround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, a)
8721 }
8722}
8723
8724/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8725///
8726/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8727/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8728/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8729/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8730/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8731/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8732///
8733/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_round_ps&expand=2654)
8734#[inline]
8735#[target_feature(enable = "avx512f")]
8736#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8737#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
8738#[rustc_legacy_const_generics(4)]
8739pub fn _mm512_maskz_fmsub_round_ps<const ROUNDING: i32>(
8740 k: __mmask16,
8741 a: __m512,
8742 b: __m512,
8743 c: __m512,
8744) -> __m512 {
8745 unsafe {
8746 static_assert_rounding!(ROUNDING);
        let r: __m512 = vfmadd132psround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, _mm512_setzero_ps())
8749 }
8750}
8751
8752/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
8753///
8754/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8755/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8756/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8757/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8758/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8759/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8760///
8761/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_round_ps&expand=2653)
8762#[inline]
8763#[target_feature(enable = "avx512f")]
8764#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8765#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
8766#[rustc_legacy_const_generics(4)]
8767pub fn _mm512_mask3_fmsub_round_ps<const ROUNDING: i32>(
8768 a: __m512,
8769 b: __m512,
8770 c: __m512,
8771 k: __mmask16,
8772) -> __m512 {
8773 unsafe {
8774 static_assert_rounding!(ROUNDING);
        let r: __m512 = vfmadd132psround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, c)
8777 }
8778}
8779
8780/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.\
8781///
8782/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8783/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8784/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8785/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8786/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8787/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8788///
8789/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_round_pd&expand=2647)
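///
/// A minimal sketch (illustrative values; AVX-512F support is assumed to have been
/// verified by the caller):
///
/// ```ignore
/// let r = unsafe {
///     let a = _mm512_set1_pd(2.0);
///     let b = _mm512_set1_pd(3.0);
///     let c = _mm512_set1_pd(1.0);
///     // Each lane computes a * b - c with a single rounding step (toward zero here).
///     _mm512_fmsub_round_pd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c)
/// };
/// ```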
8790#[inline]
8791#[target_feature(enable = "avx512f")]
8792#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8793#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
8794#[rustc_legacy_const_generics(3)]
8795pub fn _mm512_fmsub_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
8796 unsafe {
8797 static_assert_rounding!(ROUNDING);
        vfmadd132pdround(a, b, simd_neg(c), ROUNDING)
8799 }
8800}
8801
8802/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8803///
8804/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8805/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8806/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8807/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8808/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8809/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8810///
8811/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_round_pd&expand=2648)
8812#[inline]
8813#[target_feature(enable = "avx512f")]
8814#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8815#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
8816#[rustc_legacy_const_generics(4)]
8817pub fn _mm512_mask_fmsub_round_pd<const ROUNDING: i32>(
8818 a: __m512d,
8819 k: __mmask8,
8820 b: __m512d,
8821 c: __m512d,
8822) -> __m512d {
8823 unsafe {
8824 static_assert_rounding!(ROUNDING);
        let r: __m512d = vfmadd132pdround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, a)
8827 }
8828}
8829
8830/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8831///
8832/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8833/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8834/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8835/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8836/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8837/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8838///
8839/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_round_pd&expand=2650)
8840#[inline]
8841#[target_feature(enable = "avx512f")]
8842#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8843#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
8844#[rustc_legacy_const_generics(4)]
8845pub fn _mm512_maskz_fmsub_round_pd<const ROUNDING: i32>(
8846 k: __mmask8,
8847 a: __m512d,
8848 b: __m512d,
8849 c: __m512d,
8850) -> __m512d {
8851 unsafe {
8852 static_assert_rounding!(ROUNDING);
        let r: __m512d = vfmadd132pdround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, _mm512_setzero_pd())
8855 }
8856}
8857
8858/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
8859///
8860/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8861/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8862/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8863/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8864/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8865/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8866///
8867/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_round_pd&expand=2649)
8868#[inline]
8869#[target_feature(enable = "avx512f")]
8870#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8871#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
8872#[rustc_legacy_const_generics(4)]
8873pub fn _mm512_mask3_fmsub_round_pd<const ROUNDING: i32>(
8874 a: __m512d,
8875 b: __m512d,
8876 c: __m512d,
8877 k: __mmask8,
8878) -> __m512d {
8879 unsafe {
8880 static_assert_rounding!(ROUNDING);
        let r: __m512d = vfmadd132pdround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, c)
8883 }
8884}
8885
8886/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.\
8887///
8888/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8889/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8890/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8891/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8892/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8893/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8894///
8895/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_round_ps&expand=2619)
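///
/// A minimal sketch (illustrative values; AVX-512F support is assumed to have been
/// verified by the caller):
///
/// ```ignore
/// let r = unsafe {
///     let a = _mm512_set1_ps(2.0);
///     let b = _mm512_set1_ps(3.0);
///     let c = _mm512_set1_ps(1.0);
///     // Even-indexed lanes compute a * b - c, odd-indexed lanes a * b + c, rounded to nearest.
///     _mm512_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c)
/// };
/// ```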
8896#[inline]
8897#[target_feature(enable = "avx512f")]
8898#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8899#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
8900#[rustc_legacy_const_generics(3)]
8901pub fn _mm512_fmaddsub_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
8902 unsafe {
8903 static_assert_rounding!(ROUNDING);
8904 vfmaddsubpsround(a, b, c, ROUNDING)
8905 }
8906}
8907
8908/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8909///
8910/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8911/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8912/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8913/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8914/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8915/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8916///
8917/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_round_ps&expand=2620)
8918#[inline]
8919#[target_feature(enable = "avx512f")]
8920#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8921#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
8922#[rustc_legacy_const_generics(4)]
8923pub fn _mm512_mask_fmaddsub_round_ps<const ROUNDING: i32>(
8924 a: __m512,
8925 k: __mmask16,
8926 b: __m512,
8927 c: __m512,
8928) -> __m512 {
8929 unsafe {
8930 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmaddsubpsround(a, b, c, ROUNDING), a)
8932 }
8933}
8934
8935/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8936///
8937/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8938/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8939/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8940/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8941/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8942/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8943///
8944/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_round_ps&expand=2622)
8945#[inline]
8946#[target_feature(enable = "avx512f")]
8947#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8948#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
8949#[rustc_legacy_const_generics(4)]
8950pub fn _mm512_maskz_fmaddsub_round_ps<const ROUNDING: i32>(
8951 k: __mmask16,
8952 a: __m512,
8953 b: __m512,
8954 c: __m512,
8955) -> __m512 {
8956 unsafe {
8957 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmaddsubpsround(a, b, c, ROUNDING), _mm512_setzero_ps())
8959 }
8960}
8961
8962/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
8963///
8964/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8965/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8966/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8967/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8968/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8969/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8970///
8971/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_round_ps&expand=2621)
8972#[inline]
8973#[target_feature(enable = "avx512f")]
8974#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
8975#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
8976#[rustc_legacy_const_generics(4)]
8977pub fn _mm512_mask3_fmaddsub_round_ps<const ROUNDING: i32>(
8978 a: __m512,
8979 b: __m512,
8980 c: __m512,
8981 k: __mmask16,
8982) -> __m512 {
8983 unsafe {
8984 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmaddsubpsround(a, b, c, ROUNDING), c)
8986 }
8987}
8988
8989/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.\
8990///
8991/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8992/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8993/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8994/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8995/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8996/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8997///
8998/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_round_pd&expand=2615)
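///
/// A sketch of passing an explicit rounding mode as the const generic
/// (illustrative helper and values, not from Intel's documentation; assumes
/// `avx512f` has been verified at runtime):
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let a = _mm512_set1_pd(2.0);
///     let b = _mm512_set1_pd(3.0);
///     let c = _mm512_set1_pd(1.0);
///     // Round toward +infinity; these inputs are exactly representable, so
///     // the chosen mode does not change the numerical result here.
///     let r = _mm512_fmaddsub_round_pd::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a, b, c);
///     let mut out = [0.0f64; 8];
///     unsafe { _mm512_storeu_pd(out.as_mut_ptr(), r) };
///     assert_eq!(out[0], 5.0); // even lane: 2.0 * 3.0 - 1.0
///     assert_eq!(out[1], 7.0); // odd lane:  2.0 * 3.0 + 1.0
/// }
///
/// if is_x86_feature_detected!("avx512f") {
///     unsafe { demo() }; // SAFETY: `avx512f` verified at runtime.
/// }
/// ```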
8999#[inline]
9000#[target_feature(enable = "avx512f")]
9001#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9002#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
9003#[rustc_legacy_const_generics(3)]
9004pub fn _mm512_fmaddsub_round_pd<const ROUNDING: i32>(
9005 a: __m512d,
9006 b: __m512d,
9007 c: __m512d,
9008) -> __m512d {
9009 unsafe {
9010 static_assert_rounding!(ROUNDING);
9011 vfmaddsubpdround(a, b, c, ROUNDING)
9012 }
9013}
9014
9015/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9016///
9017/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9018/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9019/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9020/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9021/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9022/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9023///
9024/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_round_pd&expand=2616)
9025#[inline]
9026#[target_feature(enable = "avx512f")]
9027#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9028#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
9029#[rustc_legacy_const_generics(4)]
9030pub fn _mm512_mask_fmaddsub_round_pd<const ROUNDING: i32>(
9031 a: __m512d,
9032 k: __mmask8,
9033 b: __m512d,
9034 c: __m512d,
9035) -> __m512d {
9036 unsafe {
9037 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmaddsubpdround(a, b, c, ROUNDING), a)
9039 }
9040}
9041
9042/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9043///
9044/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9045/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9046/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9047/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9048/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9049/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9050///
9051/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_round_pd&expand=2618)
9052#[inline]
9053#[target_feature(enable = "avx512f")]
9054#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9055#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
9056#[rustc_legacy_const_generics(4)]
9057pub fn _mm512_maskz_fmaddsub_round_pd<const ROUNDING: i32>(
9058 k: __mmask8,
9059 a: __m512d,
9060 b: __m512d,
9061 c: __m512d,
9062) -> __m512d {
9063 unsafe {
9064 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmaddsubpdround(a, b, c, ROUNDING), _mm512_setzero_pd())
9066 }
9067}
9068
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9070///
9071/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9072/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9073/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9074/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9075/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9076/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9077///
9078/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_round_pd&expand=2617)
9079#[inline]
9080#[target_feature(enable = "avx512f")]
9081#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9082#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
9083#[rustc_legacy_const_generics(4)]
9084pub fn _mm512_mask3_fmaddsub_round_pd<const ROUNDING: i32>(
9085 a: __m512d,
9086 b: __m512d,
9087 c: __m512d,
9088 k: __mmask8,
9089) -> __m512d {
9090 unsafe {
9091 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmaddsubpdround(a, b, c, ROUNDING), c)
9093 }
9094}
9095
9096/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.\
9097///
9098/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9099/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9100/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9101/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9102/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9103/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9104///
9105/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_round_ps&expand=2699)
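///
/// An illustrative sketch contrasting the add/subtract order with `fmaddsub`
/// (helper name and values are made up; assumes `avx512f` is available):
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let a = _mm512_set1_ps(2.0);
///     let b = _mm512_set1_ps(3.0);
///     let c = _mm512_set1_ps(1.0);
///     let r = _mm512_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
///     let mut out = [0.0f32; 16];
///     unsafe { _mm512_storeu_ps(out.as_mut_ptr(), r) };
///     assert_eq!(out[0], 7.0); // even lane: 2.0 * 3.0 + 1.0
///     assert_eq!(out[1], 5.0); // odd lane:  2.0 * 3.0 - 1.0
/// }
///
/// if is_x86_feature_detected!("avx512f") {
///     unsafe { demo() }; // SAFETY: `avx512f` verified at runtime.
/// }
/// ```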
9106#[inline]
9107#[target_feature(enable = "avx512f")]
9108#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9109#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
9110#[rustc_legacy_const_generics(3)]
9111pub fn _mm512_fmsubadd_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
9112 unsafe {
9113 static_assert_rounding!(ROUNDING);
        vfmaddsubpsround(a, b, simd_neg(c), ROUNDING)
9115 }
9116}
9117
9118/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9119///
9120/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9121/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9122/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9123/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9124/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9125/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9126///
9127/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_round_ps&expand=2700)
9128#[inline]
9129#[target_feature(enable = "avx512f")]
9130#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9131#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
9132#[rustc_legacy_const_generics(4)]
9133pub fn _mm512_mask_fmsubadd_round_ps<const ROUNDING: i32>(
9134 a: __m512,
9135 k: __mmask16,
9136 b: __m512,
9137 c: __m512,
9138) -> __m512 {
9139 unsafe {
9140 static_assert_rounding!(ROUNDING);
        let r: __m512 = vfmaddsubpsround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, a)
9143 }
9144}
9145
9146/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9147///
9148/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9149/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9150/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9151/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9152/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9153/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9154///
9155/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_round_ps&expand=2702)
9156#[inline]
9157#[target_feature(enable = "avx512f")]
9158#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9159#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
9160#[rustc_legacy_const_generics(4)]
9161pub fn _mm512_maskz_fmsubadd_round_ps<const ROUNDING: i32>(
9162 k: __mmask16,
9163 a: __m512,
9164 b: __m512,
9165 c: __m512,
9166) -> __m512 {
9167 unsafe {
9168 static_assert_rounding!(ROUNDING);
        let r: __m512 = vfmaddsubpsround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, _mm512_setzero_ps())
9171 }
9172}
9173
9174/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9175///
9176/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9177/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9178/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9179/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9180/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9181/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9182///
9183/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_round_ps&expand=2701)
9184#[inline]
9185#[target_feature(enable = "avx512f")]
9186#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9187#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
9188#[rustc_legacy_const_generics(4)]
9189pub fn _mm512_mask3_fmsubadd_round_ps<const ROUNDING: i32>(
9190 a: __m512,
9191 b: __m512,
9192 c: __m512,
9193 k: __mmask16,
9194) -> __m512 {
9195 unsafe {
9196 static_assert_rounding!(ROUNDING);
        let r: __m512 = vfmaddsubpsround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, c)
9199 }
9200}
9201
9202/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.\
9203///
9204/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9205/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9206/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9207/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9208/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9209/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9210///
9211/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_round_pd&expand=2695)
9212#[inline]
9213#[target_feature(enable = "avx512f")]
9214#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9215#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
9216#[rustc_legacy_const_generics(3)]
9217pub fn _mm512_fmsubadd_round_pd<const ROUNDING: i32>(
9218 a: __m512d,
9219 b: __m512d,
9220 c: __m512d,
9221) -> __m512d {
9222 unsafe {
9223 static_assert_rounding!(ROUNDING);
        vfmaddsubpdround(a, b, simd_neg(c), ROUNDING)
9225 }
9226}
9227
9228/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9229///
9230/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9231/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9232/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9233/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9234/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9235/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9236///
9237/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_round_pd&expand=2696)
9238#[inline]
9239#[target_feature(enable = "avx512f")]
9240#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9241#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
9242#[rustc_legacy_const_generics(4)]
9243pub fn _mm512_mask_fmsubadd_round_pd<const ROUNDING: i32>(
9244 a: __m512d,
9245 k: __mmask8,
9246 b: __m512d,
9247 c: __m512d,
9248) -> __m512d {
9249 unsafe {
9250 static_assert_rounding!(ROUNDING);
        let r: __m512d = vfmaddsubpdround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, a)
9253 }
9254}
9255
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9257///
9258/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9259/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9260/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9261/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9262/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9263/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9264///
9265/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_round_pd&expand=2698)
9266#[inline]
9267#[target_feature(enable = "avx512f")]
9268#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9269#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
9270#[rustc_legacy_const_generics(4)]
9271pub fn _mm512_maskz_fmsubadd_round_pd<const ROUNDING: i32>(
9272 k: __mmask8,
9273 a: __m512d,
9274 b: __m512d,
9275 c: __m512d,
9276) -> __m512d {
9277 unsafe {
9278 static_assert_rounding!(ROUNDING);
        let r: __m512d = vfmaddsubpdround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, _mm512_setzero_pd())
9281 }
9282}
9283
9284/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9285///
9286/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9287/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9288/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9289/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9290/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9291/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9292///
9293/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_round_pd&expand=2697)
9294#[inline]
9295#[target_feature(enable = "avx512f")]
9296#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9297#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
9298#[rustc_legacy_const_generics(4)]
9299pub fn _mm512_mask3_fmsubadd_round_pd<const ROUNDING: i32>(
9300 a: __m512d,
9301 b: __m512d,
9302 c: __m512d,
9303 k: __mmask8,
9304) -> __m512d {
9305 unsafe {
9306 static_assert_rounding!(ROUNDING);
        let r: __m512d = vfmaddsubpdround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, c)
9309 }
9310}
9311
9312/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.\
9313///
9314/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9315/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9316/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9317/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9318/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9319/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9320///
9321/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_round_ps&expand=2731)
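///
/// A small sketch of the negated-multiply-add behaviour (illustrative helper
/// and inputs; assumes `avx512f` has been detected at runtime):
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let a = _mm512_set1_ps(2.0);
///     let b = _mm512_set1_ps(3.0);
///     let c = _mm512_set1_ps(1.0);
///     let r = _mm512_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
///     let mut out = [0.0f32; 16];
///     unsafe { _mm512_storeu_ps(out.as_mut_ptr(), r) };
///     // Every lane is -(2.0 * 3.0) + 1.0 == -5.0.
///     assert!(out.iter().all(|&x| x == -5.0));
/// }
///
/// if is_x86_feature_detected!("avx512f") {
///     unsafe { demo() }; // SAFETY: `avx512f` verified at runtime.
/// }
/// ```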
9322#[inline]
9323#[target_feature(enable = "avx512f")]
9324#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9325#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
9326#[rustc_legacy_const_generics(3)]
9327pub fn _mm512_fnmadd_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
9328 unsafe {
9329 static_assert_rounding!(ROUNDING);
        vfmadd132psround(simd_neg(a), b, c, ROUNDING)
9331 }
9332}
9333
9334/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9335///
9336/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9337/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9338/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9339/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9340/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9341/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9342///
9343/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_round_ps&expand=2732)
9344#[inline]
9345#[target_feature(enable = "avx512f")]
9346#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9347#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
9348#[rustc_legacy_const_generics(4)]
9349pub fn _mm512_mask_fnmadd_round_ps<const ROUNDING: i32>(
9350 a: __m512,
9351 k: __mmask16,
9352 b: __m512,
9353 c: __m512,
9354) -> __m512 {
9355 unsafe {
9356 static_assert_rounding!(ROUNDING);
        let r: __m512 = vfmadd132psround(simd_neg(a), b, c, ROUNDING);
        simd_select_bitmask(k, r, a)
9359 }
9360}
9361
9362/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9363///
9364/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9365/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9366/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9367/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9368/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9369/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9370///
9371/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_round_ps&expand=2734)
9372#[inline]
9373#[target_feature(enable = "avx512f")]
9374#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9375#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
9376#[rustc_legacy_const_generics(4)]
9377pub fn _mm512_maskz_fnmadd_round_ps<const ROUNDING: i32>(
9378 k: __mmask16,
9379 a: __m512,
9380 b: __m512,
9381 c: __m512,
9382) -> __m512 {
9383 unsafe {
9384 static_assert_rounding!(ROUNDING);
        let r: __m512 = vfmadd132psround(simd_neg(a), b, c, ROUNDING);
        simd_select_bitmask(k, r, _mm512_setzero_ps())
9387 }
9388}
9389
9390/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9391///
9392/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9393/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9394/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9395/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9396/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9397/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9398///
9399/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_round_ps&expand=2733)
9400#[inline]
9401#[target_feature(enable = "avx512f")]
9402#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9403#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
9404#[rustc_legacy_const_generics(4)]
9405pub fn _mm512_mask3_fnmadd_round_ps<const ROUNDING: i32>(
9406 a: __m512,
9407 b: __m512,
9408 c: __m512,
9409 k: __mmask16,
9410) -> __m512 {
9411 unsafe {
9412 static_assert_rounding!(ROUNDING);
        let r: __m512 = vfmadd132psround(simd_neg(a), b, c, ROUNDING);
        simd_select_bitmask(k, r, c)
9415 }
9416}
9417
9418/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.\
9419///
9420/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9421/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9422/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9423/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9424/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9425/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9426///
9427/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_round_pd&expand=2711)
9428#[inline]
9429#[target_feature(enable = "avx512f")]
9430#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9431#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
9432#[rustc_legacy_const_generics(3)]
9433pub fn _mm512_fnmadd_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
9434 unsafe {
9435 static_assert_rounding!(ROUNDING);
        vfmadd132pdround(simd_neg(a), b, c, ROUNDING)
9437 }
9438}
9439
9440/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9441///
9442/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9443/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9444/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9445/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9446/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9447/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9448///
9449/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_round_pd&expand=2728)
9450#[inline]
9451#[target_feature(enable = "avx512f")]
9452#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9453#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
9454#[rustc_legacy_const_generics(4)]
9455pub fn _mm512_mask_fnmadd_round_pd<const ROUNDING: i32>(
9456 a: __m512d,
9457 k: __mmask8,
9458 b: __m512d,
9459 c: __m512d,
9460) -> __m512d {
9461 unsafe {
9462 static_assert_rounding!(ROUNDING);
        let r: __m512d = vfmadd132pdround(simd_neg(a), b, c, ROUNDING);
        simd_select_bitmask(k, r, a)
9465 }
9466}
9467
9468/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9469///
9470/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9471/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9472/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9473/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9474/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9475/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9476///
9477/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_round_pd&expand=2730)
9478#[inline]
9479#[target_feature(enable = "avx512f")]
9480#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9481#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
9482#[rustc_legacy_const_generics(4)]
9483pub fn _mm512_maskz_fnmadd_round_pd<const ROUNDING: i32>(
9484 k: __mmask8,
9485 a: __m512d,
9486 b: __m512d,
9487 c: __m512d,
9488) -> __m512d {
9489 unsafe {
9490 static_assert_rounding!(ROUNDING);
        let r: __m512d = vfmadd132pdround(simd_neg(a), b, c, ROUNDING);
        simd_select_bitmask(k, r, _mm512_setzero_pd())
9493 }
9494}
9495
9496/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9497///
9498/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9499/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9500/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9501/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9502/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9503/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9504///
9505/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_round_pd&expand=2729)
9506#[inline]
9507#[target_feature(enable = "avx512f")]
9508#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9509#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
9510#[rustc_legacy_const_generics(4)]
9511pub fn _mm512_mask3_fnmadd_round_pd<const ROUNDING: i32>(
9512 a: __m512d,
9513 b: __m512d,
9514 c: __m512d,
9515 k: __mmask8,
9516) -> __m512d {
9517 unsafe {
9518 static_assert_rounding!(ROUNDING);
        let r: __m512d = vfmadd132pdround(simd_neg(a), b, c, ROUNDING);
        simd_select_bitmask(k, r, c)
9521 }
9522}
9523
9524/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.\
9525///
9526/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9527/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9528/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9529/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9530/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9531/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9532///
9533/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_round_ps&expand=2779)
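///
/// A small sketch of the negated-multiply-subtract behaviour (illustrative
/// helper and inputs; assumes `avx512f` has been detected at runtime):
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let a = _mm512_set1_ps(2.0);
///     let b = _mm512_set1_ps(3.0);
///     let c = _mm512_set1_ps(1.0);
///     let r = _mm512_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
///     let mut out = [0.0f32; 16];
///     unsafe { _mm512_storeu_ps(out.as_mut_ptr(), r) };
///     // Every lane is -(2.0 * 3.0) - 1.0 == -7.0.
///     assert!(out.iter().all(|&x| x == -7.0));
/// }
///
/// if is_x86_feature_detected!("avx512f") {
///     unsafe { demo() }; // SAFETY: `avx512f` verified at runtime.
/// }
/// ```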
9534#[inline]
9535#[target_feature(enable = "avx512f")]
9536#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9537#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
9538#[rustc_legacy_const_generics(3)]
9539pub fn _mm512_fnmsub_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
9540 unsafe {
9541 static_assert_rounding!(ROUNDING);
        vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING)
9543 }
9544}
9545
9546/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9547///
9548/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9549/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9550/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9551/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9552/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9553/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9554///
9555/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_round_ps&expand=2780)
9556#[inline]
9557#[target_feature(enable = "avx512f")]
9558#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9559#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
9560#[rustc_legacy_const_generics(4)]
9561pub fn _mm512_mask_fnmsub_round_ps<const ROUNDING: i32>(
9562 a: __m512,
9563 k: __mmask16,
9564 b: __m512,
9565 c: __m512,
9566) -> __m512 {
9567 unsafe {
9568 static_assert_rounding!(ROUNDING);
        let r: __m512 = vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, a)
9571 }
9572}
9573
9574/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9575///
9576/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9577/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9578/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9579/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9580/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9581/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9582///
9583/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_round_ps&expand=2782)
9584#[inline]
9585#[target_feature(enable = "avx512f")]
9586#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9587#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
9588#[rustc_legacy_const_generics(4)]
9589pub fn _mm512_maskz_fnmsub_round_ps<const ROUNDING: i32>(
9590 k: __mmask16,
9591 a: __m512,
9592 b: __m512,
9593 c: __m512,
9594) -> __m512 {
9595 unsafe {
9596 static_assert_rounding!(ROUNDING);
        let r: __m512 = vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, _mm512_setzero_ps())
9599 }
9600}
9601
9602/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9603///
9604/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9605/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9606/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9607/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9608/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9609/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9610///
9611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_round_ps&expand=2781)
9612#[inline]
9613#[target_feature(enable = "avx512f")]
9614#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9615#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
9616#[rustc_legacy_const_generics(4)]
9617pub fn _mm512_mask3_fnmsub_round_ps<const ROUNDING: i32>(
9618 a: __m512,
9619 b: __m512,
9620 c: __m512,
9621 k: __mmask16,
9622) -> __m512 {
9623 unsafe {
9624 static_assert_rounding!(ROUNDING);
        let r: __m512 = vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, c)
9627 }
9628}
9629
9630/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.\
9631///
9632/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9633/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9634/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9635/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9636/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9637/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9638///
9639/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_round_pd&expand=2775)
9640#[inline]
9641#[target_feature(enable = "avx512f")]
9642#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9643#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
9644#[rustc_legacy_const_generics(3)]
9645pub fn _mm512_fnmsub_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
9646 unsafe {
9647 static_assert_rounding!(ROUNDING);
        vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING)
9649 }
9650}
9651
9652/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9653///
9654/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9655/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9656/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9657/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9658/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9659/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9660///
9661/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_round_pd&expand=2776)
9662#[inline]
9663#[target_feature(enable = "avx512f")]
9664#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9665#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
9666#[rustc_legacy_const_generics(4)]
9667pub fn _mm512_mask_fnmsub_round_pd<const ROUNDING: i32>(
9668 a: __m512d,
9669 k: __mmask8,
9670 b: __m512d,
9671 c: __m512d,
9672) -> __m512d {
9673 unsafe {
9674 static_assert_rounding!(ROUNDING);
        let r: __m512d = vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, a)
9677 }
9678}
9679
9680/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9681///
9682/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9683/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9684/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9685/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9686/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9687/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9688///
9689/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_round_pd&expand=2778)
9690#[inline]
9691#[target_feature(enable = "avx512f")]
9692#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9693#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
9694#[rustc_legacy_const_generics(4)]
9695pub fn _mm512_maskz_fnmsub_round_pd<const ROUNDING: i32>(
9696 k: __mmask8,
9697 a: __m512d,
9698 b: __m512d,
9699 c: __m512d,
9700) -> __m512d {
9701 unsafe {
9702 static_assert_rounding!(ROUNDING);
        let r: __m512d = vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, _mm512_setzero_pd())
9705 }
9706}
9707
9708/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9709///
9710/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9711/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9712/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9713/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9714/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9715/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9716///
9717/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_round_pd&expand=2777)
9718#[inline]
9719#[target_feature(enable = "avx512f")]
9720#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9721#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
9722#[rustc_legacy_const_generics(4)]
9723pub fn _mm512_mask3_fnmsub_round_pd<const ROUNDING: i32>(
9724 a: __m512d,
9725 b: __m512d,
9726 c: __m512d,
9727 k: __mmask8,
9728) -> __m512d {
9729 unsafe {
9730 static_assert_rounding!(ROUNDING);
        let r: __m512d = vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, c)
9733 }
9734}
9735
9736/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst.\
9737/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9738///
9739/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_round_ps&expand=3662)
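///
/// An illustrative sketch (helper and inputs are made up; assumes `avx512f`
/// support has been verified at runtime):
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let a = _mm512_set1_ps(-1.5);
///     let b = _mm512_set1_ps(0.25);
///     let r = _mm512_max_round_ps::<{ _MM_FROUND_NO_EXC }>(a, b);
///     let mut out = [0.0f32; 16];
///     unsafe { _mm512_storeu_ps(out.as_mut_ptr(), r) };
///     // Every lane holds the larger operand, 0.25.
///     assert!(out.iter().all(|&x| x == 0.25));
/// }
///
/// if is_x86_feature_detected!("avx512f") {
///     unsafe { demo() }; // SAFETY: `avx512f` verified at runtime.
/// }
/// ```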
9740#[inline]
9741#[target_feature(enable = "avx512f")]
9742#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9743#[cfg_attr(test, assert_instr(vmaxps, SAE = 8))]
9744#[rustc_legacy_const_generics(2)]
9745pub fn _mm512_max_round_ps<const SAE: i32>(a: __m512, b: __m512) -> __m512 {
9746 unsafe {
9747 static_assert_sae!(SAE);
9748 let a: f32x16 = a.as_f32x16();
9749 let b: f32x16 = b.as_f32x16();
9750 let r: f32x16 = vmaxps(a, b, SAE);
        transmute(r)
9752 }
9753}
9754
9755/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
9756/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9757///
9758/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_round_ps&expand=3660)
9759#[inline]
9760#[target_feature(enable = "avx512f")]
9761#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9762#[cfg_attr(test, assert_instr(vmaxps, SAE = 8))]
9763#[rustc_legacy_const_generics(4)]
9764pub fn _mm512_mask_max_round_ps<const SAE: i32>(
9765 src: __m512,
9766 k: __mmask16,
9767 a: __m512,
9768 b: __m512,
9769) -> __m512 {
9770 unsafe {
9771 static_assert_sae!(SAE);
9772 let a: f32x16 = a.as_f32x16();
9773 let b: f32x16 = b.as_f32x16();
9774 let r: f32x16 = vmaxps(a, b, SAE);
        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
9776 }
9777}
9778
9779/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9780/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9781///
9782/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_round_ps&expand=3661)
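///
/// A sketch of the zeroing behaviour (illustrative helper and values; assumes
/// `avx512f` has been verified at runtime):
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let a = _mm512_set1_ps(1.0);
///     let b = _mm512_set1_ps(2.0);
///     // Only lane 0 is computed; all other lanes are zeroed.
///     let r = _mm512_maskz_max_round_ps::<{ _MM_FROUND_NO_EXC }>(0b1, a, b);
///     let mut out = [0.0f32; 16];
///     unsafe { _mm512_storeu_ps(out.as_mut_ptr(), r) };
///     assert_eq!(out[0], 2.0);
///     assert_eq!(out[1], 0.0);
/// }
///
/// if is_x86_feature_detected!("avx512f") {
///     unsafe { demo() }; // SAFETY: `avx512f` verified at runtime.
/// }
/// ```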
9783#[inline]
9784#[target_feature(enable = "avx512f")]
9785#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9786#[cfg_attr(test, assert_instr(vmaxps, SAE = 8))]
9787#[rustc_legacy_const_generics(3)]
9788pub fn _mm512_maskz_max_round_ps<const SAE: i32>(k: __mmask16, a: __m512, b: __m512) -> __m512 {
9789 unsafe {
9790 static_assert_sae!(SAE);
9791 let a: f32x16 = a.as_f32x16();
9792 let b: f32x16 = b.as_f32x16();
9793 let r: f32x16 = vmaxps(a, b, SAE);
        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
9795 }
9796}
9797
9798/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst.\
9799/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9800///
9801/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_round_pd&expand=3659)
9802#[inline]
9803#[target_feature(enable = "avx512f")]
9804#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9805#[cfg_attr(test, assert_instr(vmaxpd, SAE = 8))]
9806#[rustc_legacy_const_generics(2)]
9807pub fn _mm512_max_round_pd<const SAE: i32>(a: __m512d, b: __m512d) -> __m512d {
9808 unsafe {
9809 static_assert_sae!(SAE);
9810 let a: f64x8 = a.as_f64x8();
9811 let b: f64x8 = b.as_f64x8();
9812 let r: f64x8 = vmaxpd(a, b, SAE);
        transmute(r)
9814 }
9815}
9816
9817/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
9818/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9819///
9820/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_round_pd&expand=3657)
9821#[inline]
9822#[target_feature(enable = "avx512f")]
9823#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9824#[cfg_attr(test, assert_instr(vmaxpd, SAE = 8))]
9825#[rustc_legacy_const_generics(4)]
9826pub fn _mm512_mask_max_round_pd<const SAE: i32>(
9827 src: __m512d,
9828 k: __mmask8,
9829 a: __m512d,
9830 b: __m512d,
9831) -> __m512d {
9832 unsafe {
9833 static_assert_sae!(SAE);
9834 let a: f64x8 = a.as_f64x8();
9835 let b: f64x8 = b.as_f64x8();
9836 let r: f64x8 = vmaxpd(a, b, SAE);
        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
9838 }
9839}
9840
9841/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9842/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9843///
9844/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_round_pd&expand=3658)
9845#[inline]
9846#[target_feature(enable = "avx512f")]
9847#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9848#[cfg_attr(test, assert_instr(vmaxpd, SAE = 8))]
9849#[rustc_legacy_const_generics(3)]
9850pub fn _mm512_maskz_max_round_pd<const SAE: i32>(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
9851 unsafe {
9852 static_assert_sae!(SAE);
9853 let a: f64x8 = a.as_f64x8();
9854 let b: f64x8 = b.as_f64x8();
9855 let r: f64x8 = vmaxpd(a, b, SAE);
        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
9857 }
9858}
9859
9860/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst.\
9861/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9862///
9863/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_round_ps&expand=3776)
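///
/// An illustrative sketch (helper and inputs are made up; assumes `avx512f`
/// support has been verified at runtime):
///
/// ```ignore
/// use std::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() {
///     let a = _mm512_set1_ps(-1.5);
///     let b = _mm512_set1_ps(0.25);
///     let r = _mm512_min_round_ps::<{ _MM_FROUND_NO_EXC }>(a, b);
///     let mut out = [0.0f32; 16];
///     unsafe { _mm512_storeu_ps(out.as_mut_ptr(), r) };
///     // Every lane holds the smaller operand, -1.5.
///     assert!(out.iter().all(|&x| x == -1.5));
/// }
///
/// if is_x86_feature_detected!("avx512f") {
///     unsafe { demo() }; // SAFETY: `avx512f` verified at runtime.
/// }
/// ```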
9864#[inline]
9865#[target_feature(enable = "avx512f")]
9866#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9867#[cfg_attr(test, assert_instr(vminps, SAE = 8))]
9868#[rustc_legacy_const_generics(2)]
9869pub fn _mm512_min_round_ps<const SAE: i32>(a: __m512, b: __m512) -> __m512 {
9870 unsafe {
9871 static_assert_sae!(SAE);
9872 let a: f32x16 = a.as_f32x16();
9873 let b: f32x16 = b.as_f32x16();
9874 let r: f32x16 = vminps(a, b, SAE);
        transmute(r)
9876 }
9877}
9878
9879/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
9880/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9881///
9882/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_round_ps&expand=3774)
9883#[inline]
9884#[target_feature(enable = "avx512f")]
9885#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9886#[cfg_attr(test, assert_instr(vminps, SAE = 8))]
9887#[rustc_legacy_const_generics(4)]
9888pub fn _mm512_mask_min_round_ps<const SAE: i32>(
9889 src: __m512,
9890 k: __mmask16,
9891 a: __m512,
9892 b: __m512,
9893) -> __m512 {
9894 unsafe {
9895 static_assert_sae!(SAE);
9896 let a: f32x16 = a.as_f32x16();
9897 let b: f32x16 = b.as_f32x16();
9898 let r: f32x16 = vminps(a, b, SAE);
        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
9900 }
9901}
9902
9903/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9904/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9905///
9906/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_round_ps&expand=3775)
9907#[inline]
9908#[target_feature(enable = "avx512f")]
9909#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9910#[cfg_attr(test, assert_instr(vminps, SAE = 8))]
9911#[rustc_legacy_const_generics(3)]
9912pub fn _mm512_maskz_min_round_ps<const SAE: i32>(k: __mmask16, a: __m512, b: __m512) -> __m512 {
9913 unsafe {
9914 static_assert_sae!(SAE);
9915 let a: f32x16 = a.as_f32x16();
9916 let b: f32x16 = b.as_f32x16();
9917 let r: f32x16 = vminps(a, b, SAE);
        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
9919 }
9920}
9921
9922/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst.\
9923/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9924///
9925/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_round_pd&expand=3773)
9926#[inline]
9927#[target_feature(enable = "avx512f")]
9928#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9929#[cfg_attr(test, assert_instr(vminpd, SAE = 8))]
9930#[rustc_legacy_const_generics(2)]
9931pub fn _mm512_min_round_pd<const SAE: i32>(a: __m512d, b: __m512d) -> __m512d {
9932 unsafe {
9933 static_assert_sae!(SAE);
9934 let a: f64x8 = a.as_f64x8();
9935 let b: f64x8 = b.as_f64x8();
9936 let r: f64x8 = vminpd(a, b, SAE);
        transmute(r)
9938 }
9939}
9940
9941/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
9942/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9943///
9944/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_round_pd&expand=3771)
9945#[inline]
9946#[target_feature(enable = "avx512f")]
9947#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9948#[cfg_attr(test, assert_instr(vminpd, SAE = 8))]
9949#[rustc_legacy_const_generics(4)]
9950pub fn _mm512_mask_min_round_pd<const SAE: i32>(
9951 src: __m512d,
9952 k: __mmask8,
9953 a: __m512d,
9954 b: __m512d,
9955) -> __m512d {
9956 unsafe {
9957 static_assert_sae!(SAE);
9958 let a: f64x8 = a.as_f64x8();
9959 let b: f64x8 = b.as_f64x8();
9960 let r: f64x8 = vminpd(a, b, SAE);
        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
9962 }
9963}
9964
9965/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9966/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9967///
9968/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_round_pd&expand=3772)
9969#[inline]
9970#[target_feature(enable = "avx512f")]
9971#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9972#[cfg_attr(test, assert_instr(vminpd, SAE = 8))]
9973#[rustc_legacy_const_generics(3)]
9974pub fn _mm512_maskz_min_round_pd<const SAE: i32>(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
9975 unsafe {
9976 static_assert_sae!(SAE);
9977 let a: f64x8 = a.as_f64x8();
9978 let b: f64x8 = b.as_f64x8();
9979 let r: f64x8 = vminpd(a, b, SAE);
        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
9981 }
9982}
9983
9984/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.\
9985/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9986///
9987/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_round_ps&expand=2850)
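///
/// A minimal usage sketch (illustrative only; the wrapper name is made up):
///
/// ```
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn exponents(a: __m512) -> __m512 {
///     // A lane holding 8.0 yields 3.0, i.e. floor(log2(8.0)).
///     _mm512_getexp_round_ps::<_MM_FROUND_NO_EXC>(a)
/// }
/// ```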
9988#[inline]
9989#[target_feature(enable = "avx512f")]
9990#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
9991#[cfg_attr(test, assert_instr(vgetexpps, SAE = 8))]
9992#[rustc_legacy_const_generics(1)]
9993pub fn _mm512_getexp_round_ps<const SAE: i32>(a: __m512) -> __m512 {
9994 unsafe {
9995 static_assert_sae!(SAE);
9996 let a: f32x16 = a.as_f32x16();
        let r: f32x16 = vgetexpps(a, f32x16::ZERO, 0b11111111_11111111, SAE);
        transmute(r)
9999 }
10000}
10001
10002/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
10003/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10004///
10005/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_round_ps&expand=2851)
10006#[inline]
10007#[target_feature(enable = "avx512f")]
10008#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10009#[cfg_attr(test, assert_instr(vgetexpps, SAE = 8))]
10010#[rustc_legacy_const_generics(3)]
10011pub fn _mm512_mask_getexp_round_ps<const SAE: i32>(src: __m512, k: __mmask16, a: __m512) -> __m512 {
10012 unsafe {
10013 static_assert_sae!(SAE);
10014 let a: f32x16 = a.as_f32x16();
10015 let src: f32x16 = src.as_f32x16();
        let r: f32x16 = vgetexpps(a, src, k, SAE);
        transmute(r)
10018 }
10019}
10020
10021/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
10022/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10023///
10024/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_round_ps&expand=2852)
10025#[inline]
10026#[target_feature(enable = "avx512f")]
10027#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10028#[cfg_attr(test, assert_instr(vgetexpps, SAE = 8))]
10029#[rustc_legacy_const_generics(2)]
10030pub fn _mm512_maskz_getexp_round_ps<const SAE: i32>(k: __mmask16, a: __m512) -> __m512 {
10031 unsafe {
10032 static_assert_sae!(SAE);
10033 let a: f32x16 = a.as_f32x16();
        let r: f32x16 = vgetexpps(a, f32x16::ZERO, k, SAE);
        transmute(r)
10036 }
10037}
10038
10039/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.\
10040/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10041///
10042/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_round_pd&expand=2847)
10043#[inline]
10044#[target_feature(enable = "avx512f")]
10045#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10046#[cfg_attr(test, assert_instr(vgetexppd, SAE = 8))]
10047#[rustc_legacy_const_generics(1)]
10048pub fn _mm512_getexp_round_pd<const SAE: i32>(a: __m512d) -> __m512d {
10049 unsafe {
10050 static_assert_sae!(SAE);
10051 let a: f64x8 = a.as_f64x8();
        let r: f64x8 = vgetexppd(a, f64x8::ZERO, 0b11111111, SAE);
        transmute(r)
10054 }
10055}
10056
10057/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
10058/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10059///
10060/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_round_pd&expand=2848)
10061#[inline]
10062#[target_feature(enable = "avx512f")]
10063#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10064#[cfg_attr(test, assert_instr(vgetexppd, SAE = 8))]
10065#[rustc_legacy_const_generics(3)]
10066pub fn _mm512_mask_getexp_round_pd<const SAE: i32>(
10067 src: __m512d,
10068 k: __mmask8,
10069 a: __m512d,
10070) -> __m512d {
10071 unsafe {
10072 static_assert_sae!(SAE);
10073 let a: f64x8 = a.as_f64x8();
10074 let src: f64x8 = src.as_f64x8();
        let r: f64x8 = vgetexppd(a, src, k, SAE);
        transmute(r)
10077 }
10078}
10079
10080/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
10081/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10082///
10083/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_round_pd&expand=2849)
10084#[inline]
10085#[target_feature(enable = "avx512f")]
10086#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10087#[cfg_attr(test, assert_instr(vgetexppd, SAE = 8))]
10088#[rustc_legacy_const_generics(2)]
10089pub fn _mm512_maskz_getexp_round_pd<const SAE: i32>(k: __mmask8, a: __m512d) -> __m512d {
10090 unsafe {
10091 static_assert_sae!(SAE);
10092 let a: f64x8 = a.as_f64x8();
        let r: f64x8 = vgetexppd(a, f64x8::ZERO, k, SAE);
        transmute(r)
10095 }
10096}
10097
10098/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
10099/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10100/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10101/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10102/// * [`_MM_FROUND_TO_POS_INF`] : round up
10103/// * [`_MM_FROUND_TO_ZERO`] : truncate
10104/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10105///
10106/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10107/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_round_ps&expand=4790)
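///
/// A minimal usage sketch (illustrative only), showing how `IMM8` packs both the
/// precision and the rounding mode listed above:
///
/// ```
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn round_to_halves(a: __m512) -> __m512 {
///     // imm8[7:4] = 1 keeps one fraction bit (multiples of 0.5);
///     // imm8[2:0] = _MM_FROUND_TO_NEAREST_INT (0) selects round-to-nearest.
///     _mm512_roundscale_round_ps::<0x10, _MM_FROUND_NO_EXC>(a)
/// }
/// ```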
10108#[inline]
10109#[target_feature(enable = "avx512f")]
10110#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10111#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0, SAE = 8))]
10112#[rustc_legacy_const_generics(1, 2)]
10113pub fn _mm512_roundscale_round_ps<const IMM8: i32, const SAE: i32>(a: __m512) -> __m512 {
10114 unsafe {
10115 static_assert_uimm_bits!(IMM8, 8);
10116 static_assert_mantissas_sae!(SAE);
10117 let a: f32x16 = a.as_f32x16();
        let r: f32x16 = vrndscaleps(a, IMM8, f32x16::ZERO, 0b11111111_11111111, SAE);
        transmute(r)
10120 }
10121}
10122
10123/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
10124/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10125/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10126/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10127/// * [`_MM_FROUND_TO_POS_INF`] : round up
10128/// * [`_MM_FROUND_TO_ZERO`] : truncate
10129/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10130///
10131/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_round_ps&expand=4788)
10133#[inline]
10134#[target_feature(enable = "avx512f")]
10135#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10136#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0, SAE = 8))]
10137#[rustc_legacy_const_generics(3, 4)]
10138pub fn _mm512_mask_roundscale_round_ps<const IMM8: i32, const SAE: i32>(
10139 src: __m512,
10140 k: __mmask16,
10141 a: __m512,
10142) -> __m512 {
10143 unsafe {
10144 static_assert_uimm_bits!(IMM8, 8);
10145 static_assert_mantissas_sae!(SAE);
10146 let a: f32x16 = a.as_f32x16();
10147 let src: f32x16 = src.as_f32x16();
        let r: f32x16 = vrndscaleps(a, IMM8, src, k, SAE);
        transmute(r)
10150 }
10151}
10152
10153/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
10154/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10155/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10156/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10157/// * [`_MM_FROUND_TO_POS_INF`] : round up
10158/// * [`_MM_FROUND_TO_ZERO`] : truncate
10159/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10160///
10161/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10162/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_round_ps&expand=4789)
10163#[inline]
10164#[target_feature(enable = "avx512f")]
10165#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10166#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0, SAE = 8))]
10167#[rustc_legacy_const_generics(2, 3)]
10168pub fn _mm512_maskz_roundscale_round_ps<const IMM8: i32, const SAE: i32>(
10169 k: __mmask16,
10170 a: __m512,
10171) -> __m512 {
10172 unsafe {
10173 static_assert_uimm_bits!(IMM8, 8);
10174 static_assert_mantissas_sae!(SAE);
10175 let a: f32x16 = a.as_f32x16();
        let r: f32x16 = vrndscaleps(a, IMM8, f32x16::ZERO, k, SAE);
        transmute(r)
10178 }
10179}
10180
10181/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
10182/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10183/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10184/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10185/// * [`_MM_FROUND_TO_POS_INF`] : round up
10186/// * [`_MM_FROUND_TO_ZERO`] : truncate
10187/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10188///
10189/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10190/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_round_pd&expand=4787)
10191#[inline]
10192#[target_feature(enable = "avx512f")]
10193#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10194#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0, SAE = 8))]
10195#[rustc_legacy_const_generics(1, 2)]
10196pub fn _mm512_roundscale_round_pd<const IMM8: i32, const SAE: i32>(a: __m512d) -> __m512d {
10197 unsafe {
10198 static_assert_uimm_bits!(IMM8, 8);
10199 static_assert_mantissas_sae!(SAE);
10200 let a: f64x8 = a.as_f64x8();
        let r: f64x8 = vrndscalepd(a, IMM8, f64x8::ZERO, 0b11111111, SAE);
        transmute(r)
10203 }
10204}
10205
10206/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
10207/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10208/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10209/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10210/// * [`_MM_FROUND_TO_POS_INF`] : round up
10211/// * [`_MM_FROUND_TO_ZERO`] : truncate
10212/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10213///
10214/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10215/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_round_pd&expand=4785)
10216#[inline]
10217#[target_feature(enable = "avx512f")]
10218#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10219#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0, SAE = 8))]
10220#[rustc_legacy_const_generics(3, 4)]
10221pub fn _mm512_mask_roundscale_round_pd<const IMM8: i32, const SAE: i32>(
10222 src: __m512d,
10223 k: __mmask8,
10224 a: __m512d,
10225) -> __m512d {
10226 unsafe {
10227 static_assert_uimm_bits!(IMM8, 8);
10228 static_assert_mantissas_sae!(SAE);
10229 let a: f64x8 = a.as_f64x8();
10230 let src: f64x8 = src.as_f64x8();
        let r: f64x8 = vrndscalepd(a, IMM8, src, k, SAE);
        transmute(r)
10233 }
10234}
10235
10236/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
10237/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10238/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10239/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10240/// * [`_MM_FROUND_TO_POS_INF`] : round up
10241/// * [`_MM_FROUND_TO_ZERO`] : truncate
10242/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10243///
10244/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10245/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_round_pd&expand=4786)
10246#[inline]
10247#[target_feature(enable = "avx512f")]
10248#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10249#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0, SAE = 8))]
10250#[rustc_legacy_const_generics(2, 3)]
10251pub fn _mm512_maskz_roundscale_round_pd<const IMM8: i32, const SAE: i32>(
10252 k: __mmask8,
10253 a: __m512d,
10254) -> __m512d {
10255 unsafe {
10256 static_assert_uimm_bits!(IMM8, 8);
10257 static_assert_mantissas_sae!(SAE);
10258 let a: f64x8 = a.as_f64x8();
        let r: f64x8 = vrndscalepd(a, IMM8, f64x8::ZERO, k, SAE);
        transmute(r)
10261 }
10262}
10263
10264/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.\
10265///
10266/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10267/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10268/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10269/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10270/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10271/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10272///
10273/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_round_ps&expand=4889)
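///
/// A minimal usage sketch (illustrative only); `scalef` computes `a * 2^floor(b)`
/// per lane:
///
/// ```
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn scale_by_pow2(a: __m512, b: __m512) -> __m512 {
///     // Rounding mode and exception suppression are OR'ed into one constant.
///     _mm512_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b)
/// }
/// ```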
10274#[inline]
10275#[target_feature(enable = "avx512f")]
10276#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10277#[cfg_attr(test, assert_instr(vscalefps, ROUNDING = 8))]
10278#[rustc_legacy_const_generics(2)]
10279pub fn _mm512_scalef_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
10280 unsafe {
10281 static_assert_rounding!(ROUNDING);
10282 let a: f32x16 = a.as_f32x16();
10283 let b: f32x16 = b.as_f32x16();
        let r: f32x16 = vscalefps(a, b, f32x16::ZERO, 0b11111111_11111111, ROUNDING);
        transmute(r)
10286 }
10287}
10288
10289/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
10290///
10291/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10292/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10293/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10294/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10295/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10296/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10297///
10298/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_round_ps&expand=4887)
10299#[inline]
10300#[target_feature(enable = "avx512f")]
10301#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10302#[cfg_attr(test, assert_instr(vscalefps, ROUNDING = 8))]
10303#[rustc_legacy_const_generics(4)]
10304pub fn _mm512_mask_scalef_round_ps<const ROUNDING: i32>(
10305 src: __m512,
10306 k: __mmask16,
10307 a: __m512,
10308 b: __m512,
10309) -> __m512 {
10310 unsafe {
10311 static_assert_rounding!(ROUNDING);
10312 let a: f32x16 = a.as_f32x16();
10313 let b: f32x16 = b.as_f32x16();
10314 let src: f32x16 = src.as_f32x16();
        let r: f32x16 = vscalefps(a, b, src, k, ROUNDING);
        transmute(r)
10317 }
10318}
10319
10320/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
10321///
10322/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10323/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10324/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10325/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10326/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10327/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10328///
10329/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_round_ps&expand=4888)
10330#[inline]
10331#[target_feature(enable = "avx512f")]
10332#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10333#[cfg_attr(test, assert_instr(vscalefps, ROUNDING = 8))]
10334#[rustc_legacy_const_generics(3)]
10335pub fn _mm512_maskz_scalef_round_ps<const ROUNDING: i32>(
10336 k: __mmask16,
10337 a: __m512,
10338 b: __m512,
10339) -> __m512 {
10340 unsafe {
10341 static_assert_rounding!(ROUNDING);
10342 let a: f32x16 = a.as_f32x16();
10343 let b: f32x16 = b.as_f32x16();
        let r: f32x16 = vscalefps(a, b, f32x16::ZERO, k, ROUNDING);
        transmute(r)
10346 }
10347}
10348
10349/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.\
10350///
10351/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10352/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10353/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10354/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10355/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10356/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10357///
10358/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_round_pd&expand=4886)
10359#[inline]
10360#[target_feature(enable = "avx512f")]
10361#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10362#[cfg_attr(test, assert_instr(vscalefpd, ROUNDING = 8))]
10363#[rustc_legacy_const_generics(2)]
10364pub fn _mm512_scalef_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
10365 unsafe {
10366 static_assert_rounding!(ROUNDING);
10367 let a: f64x8 = a.as_f64x8();
10368 let b: f64x8 = b.as_f64x8();
        let r: f64x8 = vscalefpd(a, b, f64x8::ZERO, 0b11111111, ROUNDING);
        transmute(r)
10371 }
10372}
10373
10374/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
10375///
10376/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10377/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10378/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10379/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10380/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10381/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10382///
10383/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_round_pd&expand=4884)
10384#[inline]
10385#[target_feature(enable = "avx512f")]
10386#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10387#[cfg_attr(test, assert_instr(vscalefpd, ROUNDING = 8))]
10388#[rustc_legacy_const_generics(4)]
10389pub fn _mm512_mask_scalef_round_pd<const ROUNDING: i32>(
10390 src: __m512d,
10391 k: __mmask8,
10392 a: __m512d,
10393 b: __m512d,
10394) -> __m512d {
10395 unsafe {
10396 static_assert_rounding!(ROUNDING);
10397 let a: f64x8 = a.as_f64x8();
10398 let b: f64x8 = b.as_f64x8();
10399 let src: f64x8 = src.as_f64x8();
        let r: f64x8 = vscalefpd(a, b, src, k, ROUNDING);
        transmute(r)
10402 }
10403}
10404
10405/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
10406///
10407/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10408/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10409/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10410/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10411/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10412/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10413///
10414/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_round_pd&expand=4885)
10415#[inline]
10416#[target_feature(enable = "avx512f")]
10417#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10418#[cfg_attr(test, assert_instr(vscalefpd, ROUNDING = 8))]
10419#[rustc_legacy_const_generics(3)]
10420pub fn _mm512_maskz_scalef_round_pd<const ROUNDING: i32>(
10421 k: __mmask8,
10422 a: __m512d,
10423 b: __m512d,
10424) -> __m512d {
10425 unsafe {
10426 static_assert_rounding!(ROUNDING);
10427 let a: f64x8 = a.as_f64x8();
10428 let b: f64x8 = b.as_f64x8();
        let r: f64x8 = vscalefpd(a, b, f64x8::ZERO, k, ROUNDING);
        transmute(r)
10431 }
10432}
10433
10434/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.\
10435///
10436/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10437/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_round_ps&expand=2505)
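///
/// A minimal call sketch (illustrative only; the fix-up table contents are left to
/// the caller, and `IMM8 = 0` is assumed to request no additional flag reporting):
///
/// ```
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn fixup(a: __m512, b: __m512, table: __m512i) -> __m512 {
///     // Each 32-bit lane of `table` holds the per-class response tokens.
///     _mm512_fixupimm_round_ps::<0, _MM_FROUND_NO_EXC>(a, b, table)
/// }
/// ```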
10438#[inline]
10439#[target_feature(enable = "avx512f")]
10440#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10441#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0, SAE = 8))]
10442#[rustc_legacy_const_generics(3, 4)]
10443pub fn _mm512_fixupimm_round_ps<const IMM8: i32, const SAE: i32>(
10444 a: __m512,
10445 b: __m512,
10446 c: __m512i,
10447) -> __m512 {
10448 unsafe {
10449 static_assert_uimm_bits!(IMM8, 8);
10450 static_assert_mantissas_sae!(SAE);
10451 let a: f32x16 = a.as_f32x16();
10452 let b: f32x16 = b.as_f32x16();
10453 let c: i32x16 = c.as_i32x16();
        let r: f32x16 = vfixupimmps(a, b, c, IMM8, 0b11111111_11111111, SAE);
        transmute(r)
10456 }
10457}
10458
10459/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\
10460///
10461/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10462/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_round_ps&expand=2506)
10463#[inline]
10464#[target_feature(enable = "avx512f")]
10465#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10466#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0, SAE = 8))]
10467#[rustc_legacy_const_generics(4, 5)]
10468pub fn _mm512_mask_fixupimm_round_ps<const IMM8: i32, const SAE: i32>(
10469 a: __m512,
10470 k: __mmask16,
10471 b: __m512,
10472 c: __m512i,
10473) -> __m512 {
10474 unsafe {
10475 static_assert_uimm_bits!(IMM8, 8);
10476 static_assert_mantissas_sae!(SAE);
10477 let a: f32x16 = a.as_f32x16();
10478 let b: f32x16 = b.as_f32x16();
10479 let c: i32x16 = c.as_i32x16();
        let r: f32x16 = vfixupimmps(a, b, c, IMM8, k, SAE);
        transmute(r)
10482 }
10483}
10484
10485/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\
10486///
10487/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10488/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_round_ps&expand=2507)
10489#[inline]
10490#[target_feature(enable = "avx512f")]
10491#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10492#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0, SAE = 8))]
10493#[rustc_legacy_const_generics(4, 5)]
10494pub fn _mm512_maskz_fixupimm_round_ps<const IMM8: i32, const SAE: i32>(
10495 k: __mmask16,
10496 a: __m512,
10497 b: __m512,
10498 c: __m512i,
10499) -> __m512 {
10500 unsafe {
10501 static_assert_uimm_bits!(IMM8, 8);
10502 static_assert_mantissas_sae!(SAE);
10503 let a: f32x16 = a.as_f32x16();
10504 let b: f32x16 = b.as_f32x16();
10505 let c: i32x16 = c.as_i32x16();
        let r: f32x16 = vfixupimmpsz(a, b, c, IMM8, k, SAE);
        transmute(r)
10508 }
10509}
10510
10511/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.\
10512///
10513/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10514/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_round_pd&expand=2502)
10515#[inline]
10516#[target_feature(enable = "avx512f")]
10517#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10518#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0, SAE = 8))]
10519#[rustc_legacy_const_generics(3, 4)]
10520pub fn _mm512_fixupimm_round_pd<const IMM8: i32, const SAE: i32>(
10521 a: __m512d,
10522 b: __m512d,
10523 c: __m512i,
10524) -> __m512d {
10525 unsafe {
10526 static_assert_uimm_bits!(IMM8, 8);
10527 static_assert_mantissas_sae!(SAE);
10528 let a: f64x8 = a.as_f64x8();
10529 let b: f64x8 = b.as_f64x8();
10530 let c: i64x8 = c.as_i64x8();
        let r: f64x8 = vfixupimmpd(a, b, c, IMM8, 0b11111111, SAE);
        transmute(r)
10533 }
10534}
10535
10536/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\
10537///
10538/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10539/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_round_pd&expand=2503)
10540#[inline]
10541#[target_feature(enable = "avx512f")]
10542#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10543#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0, SAE = 8))]
10544#[rustc_legacy_const_generics(4, 5)]
10545pub fn _mm512_mask_fixupimm_round_pd<const IMM8: i32, const SAE: i32>(
10546 a: __m512d,
10547 k: __mmask8,
10548 b: __m512d,
10549 c: __m512i,
10550) -> __m512d {
10551 unsafe {
10552 static_assert_uimm_bits!(IMM8, 8);
10553 static_assert_mantissas_sae!(SAE);
10554 let a: f64x8 = a.as_f64x8();
10555 let b: f64x8 = b.as_f64x8();
10556 let c: i64x8 = c.as_i64x8();
        let r: f64x8 = vfixupimmpd(a, b, c, IMM8, k, SAE);
        transmute(r)
10559 }
10560}
10561
10562/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\
10563///
10564/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10565/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_round_pd&expand=2504)
10566#[inline]
10567#[target_feature(enable = "avx512f")]
10568#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10569#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0, SAE = 8))]
10570#[rustc_legacy_const_generics(4, 5)]
10571pub fn _mm512_maskz_fixupimm_round_pd<const IMM8: i32, const SAE: i32>(
10572 k: __mmask8,
10573 a: __m512d,
10574 b: __m512d,
10575 c: __m512i,
10576) -> __m512d {
10577 unsafe {
10578 static_assert_uimm_bits!(IMM8, 8);
10579 static_assert_mantissas_sae!(SAE);
10580 let a: f64x8 = a.as_f64x8();
10581 let b: f64x8 = b.as_f64x8();
10582 let c: i64x8 = c.as_i64x8();
        let r: f64x8 = vfixupimmpdz(a, b, c, IMM8, k, SAE);
        transmute(r)
10585 }
10586}
10587
10588/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10589/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10590/// _MM_MANT_NORM_1_2 // interval [1, 2)\
10591/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
10592/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
10593/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10594/// The sign is determined by sc which can take the following values:\
10595/// _MM_MANT_SIGN_src // sign = sign(src)\
10596/// _MM_MANT_SIGN_zero // sign = 0\
10597/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
10598/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10599///
10600/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_round_ps&expand=2886)
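///
/// A minimal usage sketch (illustrative only), passing the raw encodings
/// `NORM = 0` (`_MM_MANT_NORM_1_2`, interval [1, 2)) and `SIGN = 0`
/// (`_MM_MANT_SIGN_src`, keep the source sign):
///
/// ```
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn mantissas(a: __m512) -> __m512 {
///     // A lane holding 12.0 becomes 1.5 (12.0 = 1.5 * 2^3).
///     _mm512_getmant_round_ps::<0, 0, _MM_FROUND_NO_EXC>(a)
/// }
/// ```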
10601#[inline]
10602#[target_feature(enable = "avx512f")]
10603#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10604#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0, SAE = 4))]
10605#[rustc_legacy_const_generics(1, 2, 3)]
10606pub fn _mm512_getmant_round_ps<
10607 const NORM: _MM_MANTISSA_NORM_ENUM,
10608 const SIGN: _MM_MANTISSA_SIGN_ENUM,
10609 const SAE: i32,
10610>(
10611 a: __m512,
10612) -> __m512 {
10613 unsafe {
10614 static_assert_uimm_bits!(NORM, 4);
10615 static_assert_uimm_bits!(SIGN, 2);
10616 static_assert_mantissas_sae!(SAE);
10617 let a: f32x16 = a.as_f32x16();
        let r: f32x16 = vgetmantps(a, SIGN << 2 | NORM, f32x16::ZERO, 0b11111111_11111111, SAE);
        transmute(r)
10620 }
10621}
10622
10623/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10624/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10625/// _MM_MANT_NORM_1_2 // interval [1, 2)\
10626/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
10627/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
10628/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10629/// The sign is determined by sc which can take the following values:\
10630/// _MM_MANT_SIGN_src // sign = sign(src)\
10631/// _MM_MANT_SIGN_zero // sign = 0\
10632/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
10633/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10634///
10635/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_round_ps&expand=2887)
10636#[inline]
10637#[target_feature(enable = "avx512f")]
10638#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10639#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0, SAE = 4))]
10640#[rustc_legacy_const_generics(3, 4, 5)]
10641pub fn _mm512_mask_getmant_round_ps<
10642 const NORM: _MM_MANTISSA_NORM_ENUM,
10643 const SIGN: _MM_MANTISSA_SIGN_ENUM,
10644 const SAE: i32,
10645>(
10646 src: __m512,
10647 k: __mmask16,
10648 a: __m512,
10649) -> __m512 {
10650 unsafe {
10651 static_assert_uimm_bits!(NORM, 4);
10652 static_assert_uimm_bits!(SIGN, 2);
10653 static_assert_mantissas_sae!(SAE);
10654 let a: f32x16 = a.as_f32x16();
10655 let src: f32x16 = src.as_f32x16();
        let r: f32x16 = vgetmantps(a, SIGN << 2 | NORM, src, k, SAE);
        transmute(r)
10658 }
10659}
10660
10661/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10662/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10663/// _MM_MANT_NORM_1_2 // interval [1, 2)\
10664/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
10665/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
10666/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10667/// The sign is determined by sc which can take the following values:\
10668/// _MM_MANT_SIGN_src // sign = sign(src)\
10669/// _MM_MANT_SIGN_zero // sign = 0\
10670/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
10671/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10672///
10673/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_round_ps&expand=2888)
10674#[inline]
10675#[target_feature(enable = "avx512f")]
10676#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10677#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0, SAE = 4))]
10678#[rustc_legacy_const_generics(2, 3, 4)]
10679pub fn _mm512_maskz_getmant_round_ps<
10680 const NORM: _MM_MANTISSA_NORM_ENUM,
10681 const SIGN: _MM_MANTISSA_SIGN_ENUM,
10682 const SAE: i32,
10683>(
10684 k: __mmask16,
10685 a: __m512,
10686) -> __m512 {
10687 unsafe {
10688 static_assert_uimm_bits!(NORM, 4);
10689 static_assert_uimm_bits!(SIGN, 2);
10690 static_assert_mantissas_sae!(SAE);
10691 let a: f32x16 = a.as_f32x16();
        let r: f32x16 = vgetmantps(a, SIGN << 2 | NORM, f32x16::ZERO, k, SAE);
        transmute(r)
10694 }
10695}
10696
10697/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10698/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10699/// _MM_MANT_NORM_1_2 // interval [1, 2)\
10700/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
10701/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
10702/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10703/// The sign is determined by sc which can take the following values:\
10704/// _MM_MANT_SIGN_src // sign = sign(src)\
10705/// _MM_MANT_SIGN_zero // sign = 0\
10706/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
10707/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10708///
10709/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_round_pd&expand=2883)
10710#[inline]
10711#[target_feature(enable = "avx512f")]
10712#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10713#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0, SAE = 4))]
10714#[rustc_legacy_const_generics(1, 2, 3)]
10715pub fn _mm512_getmant_round_pd<
10716 const NORM: _MM_MANTISSA_NORM_ENUM,
10717 const SIGN: _MM_MANTISSA_SIGN_ENUM,
10718 const SAE: i32,
10719>(
10720 a: __m512d,
10721) -> __m512d {
10722 unsafe {
10723 static_assert_uimm_bits!(NORM, 4);
10724 static_assert_uimm_bits!(SIGN, 2);
10725 static_assert_mantissas_sae!(SAE);
10726 let a: f64x8 = a.as_f64x8();
        let r: f64x8 = vgetmantpd(a, SIGN << 2 | NORM, f64x8::ZERO, 0b11111111, SAE);
        transmute(r)
10729 }
10730}
10731
10732/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10733/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10734/// _MM_MANT_NORM_1_2 // interval [1, 2)\
10735/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
10736/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
10737/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10738/// The sign is determined by sc which can take the following values:\
10739/// _MM_MANT_SIGN_src // sign = sign(src)\
10740/// _MM_MANT_SIGN_zero // sign = 0\
10741/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
10742/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10743///
10744/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_round_pd&expand=2884)
10745#[inline]
10746#[target_feature(enable = "avx512f")]
10747#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10748#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0, SAE = 4))]
10749#[rustc_legacy_const_generics(3, 4, 5)]
10750pub fn _mm512_mask_getmant_round_pd<
10751 const NORM: _MM_MANTISSA_NORM_ENUM,
10752 const SIGN: _MM_MANTISSA_SIGN_ENUM,
10753 const SAE: i32,
10754>(
10755 src: __m512d,
10756 k: __mmask8,
10757 a: __m512d,
10758) -> __m512d {
10759 unsafe {
10760 static_assert_uimm_bits!(NORM, 4);
10761 static_assert_uimm_bits!(SIGN, 2);
10762 static_assert_mantissas_sae!(SAE);
10763 let a: f64x8 = a.as_f64x8();
10764 let src: f64x8 = src.as_f64x8();
        let r: f64x8 = vgetmantpd(a, SIGN << 2 | NORM, src, k, SAE);
        transmute(r)
10767 }
10768}
10769
10770/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10771/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10772/// _MM_MANT_NORM_1_2 // interval [1, 2)\
10773/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
10774/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
10775/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10776/// The sign is determined by sc which can take the following values:\
10777/// _MM_MANT_SIGN_src // sign = sign(src)\
10778/// _MM_MANT_SIGN_zero // sign = 0\
10779/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
10780/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10781///
10782/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_round_pd&expand=2885)
10783#[inline]
10784#[target_feature(enable = "avx512f")]
10785#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10786#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0, SAE = 4))]
10787#[rustc_legacy_const_generics(2, 3, 4)]
10788pub fn _mm512_maskz_getmant_round_pd<
10789 const NORM: _MM_MANTISSA_NORM_ENUM,
10790 const SIGN: _MM_MANTISSA_SIGN_ENUM,
10791 const SAE: i32,
10792>(
10793 k: __mmask8,
10794 a: __m512d,
10795) -> __m512d {
10796 unsafe {
10797 static_assert_uimm_bits!(NORM, 4);
10798 static_assert_uimm_bits!(SIGN, 2);
10799 static_assert_mantissas_sae!(SAE);
10800 let a: f64x8 = a.as_f64x8();
        let r: f64x8 = vgetmantpd(a, SIGN << 2 | NORM, f64x8::ZERO, k, SAE);
        transmute(r)
10803 }
10804}
10805
10806/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.
10807///
10808/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_epi32&expand=1737)
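///
/// A minimal usage sketch (illustrative only; the wrapper name is made up):
///
/// ```
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn to_i32(a: __m512) -> __m512i {
///     // Uses the current MXCSR rounding mode; with the default
///     // round-to-nearest-even, a lane holding 2.5 becomes 2.
///     _mm512_cvtps_epi32(a)
/// }
/// ```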
10809#[inline]
10810#[target_feature(enable = "avx512f")]
10811#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10812#[cfg_attr(test, assert_instr(vcvtps2dq))]
10813pub fn _mm512_cvtps_epi32(a: __m512) -> __m512i {
10814 unsafe {
        transmute(vcvtps2dq(
            a.as_f32x16(),
            i32x16::ZERO,
            0b11111111_11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
10821 }
10822}
10823
10824/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10825///
10826/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_epi32&expand=1738)
10827#[inline]
10828#[target_feature(enable = "avx512f")]
10829#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10830#[cfg_attr(test, assert_instr(vcvtps2dq))]
10831pub fn _mm512_mask_cvtps_epi32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
10832 unsafe {
        transmute(vcvtps2dq(
            a.as_f32x16(),
            src.as_i32x16(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
10839 }
10840}
10841
10842/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10843///
10844/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_epi32&expand=1739)
10845#[inline]
10846#[target_feature(enable = "avx512f")]
10847#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10848#[cfg_attr(test, assert_instr(vcvtps2dq))]
10849pub fn _mm512_maskz_cvtps_epi32(k: __mmask16, a: __m512) -> __m512i {
10850 unsafe {
        transmute(vcvtps2dq(
            a.as_f32x16(),
            i32x16::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
10857 }
10858}
10859
10860/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10861///
10862/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_epi32&expand=1735)
10863#[inline]
10864#[target_feature(enable = "avx512f,avx512vl")]
10865#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10866#[cfg_attr(test, assert_instr(vcvtps2dq))]
10867pub fn _mm256_mask_cvtps_epi32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
10868 unsafe {
10869 let convert: __m256i = _mm256_cvtps_epi32(a);
        transmute(simd_select_bitmask(k, convert.as_i32x8(), src.as_i32x8()))
10871 }
10872}
10873
10874/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10875///
10876/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_epi32&expand=1736)
10877#[inline]
10878#[target_feature(enable = "avx512f,avx512vl")]
10879#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10880#[cfg_attr(test, assert_instr(vcvtps2dq))]
10881pub fn _mm256_maskz_cvtps_epi32(k: __mmask8, a: __m256) -> __m256i {
10882 unsafe {
10883 let convert: __m256i = _mm256_cvtps_epi32(a);
        transmute(simd_select_bitmask(k, convert.as_i32x8(), i32x8::ZERO))
10885 }
10886}
10887
10888/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10889///
10890/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_epi32&expand=1732)
10891#[inline]
10892#[target_feature(enable = "avx512f,avx512vl")]
10893#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10894#[cfg_attr(test, assert_instr(vcvtps2dq))]
10895pub fn _mm_mask_cvtps_epi32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
10896 unsafe {
10897 let convert: __m128i = _mm_cvtps_epi32(a);
        transmute(simd_select_bitmask(k, convert.as_i32x4(), src.as_i32x4()))
10899 }
10900}
10901
10902/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10903///
10904/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_epi32&expand=1733)
10905#[inline]
10906#[target_feature(enable = "avx512f,avx512vl")]
10907#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10908#[cfg_attr(test, assert_instr(vcvtps2dq))]
10909pub fn _mm_maskz_cvtps_epi32(k: __mmask8, a: __m128) -> __m128i {
10910 unsafe {
10911 let convert: __m128i = _mm_cvtps_epi32(a);
        transmute(simd_select_bitmask(k, convert.as_i32x4(), i32x4::ZERO))
10913 }
10914}
10915
10916/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
10917///
10918/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_epu32&expand=1755)
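///
/// A minimal usage sketch (illustrative only); the unsigned lanes come back as the
/// bit pattern of a `__m512i`:
///
/// ```
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn to_u32(a: __m512) -> __m512i {
///     // Negative or out-of-range inputs are not representable as u32 and
///     // produce the integer-indefinite sentinel value instead.
///     _mm512_cvtps_epu32(a)
/// }
/// ```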
10919#[inline]
10920#[target_feature(enable = "avx512f")]
10921#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10922#[cfg_attr(test, assert_instr(vcvtps2udq))]
10923pub fn _mm512_cvtps_epu32(a: __m512) -> __m512i {
10924 unsafe {
        transmute(vcvtps2udq(
            a.as_f32x16(),
            u32x16::ZERO,
            0b11111111_11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
10931 }
10932}
10933
10934/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10935///
10936/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_epu32&expand=1756)
10937#[inline]
10938#[target_feature(enable = "avx512f")]
10939#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10940#[cfg_attr(test, assert_instr(vcvtps2udq))]
10941pub fn _mm512_mask_cvtps_epu32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
10942 unsafe {
        transmute(vcvtps2udq(
            a.as_f32x16(),
            src.as_u32x16(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
10949 }
10950}
10951
10952/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10953///
10954/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_epu32&expand=1343)
10955#[inline]
10956#[target_feature(enable = "avx512f")]
10957#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10958#[cfg_attr(test, assert_instr(vcvtps2udq))]
10959pub fn _mm512_maskz_cvtps_epu32(k: __mmask16, a: __m512) -> __m512i {
10960 unsafe {
        transmute(vcvtps2udq(
            a.as_f32x16(),
            u32x16::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
10967 }
10968}
10969
10970/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
10971///
10972/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtps_epu32&expand=1752)
10973#[inline]
10974#[target_feature(enable = "avx512f,avx512vl")]
10975#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10976#[cfg_attr(test, assert_instr(vcvtps2udq))]
10977pub fn _mm256_cvtps_epu32(a: __m256) -> __m256i {
    unsafe { transmute(vcvtps2udq256(a.as_f32x8(), u32x8::ZERO, 0b11111111)) }
10979}
10980
10981/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10982///
10983/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_epu32&expand=1753)
10984#[inline]
10985#[target_feature(enable = "avx512f,avx512vl")]
10986#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10987#[cfg_attr(test, assert_instr(vcvtps2udq))]
10988pub fn _mm256_mask_cvtps_epu32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
    unsafe { transmute(vcvtps2udq256(a.as_f32x8(), src.as_u32x8(), k)) }
10990}
10991
10992/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10993///
10994/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_epu32&expand=1754)
10995#[inline]
10996#[target_feature(enable = "avx512f,avx512vl")]
10997#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
10998#[cfg_attr(test, assert_instr(vcvtps2udq))]
10999pub fn _mm256_maskz_cvtps_epu32(k: __mmask8, a: __m256) -> __m256i {
    unsafe { transmute(vcvtps2udq256(a.as_f32x8(), u32x8::ZERO, k)) }
11001}
11002
11003/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
11004///
11005/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_epu32&expand=1749)
11006#[inline]
11007#[target_feature(enable = "avx512f,avx512vl")]
11008#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11009#[cfg_attr(test, assert_instr(vcvtps2udq))]
11010pub fn _mm_cvtps_epu32(a: __m128) -> __m128i {
    unsafe { transmute(vcvtps2udq128(a.as_f32x4(), u32x4::ZERO, 0b11111111)) }
11012}
11013
11014/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11015///
11016/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_epu32&expand=1750)
11017#[inline]
11018#[target_feature(enable = "avx512f,avx512vl")]
11019#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11020#[cfg_attr(test, assert_instr(vcvtps2udq))]
11021pub fn _mm_mask_cvtps_epu32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
    unsafe { transmute(vcvtps2udq128(a.as_f32x4(), src.as_u32x4(), k)) }
11023}
11024
11025/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11026///
11027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_epu32&expand=1751)
11028#[inline]
11029#[target_feature(enable = "avx512f,avx512vl")]
11030#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11031#[cfg_attr(test, assert_instr(vcvtps2udq))]
11032pub fn _mm_maskz_cvtps_epu32(k: __mmask8, a: __m128) -> __m128i {
    unsafe { transmute(vcvtps2udq128(a.as_f32x4(), u32x4::ZERO, k)) }
11034}
11035
11036/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
11037///
11038/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_pd&expand=1769)
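///
/// A minimal usage sketch (illustrative only; the wrapper name is made up):
///
/// ```
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn widen(a: __m256) -> __m512d {
///     // Every f32 value is exactly representable as f64, so no rounding occurs.
///     _mm512_cvtps_pd(a)
/// }
/// ```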
11039#[inline]
11040#[target_feature(enable = "avx512f")]
11041#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11042#[cfg_attr(test, assert_instr(vcvtps2pd))]
11043pub fn _mm512_cvtps_pd(a: __m256) -> __m512d {
11044 unsafe {
        transmute(vcvtps2pd(
            a.as_f32x8(),
            f64x8::ZERO,
            0b11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
11051 }
11052}
11053
11054/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11055///
11056/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_pd&expand=1770)
11057#[inline]
11058#[target_feature(enable = "avx512f")]
11059#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11060#[cfg_attr(test, assert_instr(vcvtps2pd))]
11061pub fn _mm512_mask_cvtps_pd(src: __m512d, k: __mmask8, a: __m256) -> __m512d {
11062 unsafe {
        transmute(vcvtps2pd(
            a.as_f32x8(),
            src.as_f64x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
11069 }
11070}
11071
11072/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11073///
11074/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_pd&expand=1771)
11075#[inline]
11076#[target_feature(enable = "avx512f")]
11077#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11078#[cfg_attr(test, assert_instr(vcvtps2pd))]
11079pub fn _mm512_maskz_cvtps_pd(k: __mmask8, a: __m256) -> __m512d {
11080 unsafe {
        transmute(vcvtps2pd(
            a.as_f32x8(),
            f64x8::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
11087 }
11088}
11089
11090/// Performs element-by-element conversion of the lower half of packed single-precision (32-bit) floating-point elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst.
11091///
11092/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpslo_pd&expand=1784)
11093#[inline]
11094#[target_feature(enable = "avx512f")]
11095#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11096#[cfg_attr(test, assert_instr(vcvtps2pd))]
11097pub fn _mm512_cvtpslo_pd(v2: __m512) -> __m512d {
11098 unsafe {
        transmute(vcvtps2pd(
            _mm512_castps512_ps256(v2).as_f32x8(),
            f64x8::ZERO,
            0b11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
11105 }
11106}
11107
11108/// Performs element-by-element conversion of the lower half of packed single-precision (32-bit) floating-point elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11109///
11110/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpslo_pd&expand=1785)
11111#[inline]
11112#[target_feature(enable = "avx512f")]
11113#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11114#[cfg_attr(test, assert_instr(vcvtps2pd))]
11115pub fn _mm512_mask_cvtpslo_pd(src: __m512d, k: __mmask8, v2: __m512) -> __m512d {
11116 unsafe {
        transmute(vcvtps2pd(
            _mm512_castps512_ps256(v2).as_f32x8(),
            src.as_f64x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
11123 }
11124}
11125
11126/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
11127///
11128/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_ps&expand=1712)
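///
/// # Examples
///
/// A minimal sketch (not in the original source), assuming AVX-512F is
/// available at runtime:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         // SAFETY: `avx512f` was detected above.
///         unsafe {
///             let a = _mm512_setr_pd(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
///             let r = _mm512_cvtpd_ps(a);
///             let mut out = [0.0f32; 8];
///             _mm256_storeu_ps(out.as_mut_ptr(), r);
///             assert_eq!(out, [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]);
///         }
///     }
/// }
/// ```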
11129#[inline]
11130#[target_feature(enable = "avx512f")]
11131#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11132#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11133pub fn _mm512_cvtpd_ps(a: __m512d) -> __m256 {
11134 unsafe {
        transmute(vcvtpd2ps(
            a.as_f64x8(),
            f32x8::ZERO,
            0b11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
11141 }
11142}
11143
11144/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11145///
11146/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_ps&expand=1713)
11147#[inline]
11148#[target_feature(enable = "avx512f")]
11149#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11150#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11151pub fn _mm512_mask_cvtpd_ps(src: __m256, k: __mmask8, a: __m512d) -> __m256 {
11152 unsafe {
        transmute(vcvtpd2ps(
            a.as_f64x8(),
            src.as_f32x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
11159 }
11160}
11161
11162/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11163///
11164/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_ps&expand=1714)
11165#[inline]
11166#[target_feature(enable = "avx512f")]
11167#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11168#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11169pub fn _mm512_maskz_cvtpd_ps(k: __mmask8, a: __m512d) -> __m256 {
11170 unsafe {
        transmute(vcvtpd2ps(
            a.as_f64x8(),
            f32x8::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
11177 }
11178}
11179
11180/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11181///
11182/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_ps&expand=1710)
11183#[inline]
11184#[target_feature(enable = "avx512f,avx512vl")]
11185#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11186#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11187pub fn _mm256_mask_cvtpd_ps(src: __m128, k: __mmask8, a: __m256d) -> __m128 {
11188 unsafe {
11189 let convert: __m128 = _mm256_cvtpd_ps(a);
        transmute(simd_select_bitmask(k, convert.as_f32x4(), src.as_f32x4()))
11191 }
11192}
11193
11194/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11195///
11196/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_ps&expand=1711)
11197#[inline]
11198#[target_feature(enable = "avx512f,avx512vl")]
11199#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11200#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11201pub fn _mm256_maskz_cvtpd_ps(k: __mmask8, a: __m256d) -> __m128 {
11202 unsafe {
11203 let convert: __m128 = _mm256_cvtpd_ps(a);
        transmute(simd_select_bitmask(k, convert.as_f32x4(), f32x4::ZERO))
11205 }
11206}
11207
11208/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11209///
11210/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_ps&expand=1707)
11211#[inline]
11212#[target_feature(enable = "avx512f,avx512vl")]
11213#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11214#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11215pub fn _mm_mask_cvtpd_ps(src: __m128, k: __mmask8, a: __m128d) -> __m128 {
11216 unsafe {
11217 let convert: __m128 = _mm_cvtpd_ps(a);
        transmute(simd_select_bitmask(k, convert.as_f32x4(), src.as_f32x4()))
11219 }
11220}
11221
11222/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11223///
11224/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_ps&expand=1708)
11225#[inline]
11226#[target_feature(enable = "avx512f,avx512vl")]
11227#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11228#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11229pub fn _mm_maskz_cvtpd_ps(k: __mmask8, a: __m128d) -> __m128 {
11230 unsafe {
11231 let convert: __m128 = _mm_cvtpd_ps(a);
        transmute(simd_select_bitmask(k, convert.as_f32x4(), f32x4::ZERO))
11233 }
11234}
11235
11236/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.
11237///
11238/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_epi32&expand=1675)
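///
/// # Examples
///
/// A minimal sketch (not from the original source); it assumes AVX-512F has
/// been detected at runtime and uses values that convert without rounding:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         // SAFETY: `avx512f` was detected above.
///         unsafe {
///             let a = _mm512_setr_pd(0.0, -1.0, 2.0, -3.0, 4.0, -5.0, 6.0, -7.0);
///             let r = _mm512_cvtpd_epi32(a);
///             let mut out = [0i32; 8];
///             _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
///             assert_eq!(out, [0, -1, 2, -3, 4, -5, 6, -7]);
///         }
///     }
/// }
/// ```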
11239#[inline]
11240#[target_feature(enable = "avx512f")]
11241#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11242#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11243pub fn _mm512_cvtpd_epi32(a: __m512d) -> __m256i {
11244 unsafe {
        transmute(vcvtpd2dq(
            a.as_f64x8(),
            i32x8::ZERO,
            0b11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
11251 }
11252}
11253
11254/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11255///
11256/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_epi32&expand=1676)
11257#[inline]
11258#[target_feature(enable = "avx512f")]
11259#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11260#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11261pub fn _mm512_mask_cvtpd_epi32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
11262 unsafe {
        transmute(vcvtpd2dq(
            a.as_f64x8(),
            src.as_i32x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
11269 }
11270}
11271
11272/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11273///
11274/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_epi32&expand=1677)
11275#[inline]
11276#[target_feature(enable = "avx512f")]
11277#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11278#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11279pub fn _mm512_maskz_cvtpd_epi32(k: __mmask8, a: __m512d) -> __m256i {
11280 unsafe {
        transmute(vcvtpd2dq(
            a.as_f64x8(),
            i32x8::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
11287 }
11288}
11289
11290/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11291///
11292/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_epi32&expand=1673)
11293#[inline]
11294#[target_feature(enable = "avx512f,avx512vl")]
11295#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11296#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11297pub fn _mm256_mask_cvtpd_epi32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
11298 unsafe {
11299 let convert: __m128i = _mm256_cvtpd_epi32(a);
        transmute(simd_select_bitmask(k, convert.as_i32x4(), src.as_i32x4()))
11301 }
11302}
11303
11304/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11305///
11306/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_epi32&expand=1674)
11307#[inline]
11308#[target_feature(enable = "avx512f,avx512vl")]
11309#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11310#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11311pub fn _mm256_maskz_cvtpd_epi32(k: __mmask8, a: __m256d) -> __m128i {
11312 unsafe {
11313 let convert: __m128i = _mm256_cvtpd_epi32(a);
        transmute(simd_select_bitmask(k, convert.as_i32x4(), i32x4::ZERO))
11315 }
11316}
11317
11318/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11319///
11320/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_epi32&expand=1670)
11321#[inline]
11322#[target_feature(enable = "avx512f,avx512vl")]
11323#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11324#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11325pub fn _mm_mask_cvtpd_epi32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
11326 unsafe {
11327 let convert: __m128i = _mm_cvtpd_epi32(a);
        transmute(simd_select_bitmask(k, convert.as_i32x4(), src.as_i32x4()))
11329 }
11330}
11331
11332/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11333///
11334/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_epi32&expand=1671)
11335#[inline]
11336#[target_feature(enable = "avx512f,avx512vl")]
11337#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11338#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11339pub fn _mm_maskz_cvtpd_epi32(k: __mmask8, a: __m128d) -> __m128i {
11340 unsafe {
11341 let convert: __m128i = _mm_cvtpd_epi32(a);
        transmute(simd_select_bitmask(k, convert.as_i32x4(), i32x4::ZERO))
11343 }
11344}
11345
11346/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
11347///
11348/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_epu32&expand=1693)
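///
/// # Examples
///
/// A minimal sketch (not from the original source); it assumes AVX-512F has
/// been detected at runtime and uses non-negative values that are exactly
/// representable as unsigned 32-bit integers:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         // SAFETY: `avx512f` was detected above.
///         unsafe {
///             let a = _mm512_setr_pd(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 4294967295.0);
///             let r = _mm512_cvtpd_epu32(a);
///             let mut out = [0u32; 8];
///             _mm256_storeu_si256(out.as_mut_ptr() as *mut __m256i, r);
///             assert_eq!(out, [0, 1, 2, 3, 4, 5, 6, u32::MAX]);
///         }
///     }
/// }
/// ```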
11349#[inline]
11350#[target_feature(enable = "avx512f")]
11351#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11352#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11353pub fn _mm512_cvtpd_epu32(a: __m512d) -> __m256i {
11354 unsafe {
        transmute(vcvtpd2udq(
            a.as_f64x8(),
            u32x8::ZERO,
            0b11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
11361 }
11362}
11363
11364/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11365///
11366/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_epu32&expand=1694)
11367#[inline]
11368#[target_feature(enable = "avx512f")]
11369#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11370#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11371pub fn _mm512_mask_cvtpd_epu32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
11372 unsafe {
        transmute(vcvtpd2udq(
            a.as_f64x8(),
            src.as_u32x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
11379 }
11380}
11381
11382/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11383///
11384/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_epu32&expand=1695)
11385#[inline]
11386#[target_feature(enable = "avx512f")]
11387#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11388#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11389pub fn _mm512_maskz_cvtpd_epu32(k: __mmask8, a: __m512d) -> __m256i {
11390 unsafe {
        transmute(vcvtpd2udq(
            a.as_f64x8(),
            u32x8::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
11397 }
11398}
11399
11400/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
11401///
11402/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtpd_epu32&expand=1690)
11403#[inline]
11404#[target_feature(enable = "avx512f,avx512vl")]
11405#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11406#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11407pub fn _mm256_cvtpd_epu32(a: __m256d) -> __m128i {
    unsafe { transmute(vcvtpd2udq256(a.as_f64x4(), u32x4::ZERO, 0b11111111)) }
11409}
11410
11411/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11412///
11413/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_epu32&expand=1691)
11414#[inline]
11415#[target_feature(enable = "avx512f,avx512vl")]
11416#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11417#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11418pub fn _mm256_mask_cvtpd_epu32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
    unsafe { transmute(vcvtpd2udq256(a.as_f64x4(), src.as_u32x4(), k)) }
11420}
11421
11422/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11423///
11424/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_epu32&expand=1692)
11425#[inline]
11426#[target_feature(enable = "avx512f,avx512vl")]
11427#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11428#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11429pub fn _mm256_maskz_cvtpd_epu32(k: __mmask8, a: __m256d) -> __m128i {
    unsafe { transmute(vcvtpd2udq256(a.as_f64x4(), u32x4::ZERO, k)) }
11431}
11432
11433/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
11434///
11435/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpd_epu32&expand=1687)
11436#[inline]
11437#[target_feature(enable = "avx512f,avx512vl")]
11438#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11439#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11440pub fn _mm_cvtpd_epu32(a: __m128d) -> __m128i {
    unsafe { transmute(vcvtpd2udq128(a.as_f64x2(), u32x4::ZERO, 0b11111111)) }
11442}
11443
11444/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11445///
11446/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_epu32&expand=1688)
11447#[inline]
11448#[target_feature(enable = "avx512f,avx512vl")]
11449#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11450#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11451pub fn _mm_mask_cvtpd_epu32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
    unsafe { transmute(vcvtpd2udq128(a.as_f64x2(), src.as_u32x4(), k)) }
11453}
11454
11455/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11456///
11457/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_epu32&expand=1689)
11458#[inline]
11459#[target_feature(enable = "avx512f,avx512vl")]
11460#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11461#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11462pub fn _mm_maskz_cvtpd_epu32(k: __mmask8, a: __m128d) -> __m128i {
    unsafe { transmute(vcvtpd2udq128(a.as_f64x2(), u32x4::ZERO, k)) }
11464}
11465
11466/// Performs an element-by-element conversion of packed double-precision (64-bit) floating-point elements in v2 to single-precision (32-bit) floating-point elements and stores them in dst. The elements are stored in the lower half of the results vector, while the remaining upper half locations are set to 0.
11467///
11468/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_pslo&expand=1715)
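///
/// # Examples
///
/// A minimal sketch (not part of the original source) showing that the eight
/// converted values land in the lower half of the result while the upper half
/// is zeroed; it assumes AVX-512F has been detected at runtime:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         // SAFETY: `avx512f` was detected above.
///         unsafe {
///             let v2 = _mm512_setr_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
///             let r = _mm512_cvtpd_pslo(v2);
///             let mut out = [0.0f32; 16];
///             _mm512_storeu_ps(out.as_mut_ptr(), r);
///             assert_eq!(
///                 out,
///                 [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
///             );
///         }
///     }
/// }
/// ```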
11469#[inline]
11470#[target_feature(enable = "avx512f")]
11471#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11472#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11473pub fn _mm512_cvtpd_pslo(v2: __m512d) -> __m512 {
11474 unsafe {
        let r: f32x8 = vcvtpd2ps(
            v2.as_f64x8(),
            f32x8::ZERO,
            0b11111111,
            _MM_FROUND_CUR_DIRECTION,
        );
11481 simd_shuffle!(
11482 r,
11483 f32x8::ZERO,
11484 [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
11485 )
11486 }
11487}
11488
11489/// Performs an element-by-element conversion of packed double-precision (64-bit) floating-point elements in v2 to single-precision (32-bit) floating-point elements and stores them in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The elements are stored in the lower half of the results vector, while the remaining upper half locations are set to 0.
11490///
11491/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_pslo&expand=1716)
11492#[inline]
11493#[target_feature(enable = "avx512f")]
11494#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11495#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11496pub fn _mm512_mask_cvtpd_pslo(src: __m512, k: __mmask8, v2: __m512d) -> __m512 {
11497 unsafe {
        let r: f32x8 = vcvtpd2ps(
            v2.as_f64x8(),
            _mm512_castps512_ps256(src).as_f32x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        );
11504 simd_shuffle!(
11505 r,
11506 f32x8::ZERO,
11507 [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
11508 )
11509 }
11510}
11511
11512/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst.
11513///
11514/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi8_epi32&expand=1535)
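///
/// # Examples
///
/// A minimal sketch (not from the original source) demonstrating sign
/// extension; it assumes AVX-512F has been detected at runtime:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         // SAFETY: `avx512f` was detected above.
///         unsafe {
///             let a = _mm_setr_epi8(0, -1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11, 12, -13, 14, -15);
///             let r = _mm512_cvtepi8_epi32(a);
///             let mut out = [0i32; 16];
///             _mm512_storeu_epi32(out.as_mut_ptr(), r);
///             // Each byte is sign extended, so negative values stay negative.
///             assert_eq!(out[1], -1);
///             assert_eq!(out[15], -15);
///         }
///     }
/// }
/// ```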
11515#[inline]
11516#[target_feature(enable = "avx512f")]
11517#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11518#[cfg_attr(test, assert_instr(vpmovsxbd))]
11519pub fn _mm512_cvtepi8_epi32(a: __m128i) -> __m512i {
11520 unsafe {
11521 let a: i8x16 = a.as_i8x16();
        transmute::<i32x16, _>(simd_cast(a))
11523 }
11524}
11525
11526/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11527///
11528/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi8_epi32&expand=1536)
11529#[inline]
11530#[target_feature(enable = "avx512f")]
11531#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11532#[cfg_attr(test, assert_instr(vpmovsxbd))]
11533pub fn _mm512_mask_cvtepi8_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
11534 unsafe {
11535 let convert: i32x16 = _mm512_cvtepi8_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
11537 }
11538}
11539
11540/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11541///
11542/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi8_epi32&expand=1537)
11543#[inline]
11544#[target_feature(enable = "avx512f")]
11545#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11546#[cfg_attr(test, assert_instr(vpmovsxbd))]
11547pub fn _mm512_maskz_cvtepi8_epi32(k: __mmask16, a: __m128i) -> __m512i {
11548 unsafe {
11549 let convert: i32x16 = _mm512_cvtepi8_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, convert, i32x16::ZERO))
11551 }
11552}
11553
11554/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11555///
11556/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi8_epi32&expand=1533)
11557#[inline]
11558#[target_feature(enable = "avx512f,avx512vl")]
11559#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11560#[cfg_attr(test, assert_instr(vpmovsxbd))]
11561pub fn _mm256_mask_cvtepi8_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
11562 unsafe {
11563 let convert: i32x8 = _mm256_cvtepi8_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
11565 }
11566}
11567
11568/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11569///
11570/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi8_epi32&expand=1534)
11571#[inline]
11572#[target_feature(enable = "avx512f,avx512vl")]
11573#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11574#[cfg_attr(test, assert_instr(vpmovsxbd))]
11575pub fn _mm256_maskz_cvtepi8_epi32(k: __mmask8, a: __m128i) -> __m256i {
11576 unsafe {
11577 let convert: i32x8 = _mm256_cvtepi8_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
11579 }
11580}
11581
11582/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11583///
11584/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi8_epi32&expand=1530)
11585#[inline]
11586#[target_feature(enable = "avx512f,avx512vl")]
11587#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11588#[cfg_attr(test, assert_instr(vpmovsxbd))]
11589pub fn _mm_mask_cvtepi8_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11590 unsafe {
11591 let convert: i32x4 = _mm_cvtepi8_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
11593 }
11594}
11595
11596/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11597///
11598/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi8_epi32&expand=1531)
11599#[inline]
11600#[target_feature(enable = "avx512f,avx512vl")]
11601#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11602#[cfg_attr(test, assert_instr(vpmovsxbd))]
11603pub fn _mm_maskz_cvtepi8_epi32(k: __mmask8, a: __m128i) -> __m128i {
11604 unsafe {
11605 let convert: i32x4 = _mm_cvtepi8_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
11607 }
11608}
11609
11610/// Sign extend packed 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst.
11611///
11612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi8_epi64&expand=1544)
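///
/// # Examples
///
/// A minimal sketch (not from the original source) showing that only the low
/// 8 bytes of `a` participate; it assumes AVX-512F has been detected at
/// runtime:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         // SAFETY: `avx512f` was detected above.
///         unsafe {
///             let a = _mm_setr_epi8(1, -2, 3, -4, 5, -6, 7, -8, 0, 0, 0, 0, 0, 0, 0, 0);
///             let r = _mm512_cvtepi8_epi64(a);
///             let mut out = [0i64; 8];
///             _mm512_storeu_epi64(out.as_mut_ptr(), r);
///             assert_eq!(out, [1, -2, 3, -4, 5, -6, 7, -8]);
///         }
///     }
/// }
/// ```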
11613#[inline]
11614#[target_feature(enable = "avx512f")]
11615#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11616#[cfg_attr(test, assert_instr(vpmovsxbq))]
11617pub fn _mm512_cvtepi8_epi64(a: __m128i) -> __m512i {
11618 unsafe {
11619 let a: i8x16 = a.as_i8x16();
11620 let v64: i8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
        transmute::<i64x8, _>(simd_cast(v64))
11622 }
11623}
11624
11625/// Sign extend packed 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11626///
11627/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi8_epi64&expand=1545)
11628#[inline]
11629#[target_feature(enable = "avx512f")]
11630#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11631#[cfg_attr(test, assert_instr(vpmovsxbq))]
11632pub fn _mm512_mask_cvtepi8_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
11633 unsafe {
11634 let convert: i64x8 = _mm512_cvtepi8_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
11636 }
11637}
11638
11639/// Sign extend packed 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11640///
11641/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi8_epi64&expand=1546)
11642#[inline]
11643#[target_feature(enable = "avx512f")]
11644#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11645#[cfg_attr(test, assert_instr(vpmovsxbq))]
11646pub fn _mm512_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m512i {
11647 unsafe {
11648 let convert: i64x8 = _mm512_cvtepi8_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
11650 }
11651}
11652
11653/// Sign extend packed 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11654///
11655/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi8_epi64&expand=1542)
11656#[inline]
11657#[target_feature(enable = "avx512f,avx512vl")]
11658#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11659#[cfg_attr(test, assert_instr(vpmovsxbq))]
11660pub fn _mm256_mask_cvtepi8_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
11661 unsafe {
11662 let convert: i64x4 = _mm256_cvtepi8_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
11664 }
11665}
11666
11667/// Sign extend packed 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11668///
11669/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi8_epi64&expand=1543)
11670#[inline]
11671#[target_feature(enable = "avx512f,avx512vl")]
11672#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11673#[cfg_attr(test, assert_instr(vpmovsxbq))]
11674pub fn _mm256_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m256i {
11675 unsafe {
11676 let convert: i64x4 = _mm256_cvtepi8_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
11678 }
11679}
11680
11681/// Sign extend packed 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11682///
11683/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi8_epi64&expand=1539)
11684#[inline]
11685#[target_feature(enable = "avx512f,avx512vl")]
11686#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11687#[cfg_attr(test, assert_instr(vpmovsxbq))]
11688pub fn _mm_mask_cvtepi8_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11689 unsafe {
11690 let convert: i64x2 = _mm_cvtepi8_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
11692 }
11693}
11694
11695/// Sign extend packed 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11696///
11697/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi8_epi64&expand=1540)
11698#[inline]
11699#[target_feature(enable = "avx512f,avx512vl")]
11700#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11701#[cfg_attr(test, assert_instr(vpmovsxbq))]
11702pub fn _mm_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m128i {
11703 unsafe {
11704 let convert: i64x2 = _mm_cvtepi8_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
11706 }
11707}
11708
11709/// Zero extend packed unsigned 8-bit integers in a to packed 32-bit integers, and store the results in dst.
11710///
11711/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu8_epi32&expand=1621)
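///
/// # Examples
///
/// A minimal sketch (not from the original source) contrasting zero extension
/// with sign extension; it assumes AVX-512F has been detected at runtime:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         // SAFETY: `avx512f` was detected above.
///         unsafe {
///             // The last byte, 0xFF, is zero extended to 255, not sign extended to -1.
///             let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, -1);
///             let r = _mm512_cvtepu8_epi32(a);
///             let mut out = [0i32; 16];
///             _mm512_storeu_epi32(out.as_mut_ptr(), r);
///             assert_eq!(out[14], 14);
///             assert_eq!(out[15], 255);
///         }
///     }
/// }
/// ```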
11712#[inline]
11713#[target_feature(enable = "avx512f")]
11714#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11715#[cfg_attr(test, assert_instr(vpmovzxbd))]
11716pub fn _mm512_cvtepu8_epi32(a: __m128i) -> __m512i {
11717 unsafe {
11718 let a: u8x16 = a.as_u8x16();
        transmute::<i32x16, _>(simd_cast(a))
11720 }
11721}
11722
11723/// Zero extend packed unsigned 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11724///
11725/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu8_epi32&expand=1622)
11726#[inline]
11727#[target_feature(enable = "avx512f")]
11728#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11729#[cfg_attr(test, assert_instr(vpmovzxbd))]
11730pub fn _mm512_mask_cvtepu8_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
11731 unsafe {
11732 let convert: i32x16 = _mm512_cvtepu8_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
11734 }
11735}
11736
11737/// Zero extend packed unsigned 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11738///
11739/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu8_epi32&expand=1623)
11740#[inline]
11741#[target_feature(enable = "avx512f")]
11742#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11743#[cfg_attr(test, assert_instr(vpmovzxbd))]
11744pub fn _mm512_maskz_cvtepu8_epi32(k: __mmask16, a: __m128i) -> __m512i {
11745 unsafe {
11746 let convert: i32x16 = _mm512_cvtepu8_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, convert, i32x16::ZERO))
11748 }
11749}
11750
11751/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11752///
11753/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu8_epi32&expand=1619)
11754#[inline]
11755#[target_feature(enable = "avx512f,avx512vl")]
11756#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11757#[cfg_attr(test, assert_instr(vpmovzxbd))]
11758pub fn _mm256_mask_cvtepu8_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
11759 unsafe {
11760 let convert: i32x8 = _mm256_cvtepu8_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
11762 }
11763}
11764
11765/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11766///
11767/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/IntrinsicsGuide/#text=_mm256_maskz_cvtepu8_epi32&expand=1620)
11768#[inline]
11769#[target_feature(enable = "avx512f,avx512vl")]
11770#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11771#[cfg_attr(test, assert_instr(vpmovzxbd))]
11772pub fn _mm256_maskz_cvtepu8_epi32(k: __mmask8, a: __m128i) -> __m256i {
11773 unsafe {
11774 let convert: i32x8 = _mm256_cvtepu8_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
11776 }
11777}
11778
11779/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11780///
11781/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu8_epi32&expand=1616)
11782#[inline]
11783#[target_feature(enable = "avx512f,avx512vl")]
11784#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11785#[cfg_attr(test, assert_instr(vpmovzxbd))]
11786pub fn _mm_mask_cvtepu8_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11787 unsafe {
11788 let convert: i32x4 = _mm_cvtepu8_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
11790 }
11791}
11792
/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11794///
11795/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/IntrinsicsGuide/#text=_mm_maskz_cvtepu8_epi32&expand=1617)
11796#[inline]
11797#[target_feature(enable = "avx512f,avx512vl")]
11798#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11799#[cfg_attr(test, assert_instr(vpmovzxbd))]
11800pub fn _mm_maskz_cvtepu8_epi32(k: __mmask8, a: __m128i) -> __m128i {
11801 unsafe {
11802 let convert: i32x4 = _mm_cvtepu8_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
11804 }
11805}
11806
/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst.
11808///
11809/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu8_epi64&expand=1630)
11810#[inline]
11811#[target_feature(enable = "avx512f")]
11812#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11813#[cfg_attr(test, assert_instr(vpmovzxbq))]
11814pub fn _mm512_cvtepu8_epi64(a: __m128i) -> __m512i {
11815 unsafe {
11816 let a: u8x16 = a.as_u8x16();
11817 let v64: u8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
        transmute::<i64x8, _>(simd_cast(v64))
11819 }
11820}
11821
11822/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11823///
11824/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu8_epi64&expand=1631)
11825#[inline]
11826#[target_feature(enable = "avx512f")]
11827#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11828#[cfg_attr(test, assert_instr(vpmovzxbq))]
11829pub fn _mm512_mask_cvtepu8_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
11830 unsafe {
11831 let convert: i64x8 = _mm512_cvtepu8_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
11833 }
11834}
11835
11836/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11837///
11838/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu8_epi64&expand=1632)
11839#[inline]
11840#[target_feature(enable = "avx512f")]
11841#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11842#[cfg_attr(test, assert_instr(vpmovzxbq))]
11843pub fn _mm512_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m512i {
11844 unsafe {
11845 let convert: i64x8 = _mm512_cvtepu8_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
11847 }
11848}
11849
11850/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11851///
11852/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu8_epi64&expand=1628)
11853#[inline]
11854#[target_feature(enable = "avx512f,avx512vl")]
11855#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11856#[cfg_attr(test, assert_instr(vpmovzxbq))]
11857pub fn _mm256_mask_cvtepu8_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
11858 unsafe {
11859 let convert: i64x4 = _mm256_cvtepu8_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
11861 }
11862}
11863
11864/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11865///
11866/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu8_epi64&expand=1629)
11867#[inline]
11868#[target_feature(enable = "avx512f,avx512vl")]
11869#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11870#[cfg_attr(test, assert_instr(vpmovzxbq))]
11871pub fn _mm256_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m256i {
11872 unsafe {
11873 let convert: i64x4 = _mm256_cvtepu8_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
11875 }
11876}
11877
11878/// Zero extend packed unsigned 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11879///
11880/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu8_epi64&expand=1625)
11881#[inline]
11882#[target_feature(enable = "avx512f,avx512vl")]
11883#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11884#[cfg_attr(test, assert_instr(vpmovzxbq))]
11885pub fn _mm_mask_cvtepu8_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11886 unsafe {
11887 let convert: i64x2 = _mm_cvtepu8_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
11889 }
11890}
11891
11892/// Zero extend packed unsigned 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11893///
11894/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu8_epi64&expand=1626)
11895#[inline]
11896#[target_feature(enable = "avx512f,avx512vl")]
11897#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11898#[cfg_attr(test, assert_instr(vpmovzxbq))]
11899pub fn _mm_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m128i {
11900 unsafe {
11901 let convert: i64x2 = _mm_cvtepu8_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
11903 }
11904}
11905
11906/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst.
11907///
11908/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi16_epi32&expand=1389)
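///
/// # Examples
///
/// A minimal sketch (not from the original source); it assumes AVX-512F has
/// been detected at runtime:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         // SAFETY: `avx512f` was detected above.
///         unsafe {
///             let a = _mm256_setr_epi16(0, -1, 2, -3, 4, -5, 6, -7, 8, -9, 10, -11, 12, -13, 14, -15);
///             let r = _mm512_cvtepi16_epi32(a);
///             let mut out = [0i32; 16];
///             _mm512_storeu_epi32(out.as_mut_ptr(), r);
///             // Each 16-bit lane is sign extended to a 32-bit lane.
///             assert_eq!(out[1], -1);
///             assert_eq!(out[15], -15);
///         }
///     }
/// }
/// ```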
11909#[inline]
11910#[target_feature(enable = "avx512f")]
11911#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11912#[cfg_attr(test, assert_instr(vpmovsxwd))]
11913pub fn _mm512_cvtepi16_epi32(a: __m256i) -> __m512i {
11914 unsafe {
11915 let a: i16x16 = a.as_i16x16();
        transmute::<i32x16, _>(simd_cast(a))
11917 }
11918}
11919
11920/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11921///
11922/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi16_epi32&expand=1390)
11923#[inline]
11924#[target_feature(enable = "avx512f")]
11925#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11926#[cfg_attr(test, assert_instr(vpmovsxwd))]
11927pub fn _mm512_mask_cvtepi16_epi32(src: __m512i, k: __mmask16, a: __m256i) -> __m512i {
11928 unsafe {
11929 let convert: i32x16 = _mm512_cvtepi16_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
11931 }
11932}
11933
11934/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11935///
11936/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi16_epi32&expand=1391)
11937#[inline]
11938#[target_feature(enable = "avx512f")]
11939#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11940#[cfg_attr(test, assert_instr(vpmovsxwd))]
11941pub fn _mm512_maskz_cvtepi16_epi32(k: __mmask16, a: __m256i) -> __m512i {
11942 unsafe {
11943 let convert: i32x16 = _mm512_cvtepi16_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, convert, i32x16::ZERO))
11945 }
11946}
11947
11948/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11949///
11950/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi16_epi32&expand=1387)
11951#[inline]
11952#[target_feature(enable = "avx512f,avx512vl")]
11953#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11954#[cfg_attr(test, assert_instr(vpmovsxwd))]
11955pub fn _mm256_mask_cvtepi16_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
11956 unsafe {
11957 let convert: i32x8 = _mm256_cvtepi16_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
11959 }
11960}
11961
11962/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11963///
11964/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi16_epi32&expand=1388)
11965#[inline]
11966#[target_feature(enable = "avx512f,avx512vl")]
11967#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11968#[cfg_attr(test, assert_instr(vpmovsxwd))]
11969pub fn _mm256_maskz_cvtepi16_epi32(k: __mmask8, a: __m128i) -> __m256i {
11970 unsafe {
11971 let convert: i32x8 = _mm256_cvtepi16_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
11973 }
11974}
11975
11976/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11977///
11978/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi16_epi32&expand=1384)
11979#[inline]
11980#[target_feature(enable = "avx512f,avx512vl")]
11981#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11982#[cfg_attr(test, assert_instr(vpmovsxwd))]
11983pub fn _mm_mask_cvtepi16_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11984 unsafe {
11985 let convert: i32x4 = _mm_cvtepi16_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
11987 }
11988}
11989
11990/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11991///
11992/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi16_epi32&expand=1385)
11993#[inline]
11994#[target_feature(enable = "avx512f,avx512vl")]
11995#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
11996#[cfg_attr(test, assert_instr(vpmovsxwd))]
11997pub fn _mm_maskz_cvtepi16_epi32(k: __mmask8, a: __m128i) -> __m128i {
11998 unsafe {
11999 let convert: i32x4 = _mm_cvtepi16_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
12001 }
12002}
12003
12004/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst.
12005///
12006/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi16_epi64&expand=1398)
12007#[inline]
12008#[target_feature(enable = "avx512f")]
12009#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12010#[cfg_attr(test, assert_instr(vpmovsxwq))]
12011pub fn _mm512_cvtepi16_epi64(a: __m128i) -> __m512i {
12012 unsafe {
12013 let a: i16x8 = a.as_i16x8();
        transmute::<i64x8, _>(simd_cast(a))
12015 }
12016}
12017
12018/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12019///
12020/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi16_epi64&expand=1399)
12021#[inline]
12022#[target_feature(enable = "avx512f")]
12023#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12024#[cfg_attr(test, assert_instr(vpmovsxwq))]
12025pub fn _mm512_mask_cvtepi16_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
12026 unsafe {
12027 let convert: i64x8 = _mm512_cvtepi16_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
12029 }
12030}
12031
12032/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12033///
12034/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi16_epi64&expand=1400)
12035#[inline]
12036#[target_feature(enable = "avx512f")]
12037#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12038#[cfg_attr(test, assert_instr(vpmovsxwq))]
12039pub fn _mm512_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m512i {
12040 unsafe {
12041 let convert: i64x8 = _mm512_cvtepi16_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
12043 }
12044}
12045
12046/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12047///
12048/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi16_epi64&expand=1396)
12049#[inline]
12050#[target_feature(enable = "avx512f,avx512vl")]
12051#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12052#[cfg_attr(test, assert_instr(vpmovsxwq))]
12053pub fn _mm256_mask_cvtepi16_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
12054 unsafe {
12055 let convert: i64x4 = _mm256_cvtepi16_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
12057 }
12058}
12059
12060/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12061///
12062/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi16_epi64&expand=1397)
12063#[inline]
12064#[target_feature(enable = "avx512f,avx512vl")]
12065#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12066#[cfg_attr(test, assert_instr(vpmovsxwq))]
12067pub fn _mm256_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m256i {
12068 unsafe {
12069 let convert: i64x4 = _mm256_cvtepi16_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
12071 }
12072}
12073
12074/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12075///
12076/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi16_epi64&expand=1393)
12077#[inline]
12078#[target_feature(enable = "avx512f,avx512vl")]
12079#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12080#[cfg_attr(test, assert_instr(vpmovsxwq))]
12081pub fn _mm_mask_cvtepi16_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12082 unsafe {
12083 let convert: i64x2 = _mm_cvtepi16_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
12085 }
12086}
12087
12088/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12089///
12090/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi16_epi64&expand=1394)
12091#[inline]
12092#[target_feature(enable = "avx512f,avx512vl")]
12093#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12094#[cfg_attr(test, assert_instr(vpmovsxwq))]
12095pub fn _mm_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m128i {
12096 unsafe {
12097 let convert: i64x2 = _mm_cvtepi16_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
12099 }
12100}
12101
12102/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst.
12103///
12104/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu16_epi32&expand=1553)
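///
/// # Examples
///
/// A minimal sketch (not from the original source) highlighting that the
/// extension is unsigned; it assumes AVX-512F has been detected at runtime:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         // SAFETY: `avx512f` was detected above.
///         unsafe {
///             // The last lane, 0xFFFF, is zero extended to 65535 rather than -1.
///             let a = _mm256_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, -1);
///             let r = _mm512_cvtepu16_epi32(a);
///             let mut out = [0i32; 16];
///             _mm512_storeu_epi32(out.as_mut_ptr(), r);
///             assert_eq!(out[14], 14);
///             assert_eq!(out[15], 65535);
///         }
///     }
/// }
/// ```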
12105#[inline]
12106#[target_feature(enable = "avx512f")]
12107#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12108#[cfg_attr(test, assert_instr(vpmovzxwd))]
12109pub fn _mm512_cvtepu16_epi32(a: __m256i) -> __m512i {
12110 unsafe {
12111 let a: u16x16 = a.as_u16x16();
        transmute::<i32x16, _>(simd_cast(a))
12113 }
12114}
12115
12116/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12117///
12118/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu16_epi32&expand=1554)
12119#[inline]
12120#[target_feature(enable = "avx512f")]
12121#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12122#[cfg_attr(test, assert_instr(vpmovzxwd))]
12123pub fn _mm512_mask_cvtepu16_epi32(src: __m512i, k: __mmask16, a: __m256i) -> __m512i {
12124 unsafe {
12125 let convert: i32x16 = _mm512_cvtepu16_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
12127 }
12128}
12129
12130/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12131///
12132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu16_epi32&expand=1555)
12133#[inline]
12134#[target_feature(enable = "avx512f")]
12135#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12136#[cfg_attr(test, assert_instr(vpmovzxwd))]
12137pub fn _mm512_maskz_cvtepu16_epi32(k: __mmask16, a: __m256i) -> __m512i {
12138 unsafe {
12139 let convert: i32x16 = _mm512_cvtepu16_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, convert, i32x16::ZERO))
12141 }
12142}
12143
12144/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12145///
12146/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu16_epi32&expand=1551)
12147#[inline]
12148#[target_feature(enable = "avx512f,avx512vl")]
12149#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12150#[cfg_attr(test, assert_instr(vpmovzxwd))]
12151pub fn _mm256_mask_cvtepu16_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
12152 unsafe {
12153 let convert: i32x8 = _mm256_cvtepu16_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
12155 }
12156}
12157
12158/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12159///
12160/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu16_epi32&expand=1552)
12161#[inline]
12162#[target_feature(enable = "avx512f,avx512vl")]
12163#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12164#[cfg_attr(test, assert_instr(vpmovzxwd))]
12165pub fn _mm256_maskz_cvtepu16_epi32(k: __mmask8, a: __m128i) -> __m256i {
12166 unsafe {
12167 let convert: i32x8 = _mm256_cvtepu16_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
12169 }
12170}
12171
12172/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12173///
12174/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu16_epi32&expand=1548)
12175#[inline]
12176#[target_feature(enable = "avx512f,avx512vl")]
12177#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12178#[cfg_attr(test, assert_instr(vpmovzxwd))]
12179pub fn _mm_mask_cvtepu16_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12180 unsafe {
12181 let convert: i32x4 = _mm_cvtepu16_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
12183 }
12184}
12185
12186/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12187///
12188/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu16_epi32&expand=1549)
12189#[inline]
12190#[target_feature(enable = "avx512f,avx512vl")]
12191#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12192#[cfg_attr(test, assert_instr(vpmovzxwd))]
12193pub fn _mm_maskz_cvtepu16_epi32(k: __mmask8, a: __m128i) -> __m128i {
12194 unsafe {
12195 let convert: i32x4 = _mm_cvtepu16_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
12197 }
12198}
12199
12200/// Zero extend packed unsigned 16-bit integers in a to packed 64-bit integers, and store the results in dst.
12201///
12202/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu16_epi64&expand=1562)
12203#[inline]
12204#[target_feature(enable = "avx512f")]
12205#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12206#[cfg_attr(test, assert_instr(vpmovzxwq))]
12207pub fn _mm512_cvtepu16_epi64(a: __m128i) -> __m512i {
12208 unsafe {
12209 let a: u16x8 = a.as_u16x8();
        transmute::<i64x8, _>(simd_cast(a))
12211 }
12212}
12213
12214/// Zero extend packed unsigned 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12215///
12216/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu16_epi64&expand=1563)
12217#[inline]
12218#[target_feature(enable = "avx512f")]
12219#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12220#[cfg_attr(test, assert_instr(vpmovzxwq))]
12221pub fn _mm512_mask_cvtepu16_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
12222 unsafe {
12223 let convert: i64x8 = _mm512_cvtepu16_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
12225 }
12226}
12227
12228/// Zero extend packed unsigned 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12229///
12230/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu16_epi64&expand=1564)
12231#[inline]
12232#[target_feature(enable = "avx512f")]
12233#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12234#[cfg_attr(test, assert_instr(vpmovzxwq))]
12235pub fn _mm512_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m512i {
12236 unsafe {
12237 let convert: i64x8 = _mm512_cvtepu16_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
12239 }
12240}
12241
12242/// Zero extend packed unsigned 16-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12243///
12244/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu16_epi64&expand=1560)
12245#[inline]
12246#[target_feature(enable = "avx512f,avx512vl")]
12247#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12248#[cfg_attr(test, assert_instr(vpmovzxwq))]
12249pub fn _mm256_mask_cvtepu16_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
12250 unsafe {
12251 let convert: i64x4 = _mm256_cvtepu16_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
12253 }
12254}
12255
12256/// Zero extend packed unsigned 16-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12257///
12258/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu16_epi64&expand=1561)
12259#[inline]
12260#[target_feature(enable = "avx512f,avx512vl")]
12261#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12262#[cfg_attr(test, assert_instr(vpmovzxwq))]
12263pub fn _mm256_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m256i {
12264 unsafe {
12265 let convert: i64x4 = _mm256_cvtepu16_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
12267 }
12268}
12269
12270/// Zero extend packed unsigned 16-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12271///
12272/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu16_epi64&expand=1557)
12273#[inline]
12274#[target_feature(enable = "avx512f,avx512vl")]
12275#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12276#[cfg_attr(test, assert_instr(vpmovzxwq))]
12277pub fn _mm_mask_cvtepu16_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12278 unsafe {
12279 let convert: i64x2 = _mm_cvtepu16_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
12281 }
12282}
12283
12284/// Zero extend packed unsigned 16-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12285///
12286/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu16_epi64&expand=1558)
12287#[inline]
12288#[target_feature(enable = "avx512f,avx512vl")]
12289#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12290#[cfg_attr(test, assert_instr(vpmovzxwq))]
12291pub fn _mm_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m128i {
12292 unsafe {
12293 let convert: i64x2 = _mm_cvtepu16_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
12295 }
12296}
12297
12298/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst.
12299///
12300/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_epi64&expand=1428)
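///
/// # Examples
///
/// A minimal sketch showing that sign extension preserves negative values
/// (illustrative values; AVX-512F is assumed to be available):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         // SAFETY: the required CPU features were just detected.
///         unsafe {
///             let a = _mm256_setr_epi32(-1, 2, -3, 4, -5, 6, -7, 8);
///             let _r = _mm512_cvtepi32_epi64(a);
///             // The 64-bit lanes of the result are -1, 2, -3, 4, -5, 6, -7, 8.
///         }
///     }
/// }
/// ```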
12301#[inline]
12302#[target_feature(enable = "avx512f")]
12303#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12304#[cfg_attr(test, assert_instr(vpmovsxdq))]
12305pub fn _mm512_cvtepi32_epi64(a: __m256i) -> __m512i {
12306 unsafe {
12307 let a: i32x8 = a.as_i32x8();
        transmute::<i64x8, _>(simd_cast(a))
12309 }
12310}
12311
12312/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12313///
12314/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_epi64&expand=1429)
12315#[inline]
12316#[target_feature(enable = "avx512f")]
12317#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12318#[cfg_attr(test, assert_instr(vpmovsxdq))]
12319pub fn _mm512_mask_cvtepi32_epi64(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
12320 unsafe {
12321 let convert: i64x8 = _mm512_cvtepi32_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
12323 }
12324}
12325
12326/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12327///
12328/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_epi64&expand=1430)
12329#[inline]
12330#[target_feature(enable = "avx512f")]
12331#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12332#[cfg_attr(test, assert_instr(vpmovsxdq))]
12333pub fn _mm512_maskz_cvtepi32_epi64(k: __mmask8, a: __m256i) -> __m512i {
12334 unsafe {
12335 let convert: i64x8 = _mm512_cvtepi32_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
12337 }
12338}
12339
12340/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12341///
12342/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_epi64&expand=1426)
12343#[inline]
12344#[target_feature(enable = "avx512f,avx512vl")]
12345#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12346#[cfg_attr(test, assert_instr(vpmovsxdq))]
12347pub fn _mm256_mask_cvtepi32_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
12348 unsafe {
12349 let convert: i64x4 = _mm256_cvtepi32_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
12351 }
12352}
12353
12354/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12355///
12356/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_epi64&expand=1427)
12357#[inline]
12358#[target_feature(enable = "avx512f,avx512vl")]
12359#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12360#[cfg_attr(test, assert_instr(vpmovsxdq))]
12361pub fn _mm256_maskz_cvtepi32_epi64(k: __mmask8, a: __m128i) -> __m256i {
12362 unsafe {
12363 let convert: i64x4 = _mm256_cvtepi32_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
12365 }
12366}
12367
12368/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12369///
12370/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_epi64&expand=1423)
12371#[inline]
12372#[target_feature(enable = "avx512f,avx512vl")]
12373#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12374#[cfg_attr(test, assert_instr(vpmovsxdq))]
12375pub fn _mm_mask_cvtepi32_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12376 unsafe {
12377 let convert: i64x2 = _mm_cvtepi32_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
12379 }
12380}
12381
12382/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12383///
12384/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_epi64&expand=1424)
12385#[inline]
12386#[target_feature(enable = "avx512f,avx512vl")]
12387#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12388#[cfg_attr(test, assert_instr(vpmovsxdq))]
12389pub fn _mm_maskz_cvtepi32_epi64(k: __mmask8, a: __m128i) -> __m128i {
12390 unsafe {
12391 let convert: i64x2 = _mm_cvtepi32_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
12393 }
12394}
12395
12396/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst.
12397///
12398/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32_epi64&expand=1571)
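///
/// # Examples
///
/// A minimal sketch showing that the input lanes are treated as unsigned
/// (illustrative values; AVX-512F is assumed to be available):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         // SAFETY: the required CPU features were just detected.
///         unsafe {
///             let a = _mm256_set1_epi32(-1); // every 32-bit lane is 0xFFFF_FFFF
///             let _r = _mm512_cvtepu32_epi64(a);
///             // Zero extension yields 4294967295 in every 64-bit lane, not -1.
///         }
///     }
/// }
/// ```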
12399#[inline]
12400#[target_feature(enable = "avx512f")]
12401#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12402#[cfg_attr(test, assert_instr(vpmovzxdq))]
12403pub fn _mm512_cvtepu32_epi64(a: __m256i) -> __m512i {
12404 unsafe {
12405 let a: u32x8 = a.as_u32x8();
        transmute::<i64x8, _>(simd_cast(a))
12407 }
12408}
12409
12410/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12411///
12412/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32_epi64&expand=1572)
12413#[inline]
12414#[target_feature(enable = "avx512f")]
12415#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12416#[cfg_attr(test, assert_instr(vpmovzxdq))]
12417pub fn _mm512_mask_cvtepu32_epi64(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
12418 unsafe {
12419 let convert: i64x8 = _mm512_cvtepu32_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
12421 }
12422}
12423
12424/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12425///
12426/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu32_epi64&expand=1573)
12427#[inline]
12428#[target_feature(enable = "avx512f")]
12429#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12430#[cfg_attr(test, assert_instr(vpmovzxdq))]
12431pub fn _mm512_maskz_cvtepu32_epi64(k: __mmask8, a: __m256i) -> __m512i {
12432 unsafe {
12433 let convert: i64x8 = _mm512_cvtepu32_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
12435 }
12436}
12437
12438/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12439///
12440/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu32_epi64&expand=1569)
12441#[inline]
12442#[target_feature(enable = "avx512f,avx512vl")]
12443#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12444#[cfg_attr(test, assert_instr(vpmovzxdq))]
12445pub fn _mm256_mask_cvtepu32_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
12446 unsafe {
12447 let convert: i64x4 = _mm256_cvtepu32_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
12449 }
12450}
12451
12452/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12453///
12454/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu32_epi64&expand=1570)
12455#[inline]
12456#[target_feature(enable = "avx512f,avx512vl")]
12457#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12458#[cfg_attr(test, assert_instr(vpmovzxdq))]
12459pub fn _mm256_maskz_cvtepu32_epi64(k: __mmask8, a: __m128i) -> __m256i {
12460 unsafe {
12461 let convert: i64x4 = _mm256_cvtepu32_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
12463 }
12464}
12465
12466/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12467///
12468/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu32_epi64&expand=1566)
12469#[inline]
12470#[target_feature(enable = "avx512f,avx512vl")]
12471#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12472#[cfg_attr(test, assert_instr(vpmovzxdq))]
12473pub fn _mm_mask_cvtepu32_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
12474 unsafe {
12475 let convert: i64x2 = _mm_cvtepu32_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
12477 }
12478}
12479
12480/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12481///
12482/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu32_epi64&expand=1567)
12483#[inline]
12484#[target_feature(enable = "avx512f,avx512vl")]
12485#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12486#[cfg_attr(test, assert_instr(vpmovzxdq))]
12487pub fn _mm_maskz_cvtepu32_epi64(k: __mmask8, a: __m128i) -> __m128i {
12488 unsafe {
12489 let convert: i64x2 = _mm_cvtepu32_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
12491 }
12492}
12493
12494/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
12495///
12496/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_ps&expand=1455)
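///
/// # Examples
///
/// A brief illustrative sketch (values chosen for demonstration; AVX-512F is
/// assumed to have been detected at runtime):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         // SAFETY: the required CPU features were just detected.
///         unsafe {
///             let a = _mm512_set1_epi32(-3);
///             let _r = _mm512_cvtepi32_ps(a);
///             // Every f32 lane of the result is -3.0.
///         }
///     }
/// }
/// ```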
12497#[inline]
12498#[target_feature(enable = "avx512f")]
12499#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12500#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12501pub fn _mm512_cvtepi32_ps(a: __m512i) -> __m512 {
12502 unsafe {
12503 let a: i32x16 = a.as_i32x16();
        transmute::<f32x16, _>(simd_cast(a))
12505 }
12506}
12507
12508/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12509///
12510/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_ps&expand=1456)
12511#[inline]
12512#[target_feature(enable = "avx512f")]
12513#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12514#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12515pub fn _mm512_mask_cvtepi32_ps(src: __m512, k: __mmask16, a: __m512i) -> __m512 {
12516 unsafe {
12517 let convert: f32x16 = _mm512_cvtepi32_ps(a).as_f32x16();
        transmute(simd_select_bitmask(k, convert, src.as_f32x16()))
12519 }
12520}
12521
12522/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12523///
12524/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_ps&expand=1457)
12525#[inline]
12526#[target_feature(enable = "avx512f")]
12527#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12528#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12529pub fn _mm512_maskz_cvtepi32_ps(k: __mmask16, a: __m512i) -> __m512 {
12530 unsafe {
12531 let convert: f32x16 = _mm512_cvtepi32_ps(a).as_f32x16();
        transmute(simd_select_bitmask(k, convert, f32x16::ZERO))
12533 }
12534}
12535
12536/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12537///
12538/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_ps&expand=1453)
12539#[inline]
12540#[target_feature(enable = "avx512f,avx512vl")]
12541#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12542#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12543pub fn _mm256_mask_cvtepi32_ps(src: __m256, k: __mmask8, a: __m256i) -> __m256 {
12544 unsafe {
12545 let convert: f32x8 = _mm256_cvtepi32_ps(a).as_f32x8();
        transmute(simd_select_bitmask(k, convert, src.as_f32x8()))
12547 }
12548}
12549
12550/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12551///
12552/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_ps&expand=1454)
12553#[inline]
12554#[target_feature(enable = "avx512f,avx512vl")]
12555#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12556#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12557pub fn _mm256_maskz_cvtepi32_ps(k: __mmask8, a: __m256i) -> __m256 {
12558 unsafe {
12559 let convert: f32x8 = _mm256_cvtepi32_ps(a).as_f32x8();
        transmute(simd_select_bitmask(k, convert, f32x8::ZERO))
12561 }
12562}
12563
12564/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12565///
12566/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_ps&expand=1450)
12567#[inline]
12568#[target_feature(enable = "avx512f,avx512vl")]
12569#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12570#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12571pub fn _mm_mask_cvtepi32_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 {
12572 unsafe {
12573 let convert: f32x4 = _mm_cvtepi32_ps(a).as_f32x4();
        transmute(simd_select_bitmask(k, convert, src.as_f32x4()))
12575 }
12576}
12577
12578/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12579///
12580/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_ps&expand=1451)
12581#[inline]
12582#[target_feature(enable = "avx512f,avx512vl")]
12583#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12584#[cfg_attr(test, assert_instr(vcvtdq2ps))]
12585pub fn _mm_maskz_cvtepi32_ps(k: __mmask8, a: __m128i) -> __m128 {
12586 unsafe {
12587 let convert: f32x4 = _mm_cvtepi32_ps(a).as_f32x4();
        transmute(simd_select_bitmask(k, convert, f32x4::ZERO))
12589 }
12590}
12591
12592/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
12593///
12594/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_pd&expand=1446)
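///
/// # Examples
///
/// An illustrative sketch of the widening conversion (assumes AVX-512F was
/// detected at runtime; values are arbitrary):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         // SAFETY: the required CPU features were just detected.
///         unsafe {
///             let a = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
///             let _r = _mm512_cvtepi32_pd(a);
///             // The eight i32 values become the f64 lanes 0.0, 1.0, ..., 7.0.
///         }
///     }
/// }
/// ```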
12595#[inline]
12596#[target_feature(enable = "avx512f")]
12597#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12598#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12599pub fn _mm512_cvtepi32_pd(a: __m256i) -> __m512d {
12600 unsafe {
12601 let a: i32x8 = a.as_i32x8();
        transmute::<f64x8, _>(simd_cast(a))
12603 }
12604}
12605
12606/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12607///
12608/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_pd&expand=1447)
12609#[inline]
12610#[target_feature(enable = "avx512f")]
12611#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12612#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12613pub fn _mm512_mask_cvtepi32_pd(src: __m512d, k: __mmask8, a: __m256i) -> __m512d {
12614 unsafe {
12615 let convert: f64x8 = _mm512_cvtepi32_pd(a).as_f64x8();
        transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
12617 }
12618}
12619
12620/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12621///
12622/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_pd&expand=1448)
12623#[inline]
12624#[target_feature(enable = "avx512f")]
12625#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12626#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12627pub fn _mm512_maskz_cvtepi32_pd(k: __mmask8, a: __m256i) -> __m512d {
12628 unsafe {
12629 let convert: f64x8 = _mm512_cvtepi32_pd(a).as_f64x8();
        transmute(simd_select_bitmask(k, convert, f64x8::ZERO))
12631 }
12632}
12633
12634/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12635///
12636/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_pd&expand=1444)
12637#[inline]
12638#[target_feature(enable = "avx512f,avx512vl")]
12639#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12640#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12641pub fn _mm256_mask_cvtepi32_pd(src: __m256d, k: __mmask8, a: __m128i) -> __m256d {
12642 unsafe {
12643 let convert: f64x4 = _mm256_cvtepi32_pd(a).as_f64x4();
        transmute(simd_select_bitmask(k, convert, src.as_f64x4()))
12645 }
12646}
12647
12648/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12649///
12650/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_pd&expand=1445)
12651#[inline]
12652#[target_feature(enable = "avx512f,avx512vl")]
12653#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12654#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12655pub fn _mm256_maskz_cvtepi32_pd(k: __mmask8, a: __m128i) -> __m256d {
12656 unsafe {
12657 let convert: f64x4 = _mm256_cvtepi32_pd(a).as_f64x4();
        transmute(simd_select_bitmask(k, convert, f64x4::ZERO))
12659 }
12660}
12661
12662/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12663///
12664/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_pd&expand=1441)
12665#[inline]
12666#[target_feature(enable = "avx512f,avx512vl")]
12667#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12668#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12669pub fn _mm_mask_cvtepi32_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d {
12670 unsafe {
12671 let convert: f64x2 = _mm_cvtepi32_pd(a).as_f64x2();
        transmute(simd_select_bitmask(k, convert, src.as_f64x2()))
12673 }
12674}
12675
12676/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12677///
12678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_pd&expand=1442)
12679#[inline]
12680#[target_feature(enable = "avx512f,avx512vl")]
12681#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12682#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12683pub fn _mm_maskz_cvtepi32_pd(k: __mmask8, a: __m128i) -> __m128d {
12684 unsafe {
12685 let convert: f64x2 = _mm_cvtepi32_pd(a).as_f64x2();
        transmute(simd_select_bitmask(k, convert, f64x2::ZERO))
12687 }
12688}
12689
12690/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
12691///
12692/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32_ps&expand=1583)
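///
/// # Examples
///
/// A sketch highlighting the unsigned interpretation and f32 rounding
/// (illustrative values; AVX-512F is assumed to be available):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         // SAFETY: the required CPU features were just detected.
///         unsafe {
///             let a = _mm512_set1_epi32(-1); // 0xFFFF_FFFF in every lane
///             let _r = _mm512_cvtepu32_ps(a);
///             // Interpreted as unsigned (4294967295), each lane rounds to the
///             // nearest representable f32, 4294967296.0.
///         }
///     }
/// }
/// ```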
12693#[inline]
12694#[target_feature(enable = "avx512f")]
12695#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12696#[cfg_attr(test, assert_instr(vcvtudq2ps))]
12697pub fn _mm512_cvtepu32_ps(a: __m512i) -> __m512 {
12698 unsafe {
12699 let a: u32x16 = a.as_u32x16();
        transmute::<f32x16, _>(simd_cast(a))
12701 }
12702}
12703
12704/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12705///
12706/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32_ps&expand=1584)
12707#[inline]
12708#[target_feature(enable = "avx512f")]
12709#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12710#[cfg_attr(test, assert_instr(vcvtudq2ps))]
12711pub fn _mm512_mask_cvtepu32_ps(src: __m512, k: __mmask16, a: __m512i) -> __m512 {
12712 unsafe {
12713 let convert: f32x16 = _mm512_cvtepu32_ps(a).as_f32x16();
        transmute(simd_select_bitmask(k, convert, src.as_f32x16()))
12715 }
12716}
12717
12718/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12719///
12720/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu32_ps&expand=1585)
12721#[inline]
12722#[target_feature(enable = "avx512f")]
12723#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12724#[cfg_attr(test, assert_instr(vcvtudq2ps))]
12725pub fn _mm512_maskz_cvtepu32_ps(k: __mmask16, a: __m512i) -> __m512 {
12726 unsafe {
12727 let convert: f32x16 = _mm512_cvtepu32_ps(a).as_f32x16();
        transmute(simd_select_bitmask(k, convert, f32x16::ZERO))
12729 }
12730}
12731
12732/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
12733///
12734/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32_pd&expand=1580)
12735#[inline]
12736#[target_feature(enable = "avx512f")]
12737#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12738#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12739pub fn _mm512_cvtepu32_pd(a: __m256i) -> __m512d {
12740 unsafe {
12741 let a: u32x8 = a.as_u32x8();
        transmute::<f64x8, _>(simd_cast(a))
12743 }
12744}
12745
12746/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12747///
12748/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32_pd&expand=1581)
12749#[inline]
12750#[target_feature(enable = "avx512f")]
12751#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12752#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12753pub fn _mm512_mask_cvtepu32_pd(src: __m512d, k: __mmask8, a: __m256i) -> __m512d {
12754 unsafe {
12755 let convert: f64x8 = _mm512_cvtepu32_pd(a).as_f64x8();
        transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
12757 }
12758}
12759
12760/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12761///
12762/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu32_pd&expand=1582)
12763#[inline]
12764#[target_feature(enable = "avx512f")]
12765#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12766#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12767pub fn _mm512_maskz_cvtepu32_pd(k: __mmask8, a: __m256i) -> __m512d {
12768 unsafe {
12769 let convert: f64x8 = _mm512_cvtepu32_pd(a).as_f64x8();
        transmute(simd_select_bitmask(k, convert, f64x8::ZERO))
12771 }
12772}
12773
12774/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
12775///
12776/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu32_pd&expand=1577)
12777#[inline]
12778#[target_feature(enable = "avx512f,avx512vl")]
12779#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12780#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12781pub fn _mm256_cvtepu32_pd(a: __m128i) -> __m256d {
12782 unsafe {
12783 let a: u32x4 = a.as_u32x4();
        transmute::<f64x4, _>(simd_cast(a))
12785 }
12786}
12787
12788/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12789///
12790/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu32_pd&expand=1578)
12791#[inline]
12792#[target_feature(enable = "avx512f,avx512vl")]
12793#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12794#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12795pub fn _mm256_mask_cvtepu32_pd(src: __m256d, k: __mmask8, a: __m128i) -> __m256d {
12796 unsafe {
12797 let convert: f64x4 = _mm256_cvtepu32_pd(a).as_f64x4();
        transmute(simd_select_bitmask(k, convert, src.as_f64x4()))
12799 }
12800}
12801
12802/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12803///
12804/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu32_pd&expand=1579)
12805#[inline]
12806#[target_feature(enable = "avx512f,avx512vl")]
12807#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12808#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12809pub fn _mm256_maskz_cvtepu32_pd(k: __mmask8, a: __m128i) -> __m256d {
12810 unsafe {
12811 let convert: f64x4 = _mm256_cvtepu32_pd(a).as_f64x4();
        transmute(simd_select_bitmask(k, convert, f64x4::ZERO))
12813 }
12814}
12815
12816/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
12817///
12818/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu32_pd&expand=1574)
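///
/// # Examples
///
/// A sketch showing that only the two lowest lanes participate (illustrative
/// values; AVX-512F and AVX-512VL are assumed to be available):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512vl") {
///         // SAFETY: the required CPU features were just detected.
///         unsafe {
///             let a = _mm_setr_epi32(1, -1, 100, 200);
///             let _r = _mm_cvtepu32_pd(a);
///             // Only the two lowest lanes are converted, as unsigned values:
///             // the result is [1.0, 4294967295.0]; the upper lanes of `a` are ignored.
///         }
///     }
/// }
/// ```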
12819#[inline]
12820#[target_feature(enable = "avx512f,avx512vl")]
12821#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12822#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12823pub fn _mm_cvtepu32_pd(a: __m128i) -> __m128d {
12824 unsafe {
12825 let a: u32x4 = a.as_u32x4();
12826 let u64: u32x2 = simd_shuffle!(a, a, [0, 1]);
        transmute::<f64x2, _>(simd_cast(u64))
12828 }
12829}
12830
12831/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12832///
12833/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu32_pd&expand=1575)
12834#[inline]
12835#[target_feature(enable = "avx512f,avx512vl")]
12836#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12837#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12838pub fn _mm_mask_cvtepu32_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d {
12839 unsafe {
12840 let convert: f64x2 = _mm_cvtepu32_pd(a).as_f64x2();
        transmute(simd_select_bitmask(k, convert, src.as_f64x2()))
12842 }
12843}
12844
12845/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12846///
12847/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu32_pd&expand=1576)
12848#[inline]
12849#[target_feature(enable = "avx512f,avx512vl")]
12850#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12851#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12852pub fn _mm_maskz_cvtepu32_pd(k: __mmask8, a: __m128i) -> __m128d {
12853 unsafe {
12854 let convert: f64x2 = _mm_cvtepu32_pd(a).as_f64x2();
        transmute(simd_select_bitmask(k, convert, f64x2::ZERO))
12856 }
12857}
12858
12859/// Performs element-by-element conversion of the lower half of packed 32-bit integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst.
12860///
12861/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32lo_pd&expand=1464)
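///
/// # Examples
///
/// A sketch showing that only the lower half of the input is converted
/// (illustrative values; AVX-512F is assumed to have been detected):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         // SAFETY: the required CPU features were just detected.
///         unsafe {
///             let v2 = _mm512_set1_epi32(9);
///             let _r = _mm512_cvtepi32lo_pd(v2);
///             // Only the lower eight 32-bit lanes of `v2` are used; every f64 lane is 9.0.
///         }
///     }
/// }
/// ```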
12862#[inline]
12863#[target_feature(enable = "avx512f")]
12864#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12865#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12866pub fn _mm512_cvtepi32lo_pd(v2: __m512i) -> __m512d {
12867 unsafe {
12868 let v2: i32x16 = v2.as_i32x16();
12869 let v256: i32x8 = simd_shuffle!(v2, v2, [0, 1, 2, 3, 4, 5, 6, 7]);
        transmute::<f64x8, _>(simd_cast(v256))
12871 }
12872}
12873
12874/// Performs element-by-element conversion of the lower half of packed 32-bit integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12875///
12876/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32lo_pd&expand=1465)
12877#[inline]
12878#[target_feature(enable = "avx512f")]
12879#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12880#[cfg_attr(test, assert_instr(vcvtdq2pd))]
12881pub fn _mm512_mask_cvtepi32lo_pd(src: __m512d, k: __mmask8, v2: __m512i) -> __m512d {
12882 unsafe {
12883 let convert: f64x8 = _mm512_cvtepi32lo_pd(v2).as_f64x8();
        transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
12885 }
12886}
12887
12888/// Performs element-by-element conversion of the lower half of packed 32-bit unsigned integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst.
12889///
12890/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32lo_pd&expand=1586)
12891#[inline]
12892#[target_feature(enable = "avx512f")]
12893#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12894#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12895pub fn _mm512_cvtepu32lo_pd(v2: __m512i) -> __m512d {
12896 unsafe {
12897 let v2: u32x16 = v2.as_u32x16();
12898 let v256: u32x8 = simd_shuffle!(v2, v2, [0, 1, 2, 3, 4, 5, 6, 7]);
        transmute::<f64x8, _>(simd_cast(v256))
12900 }
12901}
12902
/// Performs element-by-element conversion of the lower half of packed 32-bit unsigned integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12904///
12905/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32lo_pd&expand=1587)
12906#[inline]
12907#[target_feature(enable = "avx512f")]
12908#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12909#[cfg_attr(test, assert_instr(vcvtudq2pd))]
12910pub fn _mm512_mask_cvtepu32lo_pd(src: __m512d, k: __mmask8, v2: __m512i) -> __m512d {
12911 unsafe {
12912 let convert: f64x8 = _mm512_cvtepu32lo_pd(v2).as_f64x8();
        transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
12914 }
12915}
12916
12917/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
12918///
12919/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_epi16&expand=1419)
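///
/// # Examples
///
/// A sketch of the truncating narrowing (illustrative values; AVX-512F is
/// assumed to be available):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         // SAFETY: the required CPU features were just detected.
///         unsafe {
///             let a = _mm512_set1_epi32(0x0001_FFFF);
///             let _r = _mm512_cvtepi32_epi16(a);
///             // Truncation keeps only the low 16 bits: every 16-bit lane is 0xFFFF (-1 as i16).
///         }
///     }
/// }
/// ```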
12920#[inline]
12921#[target_feature(enable = "avx512f")]
12922#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12923#[cfg_attr(test, assert_instr(vpmovdw))]
12924pub fn _mm512_cvtepi32_epi16(a: __m512i) -> __m256i {
12925 unsafe {
12926 let a: i32x16 = a.as_i32x16();
        transmute::<i16x16, _>(simd_cast(a))
12928 }
12929}
12930
12931/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12932///
12933/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_epi16&expand=1420)
12934#[inline]
12935#[target_feature(enable = "avx512f")]
12936#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12937#[cfg_attr(test, assert_instr(vpmovdw))]
12938pub fn _mm512_mask_cvtepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i {
12939 unsafe {
12940 let convert: i16x16 = _mm512_cvtepi32_epi16(a).as_i16x16();
        transmute(simd_select_bitmask(k, convert, src.as_i16x16()))
12942 }
12943}
12944
12945/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12946///
12947/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_epi16&expand=1421)
12948#[inline]
12949#[target_feature(enable = "avx512f")]
12950#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12951#[cfg_attr(test, assert_instr(vpmovdw))]
12952pub fn _mm512_maskz_cvtepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
12953 unsafe {
12954 let convert: i16x16 = _mm512_cvtepi32_epi16(a).as_i16x16();
        transmute(simd_select_bitmask(k, convert, i16x16::ZERO))
12956 }
12957}
12958
12959/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
12960///
12961/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi32_epi16&expand=1416)
12962#[inline]
12963#[target_feature(enable = "avx512f,avx512vl")]
12964#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12965#[cfg_attr(test, assert_instr(vpmovdw))]
12966pub fn _mm256_cvtepi32_epi16(a: __m256i) -> __m128i {
12967 unsafe {
12968 let a: i32x8 = a.as_i32x8();
        transmute::<i16x8, _>(simd_cast(a))
12970 }
12971}
12972
12973/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12974///
12975/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_epi16&expand=1417)
12976#[inline]
12977#[target_feature(enable = "avx512f,avx512vl")]
12978#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12979#[cfg_attr(test, assert_instr(vpmovdw))]
12980pub fn _mm256_mask_cvtepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
12981 unsafe {
12982 let convert: i16x8 = _mm256_cvtepi32_epi16(a).as_i16x8();
        transmute(simd_select_bitmask(k, convert, src.as_i16x8()))
12984 }
12985}
12986
12987/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
12988///
12989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_epi16&expand=1418)
12990#[inline]
12991#[target_feature(enable = "avx512f,avx512vl")]
12992#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
12993#[cfg_attr(test, assert_instr(vpmovdw))]
12994pub fn _mm256_maskz_cvtepi32_epi16(k: __mmask8, a: __m256i) -> __m128i {
12995 unsafe {
12996 let convert: i16x8 = _mm256_cvtepi32_epi16(a).as_i16x8();
        transmute(simd_select_bitmask(k, convert, i16x8::ZERO))
12998 }
12999}
13000
13001/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
13002///
13003/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_epi16&expand=1413)
13004#[inline]
13005#[target_feature(enable = "avx512f,avx512vl")]
13006#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13007#[cfg_attr(test, assert_instr(vpmovdw))]
13008pub fn _mm_cvtepi32_epi16(a: __m128i) -> __m128i {
    unsafe { transmute(vpmovdw128(a.as_i32x4(), i16x8::ZERO, 0b11111111)) }
13010}
13011
13012/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13013///
13014/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_epi16&expand=1414)
13015#[inline]
13016#[target_feature(enable = "avx512f,avx512vl")]
13017#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13018#[cfg_attr(test, assert_instr(vpmovdw))]
13019pub fn _mm_mask_cvtepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovdw128(a.as_i32x4(), src.as_i16x8(), k)) }
13021}
13022
13023/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13024///
13025/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_epi16&expand=1415)
13026#[inline]
13027#[target_feature(enable = "avx512f,avx512vl")]
13028#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13029#[cfg_attr(test, assert_instr(vpmovdw))]
13030pub fn _mm_maskz_cvtepi32_epi16(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovdw128(a.as_i32x4(), i16x8::ZERO, k)) }
13032}
13033
13034/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
13035///
13036/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_epi8&expand=1437)
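///
/// # Examples
///
/// A sketch of the truncation to 8-bit lanes (illustrative values; AVX-512F is
/// assumed to have been detected at runtime):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         // SAFETY: the required CPU features were just detected.
///         unsafe {
///             let a = _mm512_set1_epi32(0x0102_0304);
///             let _r = _mm512_cvtepi32_epi8(a);
///             // Truncation keeps only the low 8 bits: every 8-bit lane is 0x04.
///         }
///     }
/// }
/// ```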
13037#[inline]
13038#[target_feature(enable = "avx512f")]
13039#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13040#[cfg_attr(test, assert_instr(vpmovdb))]
13041pub fn _mm512_cvtepi32_epi8(a: __m512i) -> __m128i {
13042 unsafe {
13043 let a: i32x16 = a.as_i32x16();
        transmute::<i8x16, _>(simd_cast(a))
13045 }
13046}
13047
13048/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13049///
13050/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_epi8&expand=1438)
13051#[inline]
13052#[target_feature(enable = "avx512f")]
13053#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13054#[cfg_attr(test, assert_instr(vpmovdb))]
13055pub fn _mm512_mask_cvtepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i {
13056 unsafe {
13057 let convert: i8x16 = _mm512_cvtepi32_epi8(a).as_i8x16();
        transmute(simd_select_bitmask(k, convert, src.as_i8x16()))
13059 }
13060}
13061
13062/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13063///
13064/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_epi8&expand=1439)
13065#[inline]
13066#[target_feature(enable = "avx512f")]
13067#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13068#[cfg_attr(test, assert_instr(vpmovdb))]
13069pub fn _mm512_maskz_cvtepi32_epi8(k: __mmask16, a: __m512i) -> __m128i {
13070 unsafe {
13071 let convert: i8x16 = _mm512_cvtepi32_epi8(a).as_i8x16();
        transmute(simd_select_bitmask(k, convert, i8x16::ZERO))
13073 }
13074}
13075
13076/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
13077///
13078/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi32_epi8&expand=1434)
13079#[inline]
13080#[target_feature(enable = "avx512f,avx512vl")]
13081#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13082#[cfg_attr(test, assert_instr(vpmovdb))]
13083pub fn _mm256_cvtepi32_epi8(a: __m256i) -> __m128i {
    unsafe { transmute(vpmovdb256(a.as_i32x8(), i8x16::ZERO, 0b11111111)) }
13085}
13086
13087/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13088///
13089/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_epi8&expand=1435)
13090#[inline]
13091#[target_feature(enable = "avx512f,avx512vl")]
13092#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13093#[cfg_attr(test, assert_instr(vpmovdb))]
13094pub fn _mm256_mask_cvtepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovdb256(a.as_i32x8(), src.as_i8x16(), k)) }
13096}
13097
13098/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13099///
13100/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_epi8&expand=1436)
13101#[inline]
13102#[target_feature(enable = "avx512f,avx512vl")]
13103#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13104#[cfg_attr(test, assert_instr(vpmovdb))]
13105pub fn _mm256_maskz_cvtepi32_epi8(k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovdb256(a.as_i32x8(), i8x16::ZERO, k)) }
13107}
13108
13109/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
13110///
13111/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_epi8&expand=1431)
13112#[inline]
13113#[target_feature(enable = "avx512f,avx512vl")]
13114#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13115#[cfg_attr(test, assert_instr(vpmovdb))]
13116pub fn _mm_cvtepi32_epi8(a: __m128i) -> __m128i {
    unsafe { transmute(vpmovdb128(a.as_i32x4(), i8x16::ZERO, 0b11111111)) }
13118}
13119
13120/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13121///
13122/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_epi8&expand=1432)
13123#[inline]
13124#[target_feature(enable = "avx512f,avx512vl")]
13125#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13126#[cfg_attr(test, assert_instr(vpmovdb))]
13127pub fn _mm_mask_cvtepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovdb128(a.as_i32x4(), src.as_i8x16(), k)) }
13129}
13130
13131/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13132///
13133/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_epi8&expand=1433)
13134#[inline]
13135#[target_feature(enable = "avx512f,avx512vl")]
13136#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13137#[cfg_attr(test, assert_instr(vpmovdb))]
13138pub fn _mm_maskz_cvtepi32_epi8(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovdb128(a.as_i32x4(), i8x16::ZERO, k)) }
13140}
13141
13142/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst.
13143///
13144/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_epi32&expand=1481)
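///
/// # Examples
///
/// A sketch of the truncation to 32-bit lanes (illustrative values; AVX-512F is
/// assumed to be available):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         // SAFETY: the required CPU features were just detected.
///         unsafe {
///             let a = _mm512_set1_epi64(0x1_0000_0005);
///             let _r = _mm512_cvtepi64_epi32(a);
///             // Truncation keeps only the low 32 bits: every 32-bit lane is 5.
///         }
///     }
/// }
/// ```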
13145#[inline]
13146#[target_feature(enable = "avx512f")]
13147#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
13148#[cfg_attr(test, assert_instr(vpmovqd))]
13149pub fn _mm512_cvtepi64_epi32(a: __m512i) -> __m256i {
13150 unsafe {
13151 let a: i64x8 = a.as_i64x8();
        transmute::<i32x8, _>(simd_cast(a))
13153 }
13154}
13155
13156/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13157///
13158/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_epi32&expand=1482)
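///
/// A minimal illustrative sketch of the writemask behaviour (not from Intel's
/// documentation; the constants and mask are arbitrary):
///
/// ```ignore
/// let src = _mm256_set1_epi32(-1);
/// let a = _mm512_set1_epi64(7);
/// // Mask bit i selects the converted lane i; cleared bits copy the lane from `src`.
/// let r = _mm512_mask_cvtepi64_epi32(src, 0b0000_1111, a);
/// assert_eq!(_mm256_extract_epi32::<0>(r), 7);
/// assert_eq!(_mm256_extract_epi32::<7>(r), -1);
/// ```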
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqd))]
pub fn _mm512_mask_cvtepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i {
    unsafe {
        let convert: i32x8 = _mm512_cvtepi64_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
    }
}

/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_epi32&expand=1483)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqd))]
pub fn _mm512_maskz_cvtepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
    unsafe {
        let convert: i32x8 = _mm512_cvtepi64_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
    }
}

/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_epi32&expand=1478)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqd))]
pub fn _mm256_cvtepi64_epi32(a: __m256i) -> __m128i {
    unsafe {
        let a: i64x4 = a.as_i64x4();
        transmute::<i32x4, _>(simd_cast(a))
    }
}

/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_epi32&expand=1479)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqd))]
pub fn _mm256_mask_cvtepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    unsafe {
        let convert: i32x4 = _mm256_cvtepi64_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
    }
}

/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_epi32&expand=1480)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqd))]
pub fn _mm256_maskz_cvtepi64_epi32(k: __mmask8, a: __m256i) -> __m128i {
    unsafe {
        let convert: i32x4 = _mm256_cvtepi64_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
    }
}

/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_epi32&expand=1475)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqd))]
pub fn _mm_cvtepi64_epi32(a: __m128i) -> __m128i {
    unsafe { transmute(vpmovqd128(a.as_i64x2(), i32x4::ZERO, 0b11111111)) }
}

/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_epi32&expand=1476)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqd))]
pub fn _mm_mask_cvtepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovqd128(a.as_i64x2(), src.as_i32x4(), k)) }
}

/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_epi32&expand=1477)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqd))]
pub fn _mm_maskz_cvtepi64_epi32(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovqd128(a.as_i64x2(), i32x4::ZERO, k)) }
}

/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_epi16&expand=1472)
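///
/// A minimal illustrative sketch (not from Intel's documentation; the constant is
/// arbitrary and `_mm512_set1_epi64` / `_mm_extract_epi16` are ordinary helpers):
///
/// ```ignore
/// let a = _mm512_set1_epi64(0x1_0002);
/// // Truncation keeps only the low 16 bits of each 64-bit lane.
/// let r = _mm512_cvtepi64_epi16(a);
/// assert_eq!(_mm_extract_epi16::<0>(r), 2);
/// ```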
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqw))]
pub fn _mm512_cvtepi64_epi16(a: __m512i) -> __m128i {
    unsafe {
        let a: i64x8 = a.as_i64x8();
        transmute::<i16x8, _>(simd_cast(a))
    }
}

/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_epi16&expand=1473)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqw))]
pub fn _mm512_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
    unsafe {
        let convert: i16x8 = _mm512_cvtepi64_epi16(a).as_i16x8();
        transmute(simd_select_bitmask(k, convert, src.as_i16x8()))
    }
}

/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_epi16&expand=1474)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqw))]
pub fn _mm512_maskz_cvtepi64_epi16(k: __mmask8, a: __m512i) -> __m128i {
    unsafe {
        let convert: i16x8 = _mm512_cvtepi64_epi16(a).as_i16x8();
        transmute(simd_select_bitmask(k, convert, i16x8::ZERO))
    }
}

/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_epi16&expand=1469)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqw))]
pub fn _mm256_cvtepi64_epi16(a: __m256i) -> __m128i {
    unsafe { transmute(vpmovqw256(a.as_i64x4(), i16x8::ZERO, 0b11111111)) }
}

/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_epi16&expand=1470)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqw))]
pub fn _mm256_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovqw256(a.as_i64x4(), src.as_i16x8(), k)) }
}

/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_epi16&expand=1471)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqw))]
pub fn _mm256_maskz_cvtepi64_epi16(k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovqw256(a.as_i64x4(), i16x8::ZERO, k)) }
}

/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_epi16&expand=1466)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqw))]
pub fn _mm_cvtepi64_epi16(a: __m128i) -> __m128i {
    unsafe { transmute(vpmovqw128(a.as_i64x2(), i16x8::ZERO, 0b11111111)) }
}

/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_epi16&expand=1467)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqw))]
pub fn _mm_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovqw128(a.as_i64x2(), src.as_i16x8(), k)) }
}

/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_epi16&expand=1468)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqw))]
pub fn _mm_maskz_cvtepi64_epi16(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovqw128(a.as_i64x2(), i16x8::ZERO, k)) }
}

/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_epi8&expand=1490)
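///
/// A minimal illustrative sketch (not from Intel's documentation; the constant is
/// arbitrary and `_mm512_set1_epi64` / `_mm_extract_epi8` are ordinary helpers):
///
/// ```ignore
/// let a = _mm512_set1_epi64(257);
/// // Truncation keeps only the low 8 bits of each 64-bit lane: 257 & 0xFF == 1.
/// let r = _mm512_cvtepi64_epi8(a);
/// assert_eq!(_mm_extract_epi8::<0>(r), 1);
/// // Only the low 8 bytes of the result are used; the upper 8 bytes are zeroed.
/// assert_eq!(_mm_extract_epi8::<8>(r), 0);
/// ```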
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub fn _mm512_cvtepi64_epi8(a: __m512i) -> __m128i {
    unsafe { transmute(vpmovqb(a.as_i64x8(), i8x16::ZERO, 0b11111111)) }
}

/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_epi8&expand=1491)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub fn _mm512_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
    unsafe { transmute(vpmovqb(a.as_i64x8(), src.as_i8x16(), k)) }
}

/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_epi8&expand=1492)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub fn _mm512_maskz_cvtepi64_epi8(k: __mmask8, a: __m512i) -> __m128i {
    unsafe { transmute(vpmovqb(a.as_i64x8(), i8x16::ZERO, k)) }
}

/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_epi8&expand=1487)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub fn _mm256_cvtepi64_epi8(a: __m256i) -> __m128i {
    unsafe { transmute(vpmovqb256(a.as_i64x4(), i8x16::ZERO, 0b11111111)) }
}

/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_epi8&expand=1488)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub fn _mm256_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovqb256(a.as_i64x4(), src.as_i8x16(), k)) }
}

/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_epi8&expand=1489)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub fn _mm256_maskz_cvtepi64_epi8(k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovqb256(a.as_i64x4(), i8x16::ZERO, k)) }
}

/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_epi8&expand=1484)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub fn _mm_cvtepi64_epi8(a: __m128i) -> __m128i {
    unsafe { transmute(vpmovqb128(a.as_i64x2(), i8x16::ZERO, 0b11111111)) }
}

/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_epi8&expand=1485)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub fn _mm_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovqb128(a.as_i64x2(), src.as_i8x16(), k)) }
}

/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_epi8&expand=1486)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub fn _mm_maskz_cvtepi64_epi8(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovqb128(a.as_i64x2(), i8x16::ZERO, k)) }
}

/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi32_epi16&expand=1819)
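///
/// A minimal illustrative sketch (not from Intel's documentation; the constant is
/// arbitrary and `_mm512_set1_epi32` / `_mm256_extract_epi16` are ordinary helpers):
///
/// ```ignore
/// let a = _mm512_set1_epi32(100_000);
/// // 100_000 does not fit in an i16, so signed saturation clamps it to i16::MAX.
/// let r = _mm512_cvtsepi32_epi16(a);
/// assert_eq!(_mm256_extract_epi16::<0>(r), 32767);
/// ```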
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdw))]
pub fn _mm512_cvtsepi32_epi16(a: __m512i) -> __m256i {
    unsafe { transmute(vpmovsdw(a.as_i32x16(), i16x16::ZERO, 0b11111111_11111111)) }
}

/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_epi16&expand=1820)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdw))]
pub fn _mm512_mask_cvtsepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i {
    unsafe { transmute(vpmovsdw(a.as_i32x16(), src.as_i16x16(), k)) }
}

/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi32_epi16&expand=1819)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdw))]
pub fn _mm512_maskz_cvtsepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
    unsafe { transmute(vpmovsdw(a.as_i32x16(), i16x16::ZERO, k)) }
}

/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi32_epi16&expand=1816)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdw))]
pub fn _mm256_cvtsepi32_epi16(a: __m256i) -> __m128i {
    unsafe { transmute(vpmovsdw256(a.as_i32x8(), i16x8::ZERO, 0b11111111)) }
}

/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_epi16&expand=1817)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdw))]
pub fn _mm256_mask_cvtsepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovsdw256(a.as_i32x8(), src.as_i16x8(), k)) }
}

/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi32_epi16&expand=1818)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdw))]
pub fn _mm256_maskz_cvtsepi32_epi16(k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovsdw256(a.as_i32x8(), i16x8::ZERO, k)) }
}

/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi32_epi16&expand=1813)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdw))]
pub fn _mm_cvtsepi32_epi16(a: __m128i) -> __m128i {
    unsafe { transmute(vpmovsdw128(a.as_i32x4(), i16x8::ZERO, 0b11111111)) }
}

/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_epi16&expand=1814)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdw))]
pub fn _mm_mask_cvtsepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovsdw128(a.as_i32x4(), src.as_i16x8(), k)) }
}

/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi32_epi16&expand=1815)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdw))]
pub fn _mm_maskz_cvtsepi32_epi16(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovsdw128(a.as_i32x4(), i16x8::ZERO, k)) }
}

/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi32_epi8&expand=1828)
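///
/// A minimal illustrative sketch (not from Intel's documentation; the constant is
/// arbitrary and `_mm512_set1_epi32` / `_mm_extract_epi8` are ordinary helpers):
///
/// ```ignore
/// let a = _mm512_set1_epi32(-1000);
/// // -1000 does not fit in an i8, so signed saturation clamps it to i8::MIN.
/// let r = _mm512_cvtsepi32_epi8(a);
/// assert_eq!(_mm_extract_epi8::<0>(r) as i8, i8::MIN);
/// ```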
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdb))]
pub fn _mm512_cvtsepi32_epi8(a: __m512i) -> __m128i {
    unsafe { transmute(vpmovsdb(a.as_i32x16(), i8x16::ZERO, 0b11111111_11111111)) }
}

/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_epi8&expand=1829)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdb))]
pub fn _mm512_mask_cvtsepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i {
    unsafe { transmute(vpmovsdb(a.as_i32x16(), src.as_i8x16(), k)) }
}

/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi32_epi8&expand=1830)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdb))]
pub fn _mm512_maskz_cvtsepi32_epi8(k: __mmask16, a: __m512i) -> __m128i {
    unsafe { transmute(vpmovsdb(a.as_i32x16(), i8x16::ZERO, k)) }
}

/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi32_epi8&expand=1825)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdb))]
pub fn _mm256_cvtsepi32_epi8(a: __m256i) -> __m128i {
    unsafe { transmute(vpmovsdb256(a.as_i32x8(), i8x16::ZERO, 0b11111111)) }
}

/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_epi8&expand=1826)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdb))]
pub fn _mm256_mask_cvtsepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovsdb256(a.as_i32x8(), src.as_i8x16(), k)) }
}

/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi32_epi8&expand=1827)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdb))]
pub fn _mm256_maskz_cvtsepi32_epi8(k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovsdb256(a.as_i32x8(), i8x16::ZERO, k)) }
}

/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi32_epi8&expand=1822)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdb))]
pub fn _mm_cvtsepi32_epi8(a: __m128i) -> __m128i {
    unsafe { transmute(vpmovsdb128(a.as_i32x4(), i8x16::ZERO, 0b11111111)) }
}

/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_epi8&expand=1823)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdb))]
pub fn _mm_mask_cvtsepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovsdb128(a.as_i32x4(), src.as_i8x16(), k)) }
}

/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi32_epi8&expand=1824)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsdb))]
pub fn _mm_maskz_cvtsepi32_epi8(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovsdb128(a.as_i32x4(), i8x16::ZERO, k)) }
}

/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi64_epi32&expand=1852)
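///
/// A minimal illustrative sketch (not from Intel's documentation; the constant is
/// arbitrary and `_mm512_set1_epi64` / `_mm256_extract_epi32` are ordinary helpers):
///
/// ```ignore
/// let a = _mm512_set1_epi64(i64::MAX);
/// // i64::MAX does not fit in an i32, so signed saturation clamps it to i32::MAX.
/// let r = _mm512_cvtsepi64_epi32(a);
/// assert_eq!(_mm256_extract_epi32::<0>(r), i32::MAX);
/// ```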
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub fn _mm512_cvtsepi64_epi32(a: __m512i) -> __m256i {
    unsafe { transmute(vpmovsqd(a.as_i64x8(), i32x8::ZERO, 0b11111111)) }
}

/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_epi32&expand=1853)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub fn _mm512_mask_cvtsepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i {
    unsafe { transmute(vpmovsqd(a.as_i64x8(), src.as_i32x8(), k)) }
}

/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi64_epi32&expand=1854)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub fn _mm512_maskz_cvtsepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
    unsafe { transmute(vpmovsqd(a.as_i64x8(), i32x8::ZERO, k)) }
}

/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi64_epi32&expand=1849)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub fn _mm256_cvtsepi64_epi32(a: __m256i) -> __m128i {
    unsafe { transmute(vpmovsqd256(a.as_i64x4(), i32x4::ZERO, 0b11111111)) }
}

/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_epi32&expand=1850)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub fn _mm256_mask_cvtsepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovsqd256(a.as_i64x4(), src.as_i32x4(), k)) }
}

/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi64_epi32&expand=1851)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub fn _mm256_maskz_cvtsepi64_epi32(k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovsqd256(a.as_i64x4(), i32x4::ZERO, k)) }
}

/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi64_epi32&expand=1846)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub fn _mm_cvtsepi64_epi32(a: __m128i) -> __m128i {
    unsafe { transmute(vpmovsqd128(a.as_i64x2(), i32x4::ZERO, 0b11111111)) }
}

/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_epi32&expand=1847)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub fn _mm_mask_cvtsepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovsqd128(a.as_i64x2(), src.as_i32x4(), k)) }
}

/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi64_epi32&expand=1848)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub fn _mm_maskz_cvtsepi64_epi32(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovsqd128(a.as_i64x2(), i32x4::ZERO, k)) }
}

/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi64_epi16&expand=1843)
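///
/// A minimal illustrative sketch (not from Intel's documentation; the constant is
/// arbitrary and `_mm512_set1_epi64` / `_mm_extract_epi16` are ordinary helpers):
///
/// ```ignore
/// let a = _mm512_set1_epi64(-5_000_000);
/// // -5_000_000 does not fit in an i16, so signed saturation clamps it to i16::MIN.
/// let r = _mm512_cvtsepi64_epi16(a);
/// assert_eq!(_mm_extract_epi16::<0>(r) as i16, i16::MIN);
/// ```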
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqw))]
pub fn _mm512_cvtsepi64_epi16(a: __m512i) -> __m128i {
    unsafe { transmute(vpmovsqw(a.as_i64x8(), i16x8::ZERO, 0b11111111)) }
}

/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_epi16&expand=1844)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqw))]
pub fn _mm512_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
    unsafe { transmute(vpmovsqw(a.as_i64x8(), src.as_i16x8(), k)) }
}

/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi64_epi16&expand=1845)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqw))]
pub fn _mm512_maskz_cvtsepi64_epi16(k: __mmask8, a: __m512i) -> __m128i {
    unsafe { transmute(vpmovsqw(a.as_i64x8(), i16x8::ZERO, k)) }
}

/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi64_epi16&expand=1840)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqw))]
pub fn _mm256_cvtsepi64_epi16(a: __m256i) -> __m128i {
    unsafe { transmute(vpmovsqw256(a.as_i64x4(), i16x8::ZERO, 0b11111111)) }
}

/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_epi16&expand=1841)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqw))]
pub fn _mm256_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovsqw256(a.as_i64x4(), src.as_i16x8(), k)) }
}

/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi64_epi16&expand=1842)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqw))]
pub fn _mm256_maskz_cvtsepi64_epi16(k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovsqw256(a.as_i64x4(), i16x8::ZERO, k)) }
}

/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi64_epi16&expand=1837)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqw))]
pub fn _mm_cvtsepi64_epi16(a: __m128i) -> __m128i {
    unsafe { transmute(vpmovsqw128(a.as_i64x2(), i16x8::ZERO, 0b11111111)) }
}

/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_epi16&expand=1838)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqw))]
pub fn _mm_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovsqw128(a.as_i64x2(), src.as_i16x8(), k)) }
}

/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi64_epi16&expand=1839)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqw))]
pub fn _mm_maskz_cvtsepi64_epi16(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovsqw128(a.as_i64x2(), i16x8::ZERO, k)) }
}

/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi64_epi8&expand=1861)
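///
/// A minimal illustrative sketch (not from Intel's documentation; the constant is
/// arbitrary and `_mm512_set1_epi64` / `_mm_extract_epi8` are ordinary helpers):
///
/// ```ignore
/// let a = _mm512_set1_epi64(1000);
/// // 1000 does not fit in an i8, so signed saturation clamps it to i8::MAX.
/// let r = _mm512_cvtsepi64_epi8(a);
/// assert_eq!(_mm_extract_epi8::<0>(r), 127);
/// // Only the low 8 bytes of the result are used; the upper 8 bytes are zeroed.
/// assert_eq!(_mm_extract_epi8::<8>(r), 0);
/// ```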
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub fn _mm512_cvtsepi64_epi8(a: __m512i) -> __m128i {
    unsafe { transmute(vpmovsqb(a.as_i64x8(), i8x16::ZERO, 0b11111111)) }
}

/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_epi8&expand=1862)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub fn _mm512_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
    unsafe { transmute(vpmovsqb(a.as_i64x8(), src.as_i8x16(), k)) }
}

/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi64_epi8&expand=1863)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub fn _mm512_maskz_cvtsepi64_epi8(k: __mmask8, a: __m512i) -> __m128i {
    unsafe { transmute(vpmovsqb(a.as_i64x8(), i8x16::ZERO, k)) }
}

/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi64_epi8&expand=1858)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub fn _mm256_cvtsepi64_epi8(a: __m256i) -> __m128i {
    unsafe { transmute(vpmovsqb256(a.as_i64x4(), i8x16::ZERO, 0b11111111)) }
}

/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_epi8&expand=1859)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub fn _mm256_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovsqb256(a.as_i64x4(), src.as_i8x16(), k)) }
}

/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi64_epi8&expand=1860)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub fn _mm256_maskz_cvtsepi64_epi8(k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovsqb256(a.as_i64x4(), i8x16::ZERO, k)) }
}

/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi64_epi8&expand=1855)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub fn _mm_cvtsepi64_epi8(a: __m128i) -> __m128i {
    unsafe { transmute(vpmovsqb128(a.as_i64x2(), i8x16::ZERO, 0b11111111)) }
}

/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_epi8&expand=1856)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub fn _mm_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovsqb128(a.as_i64x2(), src.as_i8x16(), k)) }
}

/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi64_epi8&expand=1857)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub fn _mm_maskz_cvtsepi64_epi8(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovsqb128(a.as_i64x2(), i8x16::ZERO, k)) }
}

/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi32_epi16&expand=2054)
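///
/// A minimal illustrative sketch (not from Intel's documentation; the constant is
/// arbitrary and `_mm512_set1_epi32` / `_mm256_extract_epi16` are ordinary helpers):
///
/// ```ignore
/// let a = _mm512_set1_epi32(70_000);
/// // 70_000 exceeds u16::MAX, so unsigned saturation clamps it to 0xFFFF.
/// let r = _mm512_cvtusepi32_epi16(a);
/// assert_eq!(_mm256_extract_epi16::<0>(r), 0xFFFF);
/// ```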
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusdw))]
pub fn _mm512_cvtusepi32_epi16(a: __m512i) -> __m256i {
    unsafe { transmute(vpmovusdw(a.as_u32x16(), u16x16::ZERO, 0b11111111_11111111)) }
}

/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_epi16&expand=2055)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusdw))]
pub fn _mm512_mask_cvtusepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i {
    unsafe { transmute(vpmovusdw(a.as_u32x16(), src.as_u16x16(), k)) }
}

/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi32_epi16&expand=2056)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusdw))]
pub fn _mm512_maskz_cvtusepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
    unsafe { transmute(vpmovusdw(a.as_u32x16(), u16x16::ZERO, k)) }
}

/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi32_epi16&expand=2051)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusdw))]
pub fn _mm256_cvtusepi32_epi16(a: __m256i) -> __m128i {
    unsafe { transmute(vpmovusdw256(a.as_u32x8(), u16x8::ZERO, 0b11111111)) }
}

/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_epi16&expand=2052)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusdw))]
pub fn _mm256_mask_cvtusepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovusdw256(a.as_u32x8(), src.as_u16x8(), k)) }
}

/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi32_epi16&expand=2053)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusdw))]
pub fn _mm256_maskz_cvtusepi32_epi16(k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovusdw256(a.as_u32x8(), u16x8::ZERO, k)) }
}

/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi32_epi16&expand=2048)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusdw))]
pub fn _mm_cvtusepi32_epi16(a: __m128i) -> __m128i {
    unsafe { transmute(vpmovusdw128(a.as_u32x4(), u16x8::ZERO, 0b11111111)) }
}

/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_epi16&expand=2049)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusdw))]
pub fn _mm_mask_cvtusepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovusdw128(a.as_u32x4(), src.as_u16x8(), k)) }
}

/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi32_epi16&expand=2050)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusdw))]
pub fn _mm_maskz_cvtusepi32_epi16(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovusdw128(a.as_u32x4(), u16x8::ZERO, k)) }
}

/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi32_epi8&expand=2063)
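///
/// A minimal illustrative sketch (not from Intel's documentation; the constant is
/// arbitrary and `_mm512_set1_epi32` / `_mm_extract_epi8` are ordinary helpers):
///
/// ```ignore
/// let a = _mm512_set1_epi32(300);
/// // 300 exceeds u8::MAX, so unsigned saturation clamps it to 255 (contrast with
/// // `_mm_cvtepi32_epi8`, where plain truncation of 300 yields 44).
/// let r = _mm512_cvtusepi32_epi8(a);
/// assert_eq!(_mm_extract_epi8::<0>(r), 255);
/// ```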
14063#[inline]
14064#[target_feature(enable = "avx512f")]
14065#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14066#[cfg_attr(test, assert_instr(vpmovusdb))]
14067pub fn _mm512_cvtusepi32_epi8(a: __m512i) -> __m128i {
14068 unsafe { transmute(src:vpmovusdb(a.as_u32x16(), src:u8x16::ZERO, mask:0b11111111_11111111)) }
14069}
14070
14071/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14072///
14073/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_epi8&expand=2064)
14074#[inline]
14075#[target_feature(enable = "avx512f")]
14076#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14077#[cfg_attr(test, assert_instr(vpmovusdb))]
pub fn _mm512_mask_cvtusepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i {
    unsafe { transmute(vpmovusdb(a.as_u32x16(), src.as_u8x16(), k)) }
}
14081
14082/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14083///
14084/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi32_epi8&expand=2065)
14085#[inline]
14086#[target_feature(enable = "avx512f")]
14087#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14088#[cfg_attr(test, assert_instr(vpmovusdb))]
pub fn _mm512_maskz_cvtusepi32_epi8(k: __mmask16, a: __m512i) -> __m128i {
    unsafe { transmute(vpmovusdb(a.as_u32x16(), u8x16::ZERO, k)) }
}
14092
14093/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14094///
14095/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi32_epi8&expand=2060)
14096#[inline]
14097#[target_feature(enable = "avx512f,avx512vl")]
14098#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14099#[cfg_attr(test, assert_instr(vpmovusdb))]
pub fn _mm256_cvtusepi32_epi8(a: __m256i) -> __m128i {
    unsafe { transmute(vpmovusdb256(a.as_u32x8(), u8x16::ZERO, 0b11111111)) }
}
14103
14104/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14105///
14106/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_epi8&expand=2061)
14107#[inline]
14108#[target_feature(enable = "avx512f,avx512vl")]
14109#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14110#[cfg_attr(test, assert_instr(vpmovusdb))]
pub fn _mm256_mask_cvtusepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovusdb256(a.as_u32x8(), src.as_u8x16(), k)) }
}
14114
14115/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14116///
14117/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi32_epi8&expand=2062)
14118#[inline]
14119#[target_feature(enable = "avx512f,avx512vl")]
14120#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14121#[cfg_attr(test, assert_instr(vpmovusdb))]
pub fn _mm256_maskz_cvtusepi32_epi8(k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovusdb256(a.as_u32x8(), u8x16::ZERO, k)) }
}
14125
14126/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14127///
14128/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi32_epi8&expand=2057)
14129#[inline]
14130#[target_feature(enable = "avx512f,avx512vl")]
14131#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14132#[cfg_attr(test, assert_instr(vpmovusdb))]
pub fn _mm_cvtusepi32_epi8(a: __m128i) -> __m128i {
    unsafe { transmute(vpmovusdb128(a.as_u32x4(), u8x16::ZERO, 0b11111111)) }
}
14136
14137/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14138///
14139/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_epi8&expand=2058)
14140#[inline]
14141#[target_feature(enable = "avx512f,avx512vl")]
14142#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14143#[cfg_attr(test, assert_instr(vpmovusdb))]
pub fn _mm_mask_cvtusepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovusdb128(a.as_u32x4(), src.as_u8x16(), k)) }
}
14147
14148/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14149///
14150/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi32_epi8&expand=2059)
14151#[inline]
14152#[target_feature(enable = "avx512f,avx512vl")]
14153#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14154#[cfg_attr(test, assert_instr(vpmovusdb))]
pub fn _mm_maskz_cvtusepi32_epi8(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovusdb128(a.as_u32x4(), u8x16::ZERO, k)) }
}
14158
14159/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst.
14160///
14161/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi64_epi32&expand=2087)
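///
/// A minimal usage sketch (illustrative; not taken from Intel's documentation), assuming an
/// x86_64 target with AVX-512F detected at runtime:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if std::is_x86_feature_detected!("avx512f") {
///         // SAFETY: AVX-512F availability was just verified.
///         unsafe {
///             let a = _mm512_set1_epi64(1_i64 << 40); // larger than u32::MAX
///             let r = _mm512_cvtusepi64_epi32(a);
///             // Out-of-range values saturate to u32::MAX.
///             assert_eq!(_mm256_extract_epi32::<0>(r) as u32, u32::MAX);
///         }
///     }
/// }
/// ```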
14162#[inline]
14163#[target_feature(enable = "avx512f")]
14164#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14165#[cfg_attr(test, assert_instr(vpmovusqd))]
pub fn _mm512_cvtusepi64_epi32(a: __m512i) -> __m256i {
    unsafe { transmute(vpmovusqd(a.as_u64x8(), u32x8::ZERO, 0b11111111)) }
}
14169
14170/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14171///
14172/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_epi32&expand=2088)
14173#[inline]
14174#[target_feature(enable = "avx512f")]
14175#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14176#[cfg_attr(test, assert_instr(vpmovusqd))]
pub fn _mm512_mask_cvtusepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i {
    unsafe { transmute(vpmovusqd(a.as_u64x8(), src.as_u32x8(), k)) }
}
14180
14181/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14182///
14183/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi64_epi32&expand=2089)
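///
/// A minimal sketch of the zeromask behaviour (illustrative; not taken from Intel's
/// documentation), assuming an x86_64 target with AVX-512F detected at runtime:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if std::is_x86_feature_detected!("avx512f") {
///         // SAFETY: AVX-512F availability was just verified.
///         unsafe {
///             let a = _mm512_set1_epi64(5);
///             // Only lane 0 is selected; lanes with a clear mask bit become zero.
///             let r = _mm512_maskz_cvtusepi64_epi32(0b0000_0001, a);
///             assert_eq!(_mm256_extract_epi32::<0>(r), 5);
///             assert_eq!(_mm256_extract_epi32::<1>(r), 0);
///         }
///     }
/// }
/// ```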
14184#[inline]
14185#[target_feature(enable = "avx512f")]
14186#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14187#[cfg_attr(test, assert_instr(vpmovusqd))]
pub fn _mm512_maskz_cvtusepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
    unsafe { transmute(vpmovusqd(a.as_u64x8(), u32x8::ZERO, k)) }
}
14191
14192/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst.
14193///
14194/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi64_epi32&expand=2084)
14195#[inline]
14196#[target_feature(enable = "avx512f,avx512vl")]
14197#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14198#[cfg_attr(test, assert_instr(vpmovusqd))]
pub fn _mm256_cvtusepi64_epi32(a: __m256i) -> __m128i {
    unsafe { transmute(vpmovusqd256(a.as_u64x4(), u32x4::ZERO, 0b11111111)) }
}
14202
14203/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14204///
14205/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_epi32&expand=2085)
14206#[inline]
14207#[target_feature(enable = "avx512f,avx512vl")]
14208#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14209#[cfg_attr(test, assert_instr(vpmovusqd))]
pub fn _mm256_mask_cvtusepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovusqd256(a.as_u64x4(), src.as_u32x4(), k)) }
}
14213
14214/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14215///
14216/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi64_epi32&expand=2086)
14217#[inline]
14218#[target_feature(enable = "avx512f,avx512vl")]
14219#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14220#[cfg_attr(test, assert_instr(vpmovusqd))]
pub fn _mm256_maskz_cvtusepi64_epi32(k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovusqd256(a.as_u64x4(), u32x4::ZERO, k)) }
}
14224
14225/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst.
14226///
14227/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi64_epi32&expand=2081)
14228#[inline]
14229#[target_feature(enable = "avx512f,avx512vl")]
14230#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14231#[cfg_attr(test, assert_instr(vpmovusqd))]
pub fn _mm_cvtusepi64_epi32(a: __m128i) -> __m128i {
    unsafe { transmute(vpmovusqd128(a.as_u64x2(), u32x4::ZERO, 0b11111111)) }
}
14235
14236/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14237///
14238/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_epi32&expand=2082)
14239#[inline]
14240#[target_feature(enable = "avx512f,avx512vl")]
14241#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14242#[cfg_attr(test, assert_instr(vpmovusqd))]
pub fn _mm_mask_cvtusepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovusqd128(a.as_u64x2(), src.as_u32x4(), k)) }
}
14246
14247/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14248///
14249/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi64_epi32&expand=2083)
14250#[inline]
14251#[target_feature(enable = "avx512f,avx512vl")]
14252#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14253#[cfg_attr(test, assert_instr(vpmovusqd))]
pub fn _mm_maskz_cvtusepi64_epi32(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovusqd128(a.as_u64x2(), u32x4::ZERO, k)) }
}
14257
14258/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
14259///
14260/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi64_epi16&expand=2078)
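///
/// A minimal usage sketch (illustrative; not taken from Intel's documentation), assuming an
/// x86_64 target with AVX-512F detected at runtime:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if std::is_x86_feature_detected!("avx512f") {
///         // SAFETY: AVX-512F availability was just verified.
///         unsafe {
///             let a = _mm512_setr_epi64(1, 2, 3, 70_000, 0, 0, 0, 0);
///             let r = _mm512_cvtusepi64_epi16(a);
///             assert_eq!(_mm_extract_epi16::<0>(r), 1); // in range: unchanged
///             assert_eq!(_mm_extract_epi16::<3>(r), 65535); // saturated to u16::MAX
///         }
///     }
/// }
/// ```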
14261#[inline]
14262#[target_feature(enable = "avx512f")]
14263#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14264#[cfg_attr(test, assert_instr(vpmovusqw))]
pub fn _mm512_cvtusepi64_epi16(a: __m512i) -> __m128i {
    unsafe { transmute(vpmovusqw(a.as_u64x8(), u16x8::ZERO, 0b11111111)) }
}
14268
14269/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14270///
14271/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_epi16&expand=2079)
14272#[inline]
14273#[target_feature(enable = "avx512f")]
14274#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14275#[cfg_attr(test, assert_instr(vpmovusqw))]
pub fn _mm512_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
    unsafe { transmute(vpmovusqw(a.as_u64x8(), src.as_u16x8(), k)) }
}
14279
14280/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14281///
14282/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi64_epi16&expand=2080)
14283#[inline]
14284#[target_feature(enable = "avx512f")]
14285#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14286#[cfg_attr(test, assert_instr(vpmovusqw))]
pub fn _mm512_maskz_cvtusepi64_epi16(k: __mmask8, a: __m512i) -> __m128i {
    unsafe { transmute(vpmovusqw(a.as_u64x8(), u16x8::ZERO, k)) }
}
14290
14291/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
14292///
14293/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi64_epi16&expand=2075)
14294#[inline]
14295#[target_feature(enable = "avx512f,avx512vl")]
14296#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14297#[cfg_attr(test, assert_instr(vpmovusqw))]
pub fn _mm256_cvtusepi64_epi16(a: __m256i) -> __m128i {
    unsafe { transmute(vpmovusqw256(a.as_u64x4(), u16x8::ZERO, 0b11111111)) }
}
14301
14302/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14303///
14304/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_epi16&expand=2076)
14305#[inline]
14306#[target_feature(enable = "avx512f,avx512vl")]
14307#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14308#[cfg_attr(test, assert_instr(vpmovusqw))]
pub fn _mm256_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovusqw256(a.as_u64x4(), src.as_u16x8(), k)) }
}
14312
14313/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14314///
14315/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi64_epi16&expand=2077)
14316#[inline]
14317#[target_feature(enable = "avx512f,avx512vl")]
14318#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14319#[cfg_attr(test, assert_instr(vpmovusqw))]
pub fn _mm256_maskz_cvtusepi64_epi16(k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovusqw256(a.as_u64x4(), u16x8::ZERO, k)) }
}
14323
14324/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
14325///
14326/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi64_epi16&expand=2072)
14327#[inline]
14328#[target_feature(enable = "avx512f,avx512vl")]
14329#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14330#[cfg_attr(test, assert_instr(vpmovusqw))]
pub fn _mm_cvtusepi64_epi16(a: __m128i) -> __m128i {
    unsafe { transmute(vpmovusqw128(a.as_u64x2(), u16x8::ZERO, 0b11111111)) }
}
14334
14335/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14336///
14337/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_epi16&expand=2073)
14338#[inline]
14339#[target_feature(enable = "avx512f,avx512vl")]
14340#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14341#[cfg_attr(test, assert_instr(vpmovusqw))]
pub fn _mm_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovusqw128(a.as_u64x2(), src.as_u16x8(), k)) }
}
14345
14346/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14347///
14348/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi64_epi16&expand=2074)
14349#[inline]
14350#[target_feature(enable = "avx512f,avx512vl")]
14351#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14352#[cfg_attr(test, assert_instr(vpmovusqw))]
pub fn _mm_maskz_cvtusepi64_epi16(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovusqw128(a.as_u64x2(), u16x8::ZERO, k)) }
}
14356
14357/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14358///
14359/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi64_epi8&expand=2096)
14360#[inline]
14361#[target_feature(enable = "avx512f")]
14362#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14363#[cfg_attr(test, assert_instr(vpmovusqb))]
pub fn _mm512_cvtusepi64_epi8(a: __m512i) -> __m128i {
    unsafe { transmute(vpmovusqb(a.as_u64x8(), u8x16::ZERO, 0b11111111)) }
}
14367
14368/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14369///
14370/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_epi8&expand=2097)
14371#[inline]
14372#[target_feature(enable = "avx512f")]
14373#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14374#[cfg_attr(test, assert_instr(vpmovusqb))]
pub fn _mm512_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
    unsafe { transmute(vpmovusqb(a.as_u64x8(), src.as_u8x16(), k)) }
}
14378
14379/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14380///
14381/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi64_epi8&expand=2098)
14382#[inline]
14383#[target_feature(enable = "avx512f")]
14384#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14385#[cfg_attr(test, assert_instr(vpmovusqb))]
pub fn _mm512_maskz_cvtusepi64_epi8(k: __mmask8, a: __m512i) -> __m128i {
    unsafe { transmute(vpmovusqb(a.as_u64x8(), u8x16::ZERO, k)) }
}
14389
14390/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14391///
14392/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi64_epi8&expand=2093)
14393#[inline]
14394#[target_feature(enable = "avx512f,avx512vl")]
14395#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14396#[cfg_attr(test, assert_instr(vpmovusqb))]
pub fn _mm256_cvtusepi64_epi8(a: __m256i) -> __m128i {
    unsafe { transmute(vpmovusqb256(a.as_u64x4(), u8x16::ZERO, 0b11111111)) }
}
14400
14401/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14402///
14403/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_epi8&expand=2094)
14404#[inline]
14405#[target_feature(enable = "avx512f,avx512vl")]
14406#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14407#[cfg_attr(test, assert_instr(vpmovusqb))]
pub fn _mm256_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovusqb256(a.as_u64x4(), src.as_u8x16(), k)) }
}
14411
14412/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14413///
14414/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi64_epi8&expand=2095)
14415#[inline]
14416#[target_feature(enable = "avx512f,avx512vl")]
14417#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14418#[cfg_attr(test, assert_instr(vpmovusqb))]
pub fn _mm256_maskz_cvtusepi64_epi8(k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovusqb256(a.as_u64x4(), u8x16::ZERO, k)) }
}
14422
14423/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14424///
14425/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi64_epi8&expand=2090)
14426#[inline]
14427#[target_feature(enable = "avx512f,avx512vl")]
14428#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14429#[cfg_attr(test, assert_instr(vpmovusqb))]
pub fn _mm_cvtusepi64_epi8(a: __m128i) -> __m128i {
    unsafe { transmute(vpmovusqb128(a.as_u64x2(), u8x16::ZERO, 0b11111111)) }
}
14433
14434/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14435///
14436/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_epi8&expand=2091)
14437#[inline]
14438#[target_feature(enable = "avx512f,avx512vl")]
14439#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14440#[cfg_attr(test, assert_instr(vpmovusqb))]
pub fn _mm_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovusqb128(a.as_u64x2(), src.as_u8x16(), k)) }
}
14444
14445/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14446///
14447/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi64_epi8&expand=2092)
14448#[inline]
14449#[target_feature(enable = "avx512f,avx512vl")]
14450#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14451#[cfg_attr(test, assert_instr(vpmovusqb))]
pub fn _mm_maskz_cvtusepi64_epi8(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovusqb128(a.as_u64x2(), u8x16::ZERO, k)) }
}
14455
14456/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.
14457///
14458/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
14459/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14460/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14461/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14462/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14463/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14464///
14465/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_epi32&expand=1335)
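///
/// A minimal sketch of how the rounding parameter changes the result (illustrative; not taken
/// from Intel's documentation), assuming an x86_64 target with AVX-512F detected at runtime:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if std::is_x86_feature_detected!("avx512f") {
///         // SAFETY: AVX-512F availability was just verified.
///         unsafe {
///             let a = _mm512_set1_ps(1.5);
///             let nearest =
///                 _mm512_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
///             let truncated =
///                 _mm512_cvt_roundps_epi32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
///             assert_eq!(_mm512_cvtsi512_si32(nearest), 2); // ties round to even
///             assert_eq!(_mm512_cvtsi512_si32(truncated), 1);
///         }
///     }
/// }
/// ```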
14466#[inline]
14467#[target_feature(enable = "avx512f")]
14468#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14469#[cfg_attr(test, assert_instr(vcvtps2dq, ROUNDING = 8))]
14470#[rustc_legacy_const_generics(1)]
pub fn _mm512_cvt_roundps_epi32<const ROUNDING: i32>(a: __m512) -> __m512i {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let r = vcvtps2dq(a, i32x16::ZERO, 0b11111111_11111111, ROUNDING);
        transmute(r)
    }
}
14479
14480/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14481///
14482/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14483/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14484/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14485/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14486/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14487/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14488///
14489/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_epi32&expand=1336)
14490#[inline]
14491#[target_feature(enable = "avx512f")]
14492#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14493#[cfg_attr(test, assert_instr(vcvtps2dq, ROUNDING = 8))]
14494#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_cvt_roundps_epi32<const ROUNDING: i32>(
    src: __m512i,
    k: __mmask16,
    a: __m512,
) -> __m512i {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let src = src.as_i32x16();
        let r = vcvtps2dq(a, src, k, ROUNDING);
        transmute(r)
    }
}
14508
14509/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14510///
14511/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14512/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14513/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14514/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14515/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14516/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14517///
14518/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_epi32&expand=1337)
14519#[inline]
14520#[target_feature(enable = "avx512f")]
14521#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14522#[cfg_attr(test, assert_instr(vcvtps2dq, ROUNDING = 8))]
14523#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_cvt_roundps_epi32<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m512i {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let r = vcvtps2dq(a, i32x16::ZERO, k, ROUNDING);
        transmute(r)
    }
}
14532
14533/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.\
14534///
14535/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14536/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14537/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14538/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14539/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14540/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14541///
14542/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_epu32&expand=1341)
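///
/// A minimal usage sketch (illustrative; not taken from Intel's documentation), assuming an
/// x86_64 target with AVX-512F detected at runtime. The value below fits in a u32 but not in an
/// i32, which is what distinguishes this intrinsic from the signed conversion:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if std::is_x86_feature_detected!("avx512f") {
///         // SAFETY: AVX-512F availability was just verified.
///         unsafe {
///             let a = _mm512_set1_ps(3_000_000_000.0); // exactly representable in f32
///             let r =
///                 _mm512_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
///             assert_eq!(_mm512_cvtsi512_si32(r) as u32, 3_000_000_000);
///         }
///     }
/// }
/// ```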
14543#[inline]
14544#[target_feature(enable = "avx512f")]
14545#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14546#[cfg_attr(test, assert_instr(vcvtps2udq, ROUNDING = 8))]
14547#[rustc_legacy_const_generics(1)]
pub fn _mm512_cvt_roundps_epu32<const ROUNDING: i32>(a: __m512) -> __m512i {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let r = vcvtps2udq(a, u32x16::ZERO, 0b11111111_11111111, ROUNDING);
        transmute(r)
    }
}
14556
14557/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14558///
14559/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14560/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14561/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14562/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14563/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14564/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14565///
14566/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_epu32&expand=1342)
14567#[inline]
14568#[target_feature(enable = "avx512f")]
14569#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14570#[cfg_attr(test, assert_instr(vcvtps2udq, ROUNDING = 8))]
14571#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_cvt_roundps_epu32<const ROUNDING: i32>(
    src: __m512i,
    k: __mmask16,
    a: __m512,
) -> __m512i {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let src = src.as_u32x16();
        let r = vcvtps2udq(a, src, k, ROUNDING);
        transmute(r)
    }
}
14585
14586/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14587///
14588/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14589/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14590/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14591/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14592/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14593/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14594///
14595/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_epu32&expand=1343)
14596#[inline]
14597#[target_feature(enable = "avx512f")]
14598#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14599#[cfg_attr(test, assert_instr(vcvtps2udq, ROUNDING = 8))]
14600#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_cvt_roundps_epu32<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m512i {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f32x16();
        let r = vcvtps2udq(a, u32x16::ZERO, k, ROUNDING);
        transmute(r)
    }
}
14609
14610/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.\
14611/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
14612///
14613/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_pd&expand=1347)
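///
/// A minimal usage sketch (illustrative; not taken from Intel's documentation), assuming an
/// x86_64 target with AVX-512F detected at runtime. Widening to double precision is exact, so
/// the SAE parameter only controls exception suppression:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if std::is_x86_feature_detected!("avx512f") {
///         // SAFETY: AVX-512F availability was just verified.
///         unsafe {
///             let a = _mm256_set1_ps(1.25);
///             let r = _mm512_cvt_roundps_pd::<{ _MM_FROUND_NO_EXC }>(a);
///             assert_eq!(_mm512_cvtsd_f64(r), 1.25);
///         }
///     }
/// }
/// ```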
14614#[inline]
14615#[target_feature(enable = "avx512f")]
14616#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14617#[cfg_attr(test, assert_instr(vcvtps2pd, SAE = 8))]
14618#[rustc_legacy_const_generics(1)]
pub fn _mm512_cvt_roundps_pd<const SAE: i32>(a: __m256) -> __m512d {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x8();
        let r = vcvtps2pd(a, f64x8::ZERO, 0b11111111, SAE);
        transmute(r)
    }
}
14627
14628/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14629/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
14630///
14631/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_pd&expand=1336)
14632#[inline]
14633#[target_feature(enable = "avx512f")]
14634#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14635#[cfg_attr(test, assert_instr(vcvtps2pd, SAE = 8))]
14636#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_cvt_roundps_pd<const SAE: i32>(src: __m512d, k: __mmask8, a: __m256) -> __m512d {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x8();
        let src = src.as_f64x8();
        let r = vcvtps2pd(a, src, k, SAE);
        transmute(r)
    }
}
14646
14647/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14648/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
14649///
14650/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_pd&expand=1337)
14651#[inline]
14652#[target_feature(enable = "avx512f")]
14653#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14654#[cfg_attr(test, assert_instr(vcvtps2pd, SAE = 8))]
14655#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_cvt_roundps_pd<const SAE: i32>(k: __mmask8, a: __m256) -> __m512d {
    unsafe {
        static_assert_sae!(SAE);
        let a = a.as_f32x8();
        let r = vcvtps2pd(a, f64x8::ZERO, k, SAE);
        transmute(r)
    }
}
14664
14665/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.\
14666///
14667/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14668/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14669/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14670/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14671/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14672/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14673///
14674/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_epi32&expand=1315)
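///
/// A minimal sketch of how the rounding parameter changes the result (illustrative; not taken
/// from Intel's documentation), assuming an x86_64 target with AVX-512F detected at runtime:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if std::is_x86_feature_detected!("avx512f") {
///         // SAFETY: AVX-512F availability was just verified.
///         unsafe {
///             let a = _mm512_set1_pd(-2.5);
///             let nearest =
///                 _mm512_cvt_roundpd_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
///             let floor =
///                 _mm512_cvt_roundpd_epi32::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a);
///             assert_eq!(_mm256_extract_epi32::<0>(nearest), -2); // ties round to even
///             assert_eq!(_mm256_extract_epi32::<0>(floor), -3);
///         }
///     }
/// }
/// ```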
14675#[inline]
14676#[target_feature(enable = "avx512f")]
14677#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14678#[cfg_attr(test, assert_instr(vcvtpd2dq, ROUNDING = 8))]
14679#[rustc_legacy_const_generics(1)]
pub fn _mm512_cvt_roundpd_epi32<const ROUNDING: i32>(a: __m512d) -> __m256i {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let r = vcvtpd2dq(a, i32x8::ZERO, 0b11111111, ROUNDING);
        transmute(r)
    }
}
14688
14689/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14690///
14691/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14692/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14693/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14694/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14695/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14696/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14697///
14698/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_epi32&expand=1316)
14699#[inline]
14700#[target_feature(enable = "avx512f")]
14701#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14702#[cfg_attr(test, assert_instr(vcvtpd2dq, ROUNDING = 8))]
14703#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_cvt_roundpd_epi32<const ROUNDING: i32>(
    src: __m256i,
    k: __mmask8,
    a: __m512d,
) -> __m256i {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let src = src.as_i32x8();
        let r = vcvtpd2dq(a, src, k, ROUNDING);
        transmute(r)
    }
}
14717
14718/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14719///
14720/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14721/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14722/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14723/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14724/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14725/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14726///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_epi32&expand=1317)
14728#[inline]
14729#[target_feature(enable = "avx512f")]
14730#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14731#[cfg_attr(test, assert_instr(vcvtpd2dq, ROUNDING = 8))]
14732#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_cvt_roundpd_epi32<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m256i {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let r = vcvtpd2dq(a, i32x8::ZERO, k, ROUNDING);
        transmute(r)
    }
}
14741
14742/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.\
14743///
14744/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14745/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14746/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14747/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14748/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14749/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14750///
14751/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_epu32&expand=1321)
14752#[inline]
14753#[target_feature(enable = "avx512f")]
14754#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14755#[cfg_attr(test, assert_instr(vcvtpd2udq, ROUNDING = 8))]
14756#[rustc_legacy_const_generics(1)]
pub fn _mm512_cvt_roundpd_epu32<const ROUNDING: i32>(a: __m512d) -> __m256i {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let r = vcvtpd2udq(a, u32x8::ZERO, 0b11111111, ROUNDING);
        transmute(r)
    }
}
14765
14766/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14767///
14768/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14769/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14770/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14771/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14772/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14773/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14774///
14775/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_epu32&expand=1322)
14776#[inline]
14777#[target_feature(enable = "avx512f")]
14778#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14779#[cfg_attr(test, assert_instr(vcvtpd2udq, ROUNDING = 8))]
14780#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_cvt_roundpd_epu32<const ROUNDING: i32>(
    src: __m256i,
    k: __mmask8,
    a: __m512d,
) -> __m256i {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let src = src.as_u32x8();
        let r = vcvtpd2udq(a, src, k, ROUNDING);
        transmute(r)
    }
}
14794
14795/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14796///
14797/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14798/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14799/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14800/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14801/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14802/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14803///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_epu32&expand=1323)
14805#[inline]
14806#[target_feature(enable = "avx512f")]
14807#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14808#[cfg_attr(test, assert_instr(vcvtpd2udq, ROUNDING = 8))]
14809#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_cvt_roundpd_epu32<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m256i {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let r = vcvtpd2udq(a, u32x8::ZERO, k, ROUNDING);
        transmute(r)
    }
}
14818
14819/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
14820///
14821/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14822/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14823/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14824/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14825/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14826/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14827///
14828/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_ps&expand=1327)
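///
/// A minimal sketch of how the rounding parameter changes the narrowing result (illustrative;
/// not taken from Intel's documentation), assuming an x86_64 target with AVX-512F detected at
/// runtime. The input lies strictly between two adjacent f32 values, so the mode decides which
/// neighbour is produced:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if std::is_x86_feature_detected!("avx512f") {
///         // SAFETY: AVX-512F availability was just verified.
///         unsafe {
///             let a = _mm512_set1_pd(1.0 + f64::EPSILON);
///             let down = _mm512_cvt_roundpd_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
///             let up = _mm512_cvt_roundpd_ps::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a);
///             assert_eq!(_mm256_cvtss_f32(down), 1.0);
///             assert_eq!(_mm256_cvtss_f32(up), 1.0 + f32::EPSILON);
///         }
///     }
/// }
/// ```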
14829#[inline]
14830#[target_feature(enable = "avx512f")]
14831#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14832#[cfg_attr(test, assert_instr(vcvtpd2ps, ROUNDING = 8))]
14833#[rustc_legacy_const_generics(1)]
pub fn _mm512_cvt_roundpd_ps<const ROUNDING: i32>(a: __m512d) -> __m256 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let r = vcvtpd2ps(a, f32x8::ZERO, 0b11111111, ROUNDING);
        transmute(r)
    }
}
14842
14843/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14844///
14845/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14846/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14847/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14848/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14849/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14850/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14851///
14852/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_ps&expand=1328)
14853#[inline]
14854#[target_feature(enable = "avx512f")]
14855#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14856#[cfg_attr(test, assert_instr(vcvtpd2ps, ROUNDING = 8))]
14857#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_cvt_roundpd_ps<const ROUNDING: i32>(
    src: __m256,
    k: __mmask8,
    a: __m512d,
) -> __m256 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let src = src.as_f32x8();
        let r = vcvtpd2ps(a, src, k, ROUNDING);
        transmute(r)
    }
}
14871
14872/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14873///
14874/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14875/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14876/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14877/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14878/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14879/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14880///
14881/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_ps&expand=1329)
14882#[inline]
14883#[target_feature(enable = "avx512f")]
14884#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14885#[cfg_attr(test, assert_instr(vcvtpd2ps, ROUNDING = 8))]
14886#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_cvt_roundpd_ps<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m256 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_f64x8();
        let r = vcvtpd2ps(a, f32x8::ZERO, k, ROUNDING);
        transmute(r)
    }
}
14895
14896/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
14897///
14898/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14899/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14900/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14901/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14902/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14903/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14904///
14905/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepi32_ps&expand=1294)
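///
/// A minimal sketch of how the rounding parameter matters for integers that have no exact f32
/// representation (illustrative; not taken from Intel's documentation), assuming an x86_64
/// target with AVX-512F detected at runtime:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if std::is_x86_feature_detected!("avx512f") {
///         // SAFETY: AVX-512F availability was just verified.
///         unsafe {
///             let a = _mm512_set1_epi32(16_777_217); // 2^24 + 1, not exactly representable
///             let down = _mm512_cvt_roundepi32_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
///             let up = _mm512_cvt_roundepi32_ps::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a);
///             assert_eq!(_mm512_cvtss_f32(down), 16_777_216.0);
///             assert_eq!(_mm512_cvtss_f32(up), 16_777_218.0);
///         }
///     }
/// }
/// ```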
14906#[inline]
14907#[target_feature(enable = "avx512f")]
14908#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14909#[cfg_attr(test, assert_instr(vcvtdq2ps, ROUNDING = 8))]
14910#[rustc_legacy_const_generics(1)]
pub fn _mm512_cvt_roundepi32_ps<const ROUNDING: i32>(a: __m512i) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_i32x16();
        let r = vcvtdq2ps(a, ROUNDING);
        transmute(r)
    }
}
14919
14920/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14921///
14922/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14923/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14924/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14925/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14926/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14927/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14928///
14929/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepi32_ps&expand=1295)
14930#[inline]
14931#[target_feature(enable = "avx512f")]
14932#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14933#[cfg_attr(test, assert_instr(vcvtdq2ps, ROUNDING = 8))]
14934#[rustc_legacy_const_generics(3)]
pub fn _mm512_mask_cvt_roundepi32_ps<const ROUNDING: i32>(
    src: __m512,
    k: __mmask16,
    a: __m512i,
) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_i32x16();
        let r = vcvtdq2ps(a, ROUNDING);
        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
    }
}
14947
14948/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14949///
14950/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14951/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14952/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14953/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14954/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14955/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14956///
14957/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepi32_ps&expand=1296)
14958#[inline]
14959#[target_feature(enable = "avx512f")]
14960#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14961#[cfg_attr(test, assert_instr(vcvtdq2ps, ROUNDING = 8))]
14962#[rustc_legacy_const_generics(2)]
pub fn _mm512_maskz_cvt_roundepi32_ps<const ROUNDING: i32>(k: __mmask16, a: __m512i) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_i32x16();
        let r = vcvtdq2ps(a, ROUNDING);
        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
    }
}
14971
14972/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
14973///
14974/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14975/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14976/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14977/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14978/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14979/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14980///
14981/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepu32_ps&expand=1303)
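///
/// A minimal usage sketch (illustrative; not taken from Intel's documentation), assuming an
/// x86_64 target with AVX-512F detected at runtime. The all-ones bit pattern is read as
/// u32::MAX rather than -1, which is what distinguishes this intrinsic from the signed
/// conversion:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if std::is_x86_feature_detected!("avx512f") {
///         // SAFETY: AVX-512F availability was just verified.
///         unsafe {
///             let a = _mm512_set1_epi32(-1); // bit pattern of u32::MAX
///             let r =
///                 _mm512_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
///             assert_eq!(_mm512_cvtss_f32(r), 4_294_967_296.0); // u32::MAX rounded to nearest f32
///         }
///     }
/// }
/// ```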
14982#[inline]
14983#[target_feature(enable = "avx512f")]
14984#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
14985#[cfg_attr(test, assert_instr(vcvtudq2ps, ROUNDING = 8))]
14986#[rustc_legacy_const_generics(1)]
pub fn _mm512_cvt_roundepu32_ps<const ROUNDING: i32>(a: __m512i) -> __m512 {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a = a.as_u32x16();
        let r = vcvtudq2ps(a, ROUNDING);
        transmute(r)
    }
}
14995
14996/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14997///
14998/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14999/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15000/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15001/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15002/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15003/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15004///
15005/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepu32_ps&expand=1304)
15006#[inline]
15007#[target_feature(enable = "avx512f")]
15008#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15009#[cfg_attr(test, assert_instr(vcvtudq2ps, ROUNDING = 8))]
15010#[rustc_legacy_const_generics(3)]
15011pub fn _mm512_mask_cvt_roundepu32_ps<const ROUNDING: i32>(
15012 src: __m512,
15013 k: __mmask16,
15014 a: __m512i,
15015) -> __m512 {
15016 unsafe {
15017 static_assert_rounding!(ROUNDING);
15018 let a: u32x16 = a.as_u32x16();
15019 let r: f32x16 = vcvtudq2ps(a, ROUNDING);
        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
15021 }
15022}
15023
15024/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15025///
15026/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
15027/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15028/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15029/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15030/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15031/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15032///
15033/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepu32_ps&expand=1305)
15034#[inline]
15035#[target_feature(enable = "avx512f")]
15036#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15037#[cfg_attr(test, assert_instr(vcvtudq2ps, ROUNDING = 8))]
15038#[rustc_legacy_const_generics(2)]
15039pub fn _mm512_maskz_cvt_roundepu32_ps<const ROUNDING: i32>(k: __mmask16, a: __m512i) -> __m512 {
15040 unsafe {
15041 static_assert_rounding!(ROUNDING);
15042 let a: u32x16 = a.as_u32x16();
15043 let r: f32x16 = vcvtudq2ps(a, ROUNDING);
        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
15045 }
15046}
15047
15048/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst.\
15049/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
15050/// * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
15051/// * [`_MM_FROUND_TO_NEG_INF`] // round down
15052/// * [`_MM_FROUND_TO_POS_INF`] // round up
15053/// * [`_MM_FROUND_TO_ZERO`] // truncate
15054/// * [`_MM_FROUND_CUR_DIRECTION`] // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
15055/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
15056/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] // round down, and suppress exceptions
15057/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] // round up, and suppress exceptions
15058/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] // truncate, and suppress exceptions
15059/// * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`] // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
15060///
15061/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_ph&expand=1354)
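///
/// A minimal usage sketch (illustrative, not from Intel's documentation); it round-trips a
/// value that is exactly representable in half precision, assuming runtime `avx512f` support:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         unsafe {
///             let a = _mm512_set1_ps(1.5);
///             // 1.5 fits exactly in an f16, so converting down and back up is lossless.
///             let h = _mm512_cvt_roundps_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
///             let r = _mm512_cvtph_ps(h);
///             assert_eq!(_mm512_cmpeq_ps_mask(r, a), 0xffff);
///         }
///     }
/// }
/// ```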
15062#[inline]
15063#[target_feature(enable = "avx512f")]
15064#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15065#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
15066#[rustc_legacy_const_generics(1)]
15067pub fn _mm512_cvt_roundps_ph<const ROUNDING: i32>(a: __m512) -> __m256i {
15068 unsafe {
15069 static_assert_extended_rounding!(ROUNDING);
15070 let a: f32x16 = a.as_f32x16();
        let r: i16x16 = vcvtps2ph(a, ROUNDING, i16x16::ZERO, 0b11111111_11111111);
        transmute(r)
15073 }
15074}
15075
15076/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15077/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
15078/// * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
15079/// * [`_MM_FROUND_TO_NEG_INF`] // round down
15080/// * [`_MM_FROUND_TO_POS_INF`] // round up
15081/// * [`_MM_FROUND_TO_ZERO`] // truncate
15082/// * [`_MM_FROUND_CUR_DIRECTION`] // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
15083/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
15084/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] // round down, and suppress exceptions
15085/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] // round up, and suppress exceptions
15086/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] // truncate, and suppress exceptions
15087/// * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`] // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
15088///
15089/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_ph&expand=1355)
15090#[inline]
15091#[target_feature(enable = "avx512f")]
15092#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15093#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
15094#[rustc_legacy_const_generics(3)]
15095pub fn _mm512_mask_cvt_roundps_ph<const ROUNDING: i32>(
15096 src: __m256i,
15097 k: __mmask16,
15098 a: __m512,
15099) -> __m256i {
15100 unsafe {
15101 static_assert_extended_rounding!(ROUNDING);
15102 let a: f32x16 = a.as_f32x16();
15103 let src: i16x16 = src.as_i16x16();
        let r: i16x16 = vcvtps2ph(a, ROUNDING, src, k);
        transmute(r)
15106 }
15107}
15108
15109/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15110/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
15111/// * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
15112/// * [`_MM_FROUND_TO_NEG_INF`] // round down
15113/// * [`_MM_FROUND_TO_POS_INF`] // round up
15114/// * [`_MM_FROUND_TO_ZERO`] // truncate
15115/// * [`_MM_FROUND_CUR_DIRECTION`] // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
15116/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
15117/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] // round down, and suppress exceptions
15118/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] // round up, and suppress exceptions
15119/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] // truncate, and suppress exceptions
15120/// * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`] // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
15121///
15122/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_ph&expand=1356)
15123#[inline]
15124#[target_feature(enable = "avx512f")]
15125#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15126#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
15127#[rustc_legacy_const_generics(2)]
15128pub fn _mm512_maskz_cvt_roundps_ph<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m256i {
15129 unsafe {
15130 static_assert_extended_rounding!(ROUNDING);
15131 let a: f32x16 = a.as_f32x16();
        let r: i16x16 = vcvtps2ph(a, ROUNDING, i16x16::ZERO, k);
        transmute(r)
15134 }
15135}
15136
15137/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15138/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:
15139/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15140/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15141/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15142/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15143/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15144///
15145/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvt_roundps_ph&expand=1352)
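///
/// A minimal usage sketch (illustrative, not from Intel's documentation); it checks the raw
/// half-precision bit patterns so that only `avx512f`/`avx512vl` support is needed at runtime:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512vl") {
///         unsafe {
///             let a = _mm256_set1_ps(1.5);      // converts to the f16 bit pattern 0x3E00
///             let src = _mm_set1_epi16(0x3C00); // f16 bit pattern of 1.0
///             let h = _mm256_mask_cvt_roundps_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
///                 src, 0b0000_1111, a,
///             );
///             // Lanes selected by the mask hold the converted 1.5, the rest keep `src`.
///             assert_eq!(_mm_cmpeq_epi32_mask(h, _mm_set1_epi32(0x3E00_3E00)), 0b0011);
///             assert_eq!(_mm_cmpeq_epi32_mask(h, _mm_set1_epi32(0x3C00_3C00)), 0b1100);
///         }
///     }
/// }
/// ```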
15146#[inline]
15147#[target_feature(enable = "avx512f,avx512vl")]
15148#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15149#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15150#[rustc_legacy_const_generics(3)]
15151pub fn _mm256_mask_cvt_roundps_ph<const IMM8: i32>(
15152 src: __m128i,
15153 k: __mmask8,
15154 a: __m256,
15155) -> __m128i {
15156 unsafe {
15157 static_assert_uimm_bits!(IMM8, 8);
15158 let a: f32x8 = a.as_f32x8();
15159 let src: i16x8 = src.as_i16x8();
        let r: i16x8 = vcvtps2ph256(a, IMM8, src, k);
        transmute(r)
15162 }
15163}
15164
15165/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15166/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15167/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15168/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15169/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15170/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15171/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15172///
15173/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvt_roundps_ph&expand=1353)
15174#[inline]
15175#[target_feature(enable = "avx512f,avx512vl")]
15176#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15177#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15178#[rustc_legacy_const_generics(2)]
15179pub fn _mm256_maskz_cvt_roundps_ph<const IMM8: i32>(k: __mmask8, a: __m256) -> __m128i {
15180 unsafe {
15181 static_assert_uimm_bits!(IMM8, 8);
15182 let a: f32x8 = a.as_f32x8();
        let r: i16x8 = vcvtps2ph256(a, IMM8, i16x8::ZERO, k);
        transmute(r)
15185 }
15186}
15187
15188/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15189/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15190/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15191/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15192/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15193/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15194/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15195///
15196/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvt_roundps_ph&expand=1350)
15197#[inline]
15198#[target_feature(enable = "avx512f,avx512vl")]
15199#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15200#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15201#[rustc_legacy_const_generics(3)]
15202pub fn _mm_mask_cvt_roundps_ph<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
15203 unsafe {
15204 static_assert_uimm_bits!(IMM8, 8);
15205 let a: f32x4 = a.as_f32x4();
15206 let src: i16x8 = src.as_i16x8();
        let r: i16x8 = vcvtps2ph128(a, IMM8, src, k);
        transmute(r)
15209 }
15210}
15211
15212/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15213/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15214/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15215/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15216/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15217/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15218/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15219///
15220/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvt_roundps_ph&expand=1351)
15221#[inline]
15222#[target_feature(enable = "avx512f,avx512vl")]
15223#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15224#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15225#[rustc_legacy_const_generics(2)]
15226pub fn _mm_maskz_cvt_roundps_ph<const IMM8: i32>(k: __mmask8, a: __m128) -> __m128i {
15227 unsafe {
15228 static_assert_uimm_bits!(IMM8, 8);
15229 let a: f32x4 = a.as_f32x4();
        let r: i16x8 = vcvtps2ph128(a, IMM8, i16x8::ZERO, k);
        transmute(r)
15232 }
15233}
15234
15235/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst.\
15236/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
15237/// * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
15238/// * [`_MM_FROUND_TO_NEG_INF`] // round down
15239/// * [`_MM_FROUND_TO_POS_INF`] // round up
15240/// * [`_MM_FROUND_TO_ZERO`] // truncate
15241/// * [`_MM_FROUND_CUR_DIRECTION`] // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
15242/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
15243/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] // round down, and suppress exceptions
15244/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] // round up, and suppress exceptions
15245/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] // truncate, and suppress exceptions
15246/// * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`] // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
15247///
15248/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_ph&expand=1778)
15249#[inline]
15250#[target_feature(enable = "avx512f")]
15251#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15252#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
15253#[rustc_legacy_const_generics(1)]
15254pub fn _mm512_cvtps_ph<const ROUNDING: i32>(a: __m512) -> __m256i {
15255 unsafe {
15256 static_assert_extended_rounding!(ROUNDING);
15257 let a: f32x16 = a.as_f32x16();
        let r: i16x16 = vcvtps2ph(a, ROUNDING, i16x16::ZERO, 0b11111111_11111111);
        transmute(r)
15260 }
15261}
15262
15263/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15264/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
15265/// * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
15266/// * [`_MM_FROUND_TO_NEG_INF`] // round down
15267/// * [`_MM_FROUND_TO_POS_INF`] // round up
15268/// * [`_MM_FROUND_TO_ZERO`] // truncate
15269/// * [`_MM_FROUND_CUR_DIRECTION`] // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
15270/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
15271/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] // round down, and suppress exceptions
15272/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] // round up, and suppress exceptions
15273/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] // truncate, and suppress exceptions
15274/// * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`] // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
15275///
15276/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_ph&expand=1779)
15277#[inline]
15278#[target_feature(enable = "avx512f")]
15279#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15280#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
15281#[rustc_legacy_const_generics(3)]
15282pub fn _mm512_mask_cvtps_ph<const ROUNDING: i32>(src: __m256i, k: __mmask16, a: __m512) -> __m256i {
15283 unsafe {
15284 static_assert_extended_rounding!(ROUNDING);
15285 let a: f32x16 = a.as_f32x16();
15286 let src: i16x16 = src.as_i16x16();
        let r: i16x16 = vcvtps2ph(a, ROUNDING, src, k);
        transmute(r)
15289 }
15290}
15291
15292/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15293/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
15294/// * [`_MM_FROUND_TO_NEAREST_INT`] // round to nearest
15295/// * [`_MM_FROUND_TO_NEG_INF`] // round down
15296/// * [`_MM_FROUND_TO_POS_INF`] // round up
15297/// * [`_MM_FROUND_TO_ZERO`] // truncate
15298/// * [`_MM_FROUND_CUR_DIRECTION`] // use MXCSR.RC; see [`_MM_SET_ROUNDING_MODE`]
15299/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] // round to nearest, and suppress exceptions
15300/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] // round down, and suppress exceptions
15301/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] // round up, and suppress exceptions
15302/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] // truncate, and suppress exceptions
15303/// * [`_MM_FROUND_CUR_DIRECTION`] | [`_MM_FROUND_NO_EXC`] // use MXCSR.RC and suppress exceptions; see [`_MM_SET_ROUNDING_MODE`]
15304///
15305/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_ph&expand=1780)
15306#[inline]
15307#[target_feature(enable = "avx512f")]
15308#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15309#[cfg_attr(test, assert_instr(vcvtps2ph, ROUNDING = 8))]
15310#[rustc_legacy_const_generics(2)]
15311pub fn _mm512_maskz_cvtps_ph<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m256i {
15312 unsafe {
15313 static_assert_extended_rounding!(ROUNDING);
15314 let a: f32x16 = a.as_f32x16();
        let r: i16x16 = vcvtps2ph(a, ROUNDING, i16x16::ZERO, k);
        transmute(r)
15317 }
15318}
15319
15320/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15321/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15322/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
15323/// * [`_MM_FROUND_TO_NEG_INF`] : round down
15324/// * [`_MM_FROUND_TO_POS_INF`] : round up
15325/// * [`_MM_FROUND_TO_ZERO`] : truncate
15326/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15327///
15328/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_ph&expand=1776)
15329#[inline]
15330#[target_feature(enable = "avx512f,avx512vl")]
15331#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15332#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15333#[rustc_legacy_const_generics(3)]
15334pub fn _mm256_mask_cvtps_ph<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m256) -> __m128i {
15335 unsafe {
15336 static_assert_uimm_bits!(IMM8, 8);
15337 let a: f32x8 = a.as_f32x8();
15338 let src: i16x8 = src.as_i16x8();
        let r: i16x8 = vcvtps2ph256(a, IMM8, src, k);
        transmute(r)
15341 }
15342}
15343
15344/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15345/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15346/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
15347/// * [`_MM_FROUND_TO_NEG_INF`] : round down
15348/// * [`_MM_FROUND_TO_POS_INF`] : round up
15349/// * [`_MM_FROUND_TO_ZERO`] : truncate
15350/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15351///
15352/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_ph&expand=1777)
15353#[inline]
15354#[target_feature(enable = "avx512f,avx512vl")]
15355#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15356#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15357#[rustc_legacy_const_generics(2)]
15358pub fn _mm256_maskz_cvtps_ph<const IMM8: i32>(k: __mmask8, a: __m256) -> __m128i {
15359 unsafe {
15360 static_assert_uimm_bits!(IMM8, 8);
15361 let a: f32x8 = a.as_f32x8();
        let r: i16x8 = vcvtps2ph256(a, IMM8, i16x8::ZERO, k);
        transmute(r)
15364 }
15365}
15366
15367/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15368/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15369/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
15370/// * [`_MM_FROUND_TO_NEG_INF`] : round down
15371/// * [`_MM_FROUND_TO_POS_INF`] : round up
15372/// * [`_MM_FROUND_TO_ZERO`] : truncate
15373/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15374///
15375/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_ph&expand=1773)
15376#[inline]
15377#[target_feature(enable = "avx512f,avx512vl")]
15378#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15379#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15380#[rustc_legacy_const_generics(3)]
15381pub fn _mm_mask_cvtps_ph<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
15382 unsafe {
15383 static_assert_uimm_bits!(IMM8, 8);
15384 let a: f32x4 = a.as_f32x4();
15385 let src: i16x8 = src.as_i16x8();
        let r: i16x8 = vcvtps2ph128(a, IMM8, src, k);
        transmute(r)
15388 }
15389}
15390
15391/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15392/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15393/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
15394/// * [`_MM_FROUND_TO_NEG_INF`] : round down
15395/// * [`_MM_FROUND_TO_POS_INF`] : round up
15396/// * [`_MM_FROUND_TO_ZERO`] : truncate
15397/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15398///
15399/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_ph&expand=1774)
15400#[inline]
15401#[target_feature(enable = "avx512f,avx512vl")]
15402#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15403#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15404#[rustc_legacy_const_generics(2)]
15405pub fn _mm_maskz_cvtps_ph<const IMM8: i32>(k: __mmask8, a: __m128) -> __m128i {
15406 unsafe {
15407 static_assert_uimm_bits!(IMM8, 8);
15408 let a: f32x4 = a.as_f32x4();
        let r: i16x8 = vcvtps2ph128(a, IMM8, i16x8::ZERO, k);
        transmute(r)
15411 }
15412}
15413
15414/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
15415/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15416///
15417/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundph_ps&expand=1332)
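///
/// A minimal usage sketch (illustrative, not from Intel's documentation); the input is built
/// directly from the f16 bit pattern of 3.0, assuming runtime `avx512f` support:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         unsafe {
///             let a = _mm256_set1_epi16(0x4200); // sixteen f16 values of 3.0
///             let r = _mm512_cvt_roundph_ps::<{ _MM_FROUND_NO_EXC }>(a);
///             assert_eq!(_mm512_cmpeq_ps_mask(r, _mm512_set1_ps(3.0)), 0xffff);
///         }
///     }
/// }
/// ```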
15418#[inline]
15419#[target_feature(enable = "avx512f")]
15420#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15421#[cfg_attr(test, assert_instr(vcvtph2ps, SAE = 8))]
15422#[rustc_legacy_const_generics(1)]
15423pub fn _mm512_cvt_roundph_ps<const SAE: i32>(a: __m256i) -> __m512 {
15424 unsafe {
15425 static_assert_sae!(SAE);
15426 let a: i16x16 = a.as_i16x16();
        let r: f32x16 = vcvtph2ps(a, f32x16::ZERO, 0b11111111_11111111, SAE);
        transmute(r)
15429 }
15430}
15431
15432/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15433/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15434///
15435/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundph_ps&expand=1333)
15436#[inline]
15437#[target_feature(enable = "avx512f")]
15438#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15439#[cfg_attr(test, assert_instr(vcvtph2ps, SAE = 8))]
15440#[rustc_legacy_const_generics(3)]
15441pub fn _mm512_mask_cvt_roundph_ps<const SAE: i32>(src: __m512, k: __mmask16, a: __m256i) -> __m512 {
15442 unsafe {
15443 static_assert_sae!(SAE);
15444 let a: i16x16 = a.as_i16x16();
15445 let src: f32x16 = src.as_f32x16();
        let r: f32x16 = vcvtph2ps(a, src, k, SAE);
        transmute(r)
15448 }
15449}
15450
15451/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15452/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15453///
15454/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundph_ps&expand=1334)
15455#[inline]
15456#[target_feature(enable = "avx512f")]
15457#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15458#[cfg_attr(test, assert_instr(vcvtph2ps, SAE = 8))]
15459#[rustc_legacy_const_generics(2)]
15460pub fn _mm512_maskz_cvt_roundph_ps<const SAE: i32>(k: __mmask16, a: __m256i) -> __m512 {
15461 unsafe {
15462 static_assert_sae!(SAE);
15463 let a: i16x16 = a.as_i16x16();
        let r: f32x16 = vcvtph2ps(a, f32x16::ZERO, k, SAE);
        transmute(r)
15466 }
15467}
15468
15469/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
15470///
15471/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtph_ps&expand=1723)
15472#[inline]
15473#[target_feature(enable = "avx512f")]
15474#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15475#[cfg_attr(test, assert_instr(vcvtph2ps))]
15476pub fn _mm512_cvtph_ps(a: __m256i) -> __m512 {
15477 unsafe {
        transmute(vcvtph2ps(
            a.as_i16x16(),
            f32x16::ZERO,
            0b11111111_11111111,
            _MM_FROUND_NO_EXC,
        ))
15484 }
15485}
15486
15487/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15488///
15489/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtph_ps&expand=1724)
15490#[inline]
15491#[target_feature(enable = "avx512f")]
15492#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15493#[cfg_attr(test, assert_instr(vcvtph2ps))]
15494pub fn _mm512_mask_cvtph_ps(src: __m512, k: __mmask16, a: __m256i) -> __m512 {
15495 unsafe {
        transmute(vcvtph2ps(
            a.as_i16x16(),
            src.as_f32x16(),
            k,
            _MM_FROUND_NO_EXC,
        ))
15502 }
15503}
15504
15505/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15506///
15507/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtph_ps&expand=1725)
15508#[inline]
15509#[target_feature(enable = "avx512f")]
15510#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15511#[cfg_attr(test, assert_instr(vcvtph2ps))]
15512pub fn _mm512_maskz_cvtph_ps(k: __mmask16, a: __m256i) -> __m512 {
    unsafe { transmute(vcvtph2ps(a.as_i16x16(), f32x16::ZERO, k, _MM_FROUND_NO_EXC)) }
15514}
15515
15516/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15517///
15518/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtph_ps&expand=1721)
15519#[inline]
15520#[target_feature(enable = "avx512f,avx512vl")]
15521#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15522#[cfg_attr(test, assert_instr(vcvtph2ps))]
15523pub fn _mm256_mask_cvtph_ps(src: __m256, k: __mmask8, a: __m128i) -> __m256 {
15524 unsafe {
15525 let convert: __m256 = _mm256_cvtph_ps(a);
        transmute(simd_select_bitmask(k, convert.as_f32x8(), src.as_f32x8()))
15527 }
15528}
15529
15530/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15531///
15532/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtph_ps&expand=1722)
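///
/// A minimal usage sketch (illustrative, not from Intel's documentation), assuming runtime
/// `avx512f` and `avx512vl` support:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512vl") {
///         unsafe {
///             let h = _mm_set1_epi16(0x3C00); // eight f16 values of 1.0
///             let r = _mm256_maskz_cvtph_ps(0b0000_1111, h);
///             // The low four lanes are converted, the high four are zeroed.
///             assert_eq!(_mm256_cmp_ps_mask::<{ _CMP_EQ_OQ }>(r, _mm256_set1_ps(1.0)), 0b0000_1111);
///             assert_eq!(_mm256_cmp_ps_mask::<{ _CMP_EQ_OQ }>(r, _mm256_set1_ps(0.0)), 0b1111_0000);
///         }
///     }
/// }
/// ```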
15533#[inline]
15534#[target_feature(enable = "avx512f,avx512vl")]
15535#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15536#[cfg_attr(test, assert_instr(vcvtph2ps))]
15537pub fn _mm256_maskz_cvtph_ps(k: __mmask8, a: __m128i) -> __m256 {
15538 unsafe {
15539 let convert: __m256 = _mm256_cvtph_ps(a);
        transmute(simd_select_bitmask(k, convert.as_f32x8(), f32x8::ZERO))
15541 }
15542}
15543
15544/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15545///
15546/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtph_ps&expand=1718)
15547#[inline]
15548#[target_feature(enable = "avx512f,avx512vl")]
15549#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15550#[cfg_attr(test, assert_instr(vcvtph2ps))]
15551pub fn _mm_mask_cvtph_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 {
15552 unsafe {
15553 let convert: __m128 = _mm_cvtph_ps(a);
        transmute(simd_select_bitmask(k, convert.as_f32x4(), src.as_f32x4()))
15555 }
15556}
15557
15558/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15559///
15560/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtph_ps&expand=1719)
15561#[inline]
15562#[target_feature(enable = "avx512f,avx512vl")]
15563#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15564#[cfg_attr(test, assert_instr(vcvtph2ps))]
15565pub fn _mm_maskz_cvtph_ps(k: __mmask8, a: __m128i) -> __m128 {
15566 unsafe {
15567 let convert: __m128 = _mm_cvtph_ps(a);
        transmute(simd_select_bitmask(k, convert.as_f32x4(), f32x4::ZERO))
15569 }
15570}
15571
15572/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.\
15573/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15574///
15575/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundps_epi32&expand=1916)
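///
/// A minimal usage sketch (illustrative, not from Intel's documentation), assuming runtime
/// `avx512f` support; truncation always rounds toward zero:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         unsafe {
///             let a = _mm512_set1_ps(-1.9);
///             let r = _mm512_cvtt_roundps_epi32::<{ _MM_FROUND_NO_EXC }>(a);
///             // -1.9 truncates toward zero to -1 in every lane.
///             assert_eq!(_mm512_cmpeq_epi32_mask(r, _mm512_set1_epi32(-1)), 0xffff);
///         }
///     }
/// }
/// ```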
15576#[inline]
15577#[target_feature(enable = "avx512f")]
15578#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15579#[cfg_attr(test, assert_instr(vcvttps2dq, SAE = 8))]
15580#[rustc_legacy_const_generics(1)]
15581pub fn _mm512_cvtt_roundps_epi32<const SAE: i32>(a: __m512) -> __m512i {
15582 unsafe {
15583 static_assert_sae!(SAE);
15584 let a: f32x16 = a.as_f32x16();
        let r: i32x16 = vcvttps2dq(a, i32x16::ZERO, 0b11111111_11111111, SAE);
        transmute(r)
15587 }
15588}
15589
15590/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15591/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15592///
15593/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundps_epi32&expand=1917)
15594#[inline]
15595#[target_feature(enable = "avx512f")]
15596#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15597#[cfg_attr(test, assert_instr(vcvttps2dq, SAE = 8))]
15598#[rustc_legacy_const_generics(3)]
15599pub fn _mm512_mask_cvtt_roundps_epi32<const SAE: i32>(
15600 src: __m512i,
15601 k: __mmask16,
15602 a: __m512,
15603) -> __m512i {
15604 unsafe {
15605 static_assert_sae!(SAE);
15606 let a: f32x16 = a.as_f32x16();
15607 let src: i32x16 = src.as_i32x16();
        let r: i32x16 = vcvttps2dq(a, src, k, SAE);
        transmute(r)
15610 }
15611}
15612
15613/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15614/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15615///
15616/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundps_epi32&expand=1918)
15617#[inline]
15618#[target_feature(enable = "avx512f")]
15619#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15620#[cfg_attr(test, assert_instr(vcvttps2dq, SAE = 8))]
15621#[rustc_legacy_const_generics(2)]
15622pub fn _mm512_maskz_cvtt_roundps_epi32<const SAE: i32>(k: __mmask16, a: __m512) -> __m512i {
15623 unsafe {
15624 static_assert_sae!(SAE);
15625 let a: f32x16 = a.as_f32x16();
        let r: i32x16 = vcvttps2dq(a, i32x16::ZERO, k, SAE);
        transmute(r)
15628 }
15629}
15630
15631/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.\
15632/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15633///
15634/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundps_epu32&expand=1922)
15635#[inline]
15636#[target_feature(enable = "avx512f")]
15637#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15638#[cfg_attr(test, assert_instr(vcvttps2udq, SAE = 8))]
15639#[rustc_legacy_const_generics(1)]
15640pub fn _mm512_cvtt_roundps_epu32<const SAE: i32>(a: __m512) -> __m512i {
15641 unsafe {
15642 static_assert_sae!(SAE);
15643 let a: f32x16 = a.as_f32x16();
        let r: u32x16 = vcvttps2udq(a, u32x16::ZERO, 0b11111111_11111111, SAE);
        transmute(r)
15646 }
15647}
15648
15649/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15650/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15651///
15652/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundps_epu32&expand=1923)
15653#[inline]
15654#[target_feature(enable = "avx512f")]
15655#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15656#[cfg_attr(test, assert_instr(vcvttps2udq, SAE = 8))]
15657#[rustc_legacy_const_generics(3)]
15658pub fn _mm512_mask_cvtt_roundps_epu32<const SAE: i32>(
15659 src: __m512i,
15660 k: __mmask16,
15661 a: __m512,
15662) -> __m512i {
15663 unsafe {
15664 static_assert_sae!(SAE);
15665 let a: f32x16 = a.as_f32x16();
15666 let src: u32x16 = src.as_u32x16();
        let r: u32x16 = vcvttps2udq(a, src, k, SAE);
        transmute(r)
15669 }
15670}
15671
15672/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15673/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15674///
15675/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundps_epu32&expand=1924)
15676#[inline]
15677#[target_feature(enable = "avx512f")]
15678#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15679#[cfg_attr(test, assert_instr(vcvttps2udq, SAE = 8))]
15680#[rustc_legacy_const_generics(2)]
15681pub fn _mm512_maskz_cvtt_roundps_epu32<const SAE: i32>(k: __mmask16, a: __m512) -> __m512i {
15682 unsafe {
15683 static_assert_sae!(SAE);
15684 let a: f32x16 = a.as_f32x16();
        let r: u32x16 = vcvttps2udq(a, u32x16::ZERO, k, SAE);
        transmute(r)
15687 }
15688}
15689
15690/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.\
15691/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15692///
15693/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundpd_epi32&expand=1904)
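///
/// A minimal usage sketch (illustrative, not from Intel's documentation); the 256-bit result
/// is checked with an `avx512vl` compare, so both features are detected at runtime:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512vl") {
///         unsafe {
///             let a = _mm512_set1_pd(2.99);
///             let r = _mm512_cvtt_roundpd_epi32::<{ _MM_FROUND_NO_EXC }>(a);
///             // Eight doubles truncate to eight 32-bit integers of value 2.
///             assert_eq!(_mm256_cmpeq_epi32_mask(r, _mm256_set1_epi32(2)), 0xff);
///         }
///     }
/// }
/// ```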
15694#[inline]
15695#[target_feature(enable = "avx512f")]
15696#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15697#[cfg_attr(test, assert_instr(vcvttpd2dq, SAE = 8))]
15698#[rustc_legacy_const_generics(1)]
15699pub fn _mm512_cvtt_roundpd_epi32<const SAE: i32>(a: __m512d) -> __m256i {
15700 unsafe {
15701 static_assert_sae!(SAE);
15702 let a: f64x8 = a.as_f64x8();
        let r: i32x8 = vcvttpd2dq(a, i32x8::ZERO, 0b11111111, SAE);
        transmute(r)
15705 }
15706}
15707
15708/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15709/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15710///
15711/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundpd_epi32&expand=1905)
15712#[inline]
15713#[target_feature(enable = "avx512f")]
15714#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15715#[cfg_attr(test, assert_instr(vcvttpd2dq, SAE = 8))]
15716#[rustc_legacy_const_generics(3)]
15717pub fn _mm512_mask_cvtt_roundpd_epi32<const SAE: i32>(
15718 src: __m256i,
15719 k: __mmask8,
15720 a: __m512d,
15721) -> __m256i {
15722 unsafe {
15723 static_assert_sae!(SAE);
15724 let a: f64x8 = a.as_f64x8();
15725 let src: i32x8 = src.as_i32x8();
        let r: i32x8 = vcvttpd2dq(a, src, k, SAE);
        transmute(r)
15728 }
15729}
15730
/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15732/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15733///
15734/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundpd_epi32&expand=1918)
15735#[inline]
15736#[target_feature(enable = "avx512f")]
15737#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15738#[cfg_attr(test, assert_instr(vcvttpd2dq, SAE = 8))]
15739#[rustc_legacy_const_generics(2)]
15740pub fn _mm512_maskz_cvtt_roundpd_epi32<const SAE: i32>(k: __mmask8, a: __m512d) -> __m256i {
15741 unsafe {
15742 static_assert_sae!(SAE);
15743 let a: f64x8 = a.as_f64x8();
        let r: i32x8 = vcvttpd2dq(a, i32x8::ZERO, k, SAE);
        transmute(r)
15746 }
15747}
15748
15749/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.\
15750/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15751///
15752/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundpd_epu32&expand=1910)
15753#[inline]
15754#[target_feature(enable = "avx512f")]
15755#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15756#[cfg_attr(test, assert_instr(vcvttpd2udq, SAE = 8))]
15757#[rustc_legacy_const_generics(1)]
15758pub fn _mm512_cvtt_roundpd_epu32<const SAE: i32>(a: __m512d) -> __m256i {
15759 unsafe {
15760 static_assert_sae!(SAE);
15761 let a: f64x8 = a.as_f64x8();
        let r: u32x8 = vcvttpd2udq(a, i32x8::ZERO, 0b11111111, SAE);
        transmute(r)
15764 }
15765}
15766
15767/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15768/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15769///
15770/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundpd_epu32&expand=1911)
15771#[inline]
15772#[target_feature(enable = "avx512f")]
15773#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15774#[cfg_attr(test, assert_instr(vcvttpd2udq, SAE = 8))]
15775#[rustc_legacy_const_generics(3)]
15776pub fn _mm512_mask_cvtt_roundpd_epu32<const SAE: i32>(
15777 src: __m256i,
15778 k: __mmask8,
15779 a: __m512d,
15780) -> __m256i {
15781 unsafe {
15782 static_assert_sae!(SAE);
15783 let a: f64x8 = a.as_f64x8();
15784 let src: i32x8 = src.as_i32x8();
        let r: u32x8 = vcvttpd2udq(a, src, k, SAE);
        transmute(r)
15787 }
15788}
15789
15790/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.
15791///
15792/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttps_epi32&expand=1984)
15793#[inline]
15794#[target_feature(enable = "avx512f")]
15795#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15796#[cfg_attr(test, assert_instr(vcvttps2dq))]
15797pub fn _mm512_cvttps_epi32(a: __m512) -> __m512i {
15798 unsafe {
        transmute(vcvttps2dq(
            a.as_f32x16(),
            i32x16::ZERO,
            0b11111111_11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
15805 }
15806}
15807
15808/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15809///
15810/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttps_epi32&expand=1985)
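///
/// A minimal usage sketch (illustrative, not from Intel's documentation) showing the writemask
/// behaviour, assuming runtime `avx512f` support:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         unsafe {
///             let a = _mm512_set1_ps(7.9);
///             let src = _mm512_set1_epi32(-1);
///             let r = _mm512_mask_cvttps_epi32(src, 0b00000000_11111111, a);
///             // The low eight lanes hold the truncated value 7, the high eight keep `src`.
///             assert_eq!(_mm512_cmpeq_epi32_mask(r, _mm512_set1_epi32(7)), 0b00000000_11111111);
///             assert_eq!(_mm512_cmpeq_epi32_mask(r, src), 0b11111111_00000000);
///         }
///     }
/// }
/// ```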
15811#[inline]
15812#[target_feature(enable = "avx512f")]
15813#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15814#[cfg_attr(test, assert_instr(vcvttps2dq))]
15815pub fn _mm512_mask_cvttps_epi32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
15816 unsafe {
        transmute(vcvttps2dq(
            a.as_f32x16(),
            src.as_i32x16(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
15823 }
15824}
15825
15826/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15827///
15828/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttps_epi32&expand=1986)
15829#[inline]
15830#[target_feature(enable = "avx512f")]
15831#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15832#[cfg_attr(test, assert_instr(vcvttps2dq))]
15833pub fn _mm512_maskz_cvttps_epi32(k: __mmask16, a: __m512) -> __m512i {
15834 unsafe {
        transmute(vcvttps2dq(
            a.as_f32x16(),
            i32x16::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
15841 }
15842}
15843
15844/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15845///
15846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttps_epi32&expand=1982)
15847#[inline]
15848#[target_feature(enable = "avx512f,avx512vl")]
15849#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15850#[cfg_attr(test, assert_instr(vcvttps2dq))]
15851pub fn _mm256_mask_cvttps_epi32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
    unsafe { transmute(vcvttps2dq256(a.as_f32x8(), src.as_i32x8(), k)) }
15853}
15854
15855/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15856///
15857/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttps_epi32&expand=1983)
15858#[inline]
15859#[target_feature(enable = "avx512f,avx512vl")]
15860#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15861#[cfg_attr(test, assert_instr(vcvttps2dq))]
15862pub fn _mm256_maskz_cvttps_epi32(k: __mmask8, a: __m256) -> __m256i {
    unsafe { transmute(vcvttps2dq256(a.as_f32x8(), i32x8::ZERO, k)) }
15864}
15865
15866/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15867///
15868/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttps_epi32&expand=1979)
15869#[inline]
15870#[target_feature(enable = "avx512f,avx512vl")]
15871#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15872#[cfg_attr(test, assert_instr(vcvttps2dq))]
15873pub fn _mm_mask_cvttps_epi32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
    unsafe { transmute(vcvttps2dq128(a.as_f32x4(), src.as_i32x4(), k)) }
15875}
15876
15877/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15878///
15879/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttps_epi32&expand=1980)
15880#[inline]
15881#[target_feature(enable = "avx512f,avx512vl")]
15882#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15883#[cfg_attr(test, assert_instr(vcvttps2dq))]
15884pub fn _mm_maskz_cvttps_epi32(k: __mmask8, a: __m128) -> __m128i {
    unsafe { transmute(vcvttps2dq128(a.as_f32x4(), i32x4::ZERO, k)) }
15886}
15887
15888/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
15889///
15890/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttps_epu32&expand=2002)
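///
/// A minimal usage sketch (illustrative, not from Intel's documentation); the input exceeds
/// `i32::MAX`, which is exactly what the unsigned conversion is for. Runtime `avx512f` support
/// is assumed:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") {
///         unsafe {
///             // 3_000_000_000.0 is exactly representable as an f32 and larger than i32::MAX.
///             let a = _mm512_set1_ps(3_000_000_000.0);
///             let r = _mm512_cvttps_epu32(a);
///             let e = _mm512_set1_epi32(3_000_000_000u32 as i32);
///             assert_eq!(_mm512_cmpeq_epu32_mask(r, e), 0xffff);
///         }
///     }
/// }
/// ```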
15891#[inline]
15892#[target_feature(enable = "avx512f")]
15893#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15894#[cfg_attr(test, assert_instr(vcvttps2udq))]
15895pub fn _mm512_cvttps_epu32(a: __m512) -> __m512i {
15896 unsafe {
        transmute(vcvttps2udq(
            a.as_f32x16(),
            u32x16::ZERO,
            0b11111111_11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
15903 }
15904}
15905
/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15907///
15908/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttps_epu32&expand=2003)
15909#[inline]
15910#[target_feature(enable = "avx512f")]
15911#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15912#[cfg_attr(test, assert_instr(vcvttps2udq))]
15913pub fn _mm512_mask_cvttps_epu32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
15914 unsafe {
        transmute(vcvttps2udq(
            a.as_f32x16(),
            src.as_u32x16(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
15921 }
15922}
15923
/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15925///
15926/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttps_epu32&expand=2004)
15927#[inline]
15928#[target_feature(enable = "avx512f")]
15929#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15930#[cfg_attr(test, assert_instr(vcvttps2udq))]
15931pub fn _mm512_maskz_cvttps_epu32(k: __mmask16, a: __m512) -> __m512i {
15932 unsafe {
        transmute(vcvttps2udq(
            a.as_f32x16(),
            u32x16::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
15939 }
15940}
15941
15942/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
15943///
15944/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttps_epu32&expand=1999)
15945#[inline]
15946#[target_feature(enable = "avx512f,avx512vl")]
15947#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15948#[cfg_attr(test, assert_instr(vcvttps2udq))]
15949pub fn _mm256_cvttps_epu32(a: __m256) -> __m256i {
    unsafe { transmute(vcvttps2udq256(a.as_f32x8(), u32x8::ZERO, 0b11111111)) }
15951}
15952
/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15954///
15955/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttps_epu32&expand=2000)
15956#[inline]
15957#[target_feature(enable = "avx512f,avx512vl")]
15958#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15959#[cfg_attr(test, assert_instr(vcvttps2udq))]
15960pub fn _mm256_mask_cvttps_epu32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
    unsafe { transmute(vcvttps2udq256(a.as_f32x8(), src.as_u32x8(), k)) }
15962}
15963
/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15965///
15966/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttps_epu32&expand=2001)
15967#[inline]
15968#[target_feature(enable = "avx512f,avx512vl")]
15969#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15970#[cfg_attr(test, assert_instr(vcvttps2udq))]
15971pub fn _mm256_maskz_cvttps_epu32(k: __mmask8, a: __m256) -> __m256i {
    unsafe { transmute(vcvttps2udq256(a.as_f32x8(), u32x8::ZERO, k)) }
15973}
15974
15975/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
15976///
15977/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttps_epu32&expand=1996)
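///
/// A minimal usage sketch (illustrative, not from Intel's documentation), assuming runtime
/// `avx512f` and `avx512vl` support:
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///     if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512vl") {
///         unsafe {
///             let a = _mm_set1_ps(3.7);
///             let r = _mm_cvttps_epu32(a);
///             // 3.7 truncates to 3 in all four lanes.
///             assert_eq!(_mm_cmpeq_epi32_mask(r, _mm_set1_epi32(3)), 0b1111);
///         }
///     }
/// }
/// ```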
15978#[inline]
15979#[target_feature(enable = "avx512f,avx512vl")]
15980#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15981#[cfg_attr(test, assert_instr(vcvttps2udq))]
15982pub fn _mm_cvttps_epu32(a: __m128) -> __m128i {
    unsafe { transmute(vcvttps2udq128(a.as_f32x4(), u32x4::ZERO, 0b11111111)) }
15984}
15985
/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15987///
15988/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttps_epu32&expand=1997)
15989#[inline]
15990#[target_feature(enable = "avx512f,avx512vl")]
15991#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
15992#[cfg_attr(test, assert_instr(vcvttps2udq))]
15993pub fn _mm_mask_cvttps_epu32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
    unsafe { transmute(vcvttps2udq128(a.as_f32x4(), src.as_u32x4(), k)) }
15995}
15996
/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15998///
15999/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttps_epu32&expand=1998)
16000#[inline]
16001#[target_feature(enable = "avx512f,avx512vl")]
16002#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16003#[cfg_attr(test, assert_instr(vcvttps2udq))]
16004pub fn _mm_maskz_cvttps_epu32(k: __mmask8, a: __m128) -> __m128i {
    unsafe { transmute(vcvttps2udq128(a.as_f32x4(), u32x4::ZERO, k)) }
16006}
16007
16008/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
16009/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
16010///
16011/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundpd_epu32&expand=1912)
16012#[inline]
16013#[target_feature(enable = "avx512f")]
16014#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16015#[cfg_attr(test, assert_instr(vcvttpd2udq, SAE = 8))]
16016#[rustc_legacy_const_generics(2)]
16017pub fn _mm512_maskz_cvtt_roundpd_epu32<const SAE: i32>(k: __mmask8, a: __m512d) -> __m256i {
16018 unsafe {
16019 static_assert_sae!(SAE);
16020 let a: f64x8 = a.as_f64x8();
        let r: u32x8 = vcvttpd2udq(a, i32x8::ZERO, k, SAE);
        transmute(r)
16023 }
16024}
16025
16026/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.
16027///
16028/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttpd_epi32&expand=1947)
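///
/// A minimal usage sketch (an illustrative addition; `demo` is a hypothetical
/// wrapper and an AVX-512F target is assumed):
///
/// ```no_run
/// #[cfg(target_arch = "x86_64")]
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     use core::arch::x86_64::*;
///     let a = _mm512_set1_pd(-2.9);
///     // truncation rounds toward zero, so every 32-bit lane holds -2
///     let _r = _mm512_cvttpd_epi32(a);
/// }
/// ```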
16029#[inline]
16030#[target_feature(enable = "avx512f")]
16031#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16032#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16033pub fn _mm512_cvttpd_epi32(a: __m512d) -> __m256i {
16034 unsafe {
        transmute(vcvttpd2dq(
            a.as_f64x8(),
            i32x8::ZERO,
            0b11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
16041 }
16042}
16043
16044/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16045///
16046/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttpd_epi32&expand=1948)
16047#[inline]
16048#[target_feature(enable = "avx512f")]
16049#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16050#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16051pub fn _mm512_mask_cvttpd_epi32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
16052 unsafe {
        transmute(vcvttpd2dq(
            a.as_f64x8(),
            src.as_i32x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
16059 }
16060}
16061
16062/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16063///
16064/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttpd_epi32&expand=1949)
16065#[inline]
16066#[target_feature(enable = "avx512f")]
16067#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16068#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16069pub fn _mm512_maskz_cvttpd_epi32(k: __mmask8, a: __m512d) -> __m256i {
16070 unsafe {
        transmute(vcvttpd2dq(
            a.as_f64x8(),
            i32x8::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
16077 }
16078}
16079
16080/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16081///
16082/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttpd_epi32&expand=1945)
16083#[inline]
16084#[target_feature(enable = "avx512f,avx512vl")]
16085#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16086#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16087pub fn _mm256_mask_cvttpd_epi32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
    unsafe { transmute(vcvttpd2dq256(a.as_f64x4(), src.as_i32x4(), k)) }
16089}
16090
16091/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16092///
16093/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttpd_epi32&expand=1946)
16094#[inline]
16095#[target_feature(enable = "avx512f,avx512vl")]
16096#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16097#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16098pub fn _mm256_maskz_cvttpd_epi32(k: __mmask8, a: __m256d) -> __m128i {
    unsafe { transmute(vcvttpd2dq256(a.as_f64x4(), i32x4::ZERO, k)) }
16100}
16101
16102/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16103///
16104/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttpd_epi32&expand=1942)
16105#[inline]
16106#[target_feature(enable = "avx512f,avx512vl")]
16107#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16108#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16109pub fn _mm_mask_cvttpd_epi32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
    unsafe { transmute(vcvttpd2dq128(a.as_f64x2(), src.as_i32x4(), k)) }
16111}
16112
16113/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16114///
16115/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttpd_epi32&expand=1943)
16116#[inline]
16117#[target_feature(enable = "avx512f,avx512vl")]
16118#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16119#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16120pub fn _mm_maskz_cvttpd_epi32(k: __mmask8, a: __m128d) -> __m128i {
    unsafe { transmute(vcvttpd2dq128(a.as_f64x2(), i32x4::ZERO, k)) }
16122}
16123
16124/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
16125///
16126/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttpd_epu32&expand=1965)
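///
/// A minimal usage sketch (an illustrative addition; `demo` is a hypothetical
/// wrapper and an AVX-512F target is assumed):
///
/// ```no_run
/// #[cfg(target_arch = "x86_64")]
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     use core::arch::x86_64::*;
///     let a = _mm512_set1_pd(7.9);
///     // every unsigned 32-bit lane of the result holds 7
///     let _r = _mm512_cvttpd_epu32(a);
/// }
/// ```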
16127#[inline]
16128#[target_feature(enable = "avx512f")]
16129#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16130#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16131pub fn _mm512_cvttpd_epu32(a: __m512d) -> __m256i {
16132 unsafe {
        transmute(vcvttpd2udq(
            a.as_f64x8(),
            i32x8::ZERO,
            0b11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
16139 }
16140}
16141
16142/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16143///
16144/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttpd_epu32&expand=1966)
16145#[inline]
16146#[target_feature(enable = "avx512f")]
16147#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16148#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16149pub fn _mm512_mask_cvttpd_epu32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
16150 unsafe {
        transmute(vcvttpd2udq(
            a.as_f64x8(),
            src.as_i32x8(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
16157 }
16158}
16159
16160/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16161///
16162/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttpd_epu32&expand=1967)
16163#[inline]
16164#[target_feature(enable = "avx512f")]
16165#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16166#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16167pub fn _mm512_maskz_cvttpd_epu32(k: __mmask8, a: __m512d) -> __m256i {
16168 unsafe {
        transmute(vcvttpd2udq(
            a.as_f64x8(),
            i32x8::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
16175 }
16176}
16177
16178/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
16179///
16180/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttpd_epu32&expand=1962)
16181#[inline]
16182#[target_feature(enable = "avx512f,avx512vl")]
16183#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16184#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16185pub fn _mm256_cvttpd_epu32(a: __m256d) -> __m128i {
    unsafe { transmute(vcvttpd2udq256(a.as_f64x4(), i32x4::ZERO, 0b11111111)) }
16187}
16188
16189/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16190///
16191/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttpd_epu32&expand=1963)
16192#[inline]
16193#[target_feature(enable = "avx512f,avx512vl")]
16194#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16195#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16196pub fn _mm256_mask_cvttpd_epu32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
    unsafe { transmute(vcvttpd2udq256(a.as_f64x4(), src.as_i32x4(), k)) }
16198}
16199
16200/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16201///
16202/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttpd_epu32&expand=1964)
16203#[inline]
16204#[target_feature(enable = "avx512f,avx512vl")]
16205#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16206#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16207pub fn _mm256_maskz_cvttpd_epu32(k: __mmask8, a: __m256d) -> __m128i {
    unsafe { transmute(vcvttpd2udq256(a.as_f64x4(), i32x4::ZERO, k)) }
16209}
16210
16211/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
16212///
16213/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttpd_epu32&expand=1959)
16214#[inline]
16215#[target_feature(enable = "avx512f,avx512vl")]
16216#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16217#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16218pub fn _mm_cvttpd_epu32(a: __m128d) -> __m128i {
    unsafe { transmute(vcvttpd2udq128(a.as_f64x2(), i32x4::ZERO, 0b11111111)) }
16220}
16221
16222/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16223///
16224/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttpd_epu32&expand=1960)
16225#[inline]
16226#[target_feature(enable = "avx512f,avx512vl")]
16227#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16228#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16229pub fn _mm_mask_cvttpd_epu32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
    unsafe { transmute(vcvttpd2udq128(a.as_f64x2(), src.as_i32x4(), k)) }
16231}
16232
16233/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16234///
16235/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttpd_epu32&expand=1961)
16236#[inline]
16237#[target_feature(enable = "avx512f,avx512vl")]
16238#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16239#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16240pub fn _mm_maskz_cvttpd_epu32(k: __mmask8, a: __m128d) -> __m128i {
    unsafe { transmute(vcvttpd2udq128(a.as_f64x2(), i32x4::ZERO, k)) }
16242}
16243
16244/// Returns vector of type `__m512d` with all elements set to zero.
16245///
16246/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_pd&expand=5018)
16247#[inline]
16248#[target_feature(enable = "avx512f")]
16249#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16250#[cfg_attr(test, assert_instr(vxorps))]
16251pub fn _mm512_setzero_pd() -> __m512d {
16252 // All-0 is a properly initialized __m512d
16253 unsafe { const { mem::zeroed() } }
16254}
16255
16256/// Returns vector of type `__m512` with all elements set to zero.
16257///
16258/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_ps&expand=5021)
16259#[inline]
16260#[target_feature(enable = "avx512f")]
16261#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16262#[cfg_attr(test, assert_instr(vxorps))]
16263pub fn _mm512_setzero_ps() -> __m512 {
16264 // All-0 is a properly initialized __m512
16265 unsafe { const { mem::zeroed() } }
16266}
16267
16268/// Return vector of type `__m512` with all elements set to zero.
16269///
16270/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero&expand=5014)
16271#[inline]
16272#[target_feature(enable = "avx512f")]
16273#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16274#[cfg_attr(test, assert_instr(vxorps))]
16275pub fn _mm512_setzero() -> __m512 {
16276 // All-0 is a properly initialized __m512
16277 unsafe { const { mem::zeroed() } }
16278}
16279
16280/// Returns vector of type `__m512i` with all elements set to zero.
16281///
16282/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_si512&expand=5024)
16283#[inline]
16284#[target_feature(enable = "avx512f")]
16285#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16286#[cfg_attr(test, assert_instr(vxorps))]
16287pub fn _mm512_setzero_si512() -> __m512i {
16288 // All-0 is a properly initialized __m512i
16289 unsafe { const { mem::zeroed() } }
16290}
16291
16292/// Return vector of type `__m512i` with all elements set to zero.
16293///
16294/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_epi32&expand=5015)
16295#[inline]
16296#[target_feature(enable = "avx512f")]
16297#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16298#[cfg_attr(test, assert_instr(vxorps))]
16299pub fn _mm512_setzero_epi32() -> __m512i {
16300 // All-0 is a properly initialized __m512i
16301 unsafe { const { mem::zeroed() } }
16302}
16303
16304/// Sets packed 32-bit integers in `dst` with the supplied values in reverse
16305/// order.
16306///
16307/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_epi32&expand=4991)
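///
/// A minimal usage sketch (an illustrative addition; `demo` is a hypothetical
/// wrapper and an AVX-512F target is assumed):
///
/// ```no_run
/// #[cfg(target_arch = "x86_64")]
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     use core::arch::x86_64::*;
///     // the first argument lands in the lowest lane: lane 0 is 0, lane 15 is 15
///     let _v = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
/// }
/// ```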
16308#[inline]
16309#[target_feature(enable = "avx512f")]
16310#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16311pub fn _mm512_setr_epi32(
16312 e15: i32,
16313 e14: i32,
16314 e13: i32,
16315 e12: i32,
16316 e11: i32,
16317 e10: i32,
16318 e9: i32,
16319 e8: i32,
16320 e7: i32,
16321 e6: i32,
16322 e5: i32,
16323 e4: i32,
16324 e3: i32,
16325 e2: i32,
16326 e1: i32,
16327 e0: i32,
16328) -> __m512i {
16329 unsafe {
        let r: i32x16 = i32x16::new(
            e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0,
        );
        transmute(r)
16334 }
16335}
16336
16337/// Set packed 8-bit integers in dst with the supplied values.
16338///
16339/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi8&expand=4915)
16340#[inline]
16341#[target_feature(enable = "avx512f")]
16342#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16343pub fn _mm512_set_epi8(
16344 e63: i8,
16345 e62: i8,
16346 e61: i8,
16347 e60: i8,
16348 e59: i8,
16349 e58: i8,
16350 e57: i8,
16351 e56: i8,
16352 e55: i8,
16353 e54: i8,
16354 e53: i8,
16355 e52: i8,
16356 e51: i8,
16357 e50: i8,
16358 e49: i8,
16359 e48: i8,
16360 e47: i8,
16361 e46: i8,
16362 e45: i8,
16363 e44: i8,
16364 e43: i8,
16365 e42: i8,
16366 e41: i8,
16367 e40: i8,
16368 e39: i8,
16369 e38: i8,
16370 e37: i8,
16371 e36: i8,
16372 e35: i8,
16373 e34: i8,
16374 e33: i8,
16375 e32: i8,
16376 e31: i8,
16377 e30: i8,
16378 e29: i8,
16379 e28: i8,
16380 e27: i8,
16381 e26: i8,
16382 e25: i8,
16383 e24: i8,
16384 e23: i8,
16385 e22: i8,
16386 e21: i8,
16387 e20: i8,
16388 e19: i8,
16389 e18: i8,
16390 e17: i8,
16391 e16: i8,
16392 e15: i8,
16393 e14: i8,
16394 e13: i8,
16395 e12: i8,
16396 e11: i8,
16397 e10: i8,
16398 e9: i8,
16399 e8: i8,
16400 e7: i8,
16401 e6: i8,
16402 e5: i8,
16403 e4: i8,
16404 e3: i8,
16405 e2: i8,
16406 e1: i8,
16407 e0: i8,
16408) -> __m512i {
16409 unsafe {
        let r: i8x64 = i8x64::new(
            e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18,
            e19, e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31, e32, e33, e34, e35,
            e36, e37, e38, e39, e40, e41, e42, e43, e44, e45, e46, e47, e48, e49, e50, e51, e52,
            e53, e54, e55, e56, e57, e58, e59, e60, e61, e62, e63,
        );
        transmute(r)
16417 }
16418}
16419
16420/// Set packed 16-bit integers in dst with the supplied values.
16421///
16422/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi16&expand=4905)
16423#[inline]
16424#[target_feature(enable = "avx512f")]
16425#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16426pub fn _mm512_set_epi16(
16427 e31: i16,
16428 e30: i16,
16429 e29: i16,
16430 e28: i16,
16431 e27: i16,
16432 e26: i16,
16433 e25: i16,
16434 e24: i16,
16435 e23: i16,
16436 e22: i16,
16437 e21: i16,
16438 e20: i16,
16439 e19: i16,
16440 e18: i16,
16441 e17: i16,
16442 e16: i16,
16443 e15: i16,
16444 e14: i16,
16445 e13: i16,
16446 e12: i16,
16447 e11: i16,
16448 e10: i16,
16449 e9: i16,
16450 e8: i16,
16451 e7: i16,
16452 e6: i16,
16453 e5: i16,
16454 e4: i16,
16455 e3: i16,
16456 e2: i16,
16457 e1: i16,
16458 e0: i16,
16459) -> __m512i {
16460 unsafe {
        let r: i16x32 = i16x32::new(
            e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18,
            e19, e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31,
        );
        transmute(r)
16466 }
16467}
16468
16469/// Set packed 32-bit integers in dst with the repeated 4 element sequence.
16470///
16471/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_epi32&expand=4982)
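///
/// A minimal usage sketch (an illustrative addition; `demo` is a hypothetical
/// wrapper and an AVX-512F target is assumed):
///
/// ```no_run
/// #[cfg(target_arch = "x86_64")]
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     use core::arch::x86_64::*;
///     // lanes from lowest to highest repeat the pattern 1, 2, 3, 4
///     let _v = _mm512_set4_epi32(4, 3, 2, 1);
/// }
/// ```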
16472#[inline]
16473#[target_feature(enable = "avx512f")]
16474#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16475pub fn _mm512_set4_epi32(d: i32, c: i32, b: i32, a: i32) -> __m512i {
    _mm512_set_epi32(d, c, b, a, d, c, b, a, d, c, b, a, d, c, b, a)
16477}
16478
16479/// Set packed single-precision (32-bit) floating-point elements in dst with the repeated 4 element sequence.
16480///
16481/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_ps&expand=4985)
16482#[inline]
16483#[target_feature(enable = "avx512f")]
16484#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16485pub fn _mm512_set4_ps(d: f32, c: f32, b: f32, a: f32) -> __m512 {
    _mm512_set_ps(d, c, b, a, d, c, b, a, d, c, b, a, d, c, b, a)
16487}
16488
16489/// Set packed double-precision (64-bit) floating-point elements in dst with the repeated 4 element sequence.
16490///
16491/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_pd&expand=4984)
16492#[inline]
16493#[target_feature(enable = "avx512f")]
16494#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16495pub fn _mm512_set4_pd(d: f64, c: f64, b: f64, a: f64) -> __m512d {
    _mm512_set_pd(d, c, b, a, d, c, b, a)
16497}
16498
16499/// Set packed 32-bit integers in dst with the repeated 4 element sequence in reverse order.
16500///
16501/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_epi32&expand=5009)
16502#[inline]
16503#[target_feature(enable = "avx512f")]
16504#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16505pub fn _mm512_setr4_epi32(d: i32, c: i32, b: i32, a: i32) -> __m512i {
    _mm512_set_epi32(a, b, c, d, a, b, c, d, a, b, c, d, a, b, c, d)
16507}
16508
16509/// Set packed single-precision (32-bit) floating-point elements in dst with the repeated 4 element sequence in reverse order.
16510///
16511/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_ps&expand=5012)
16512#[inline]
16513#[target_feature(enable = "avx512f")]
16514#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16515pub fn _mm512_setr4_ps(d: f32, c: f32, b: f32, a: f32) -> __m512 {
    _mm512_set_ps(a, b, c, d, a, b, c, d, a, b, c, d, a, b, c, d)
16517}
16518
16519/// Set packed double-precision (64-bit) floating-point elements in dst with the repeated 4 element sequence in reverse order.
16520///
16521/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_pd&expand=5011)
16522#[inline]
16523#[target_feature(enable = "avx512f")]
16524#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16525pub fn _mm512_setr4_pd(d: f64, c: f64, b: f64, a: f64) -> __m512d {
    _mm512_set_pd(a, b, c, d, a, b, c, d)
16527}
16528
16529/// Set packed 64-bit integers in dst with the supplied values.
16530///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi64&expand=4910)
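///
/// A minimal usage sketch (an illustrative addition; `demo` is a hypothetical
/// wrapper and an AVX-512F target is assumed):
///
/// ```no_run
/// #[cfg(target_arch = "x86_64")]
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     use core::arch::x86_64::*;
///     // the last argument fills the lowest lane: lane 0 holds 0 and lane 7 holds 7
///     let _v = _mm512_set_epi64(7, 6, 5, 4, 3, 2, 1, 0);
/// }
/// ```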
16532#[inline]
16533#[target_feature(enable = "avx512f")]
16534#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16535pub fn _mm512_set_epi64(
16536 e0: i64,
16537 e1: i64,
16538 e2: i64,
16539 e3: i64,
16540 e4: i64,
16541 e5: i64,
16542 e6: i64,
16543 e7: i64,
16544) -> __m512i {
    _mm512_setr_epi64(e7, e6, e5, e4, e3, e2, e1, e0)
16546}
16547
16548/// Set packed 64-bit integers in dst with the supplied values in reverse order.
16549///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_epi64&expand=4993)
16551#[inline]
16552#[target_feature(enable = "avx512f")]
16553#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16554pub fn _mm512_setr_epi64(
16555 e0: i64,
16556 e1: i64,
16557 e2: i64,
16558 e3: i64,
16559 e4: i64,
16560 e5: i64,
16561 e6: i64,
16562 e7: i64,
16563) -> __m512i {
16564 unsafe {
        let r: i64x8 = i64x8::new(e0, e1, e2, e3, e4, e5, e6, e7);
        transmute(r)
16567 }
16568}
16569
16570/// Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16571///
16572/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_pd&expand=3002)
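///
/// A minimal usage sketch (an illustrative addition; `demo` and `table` are
/// hypothetical and an AVX-512F target is assumed):
///
/// ```no_run
/// #[cfg(target_arch = "x86_64")]
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo(table: &[f64; 16]) {
///     use core::arch::x86_64::*;
///     // indices are element offsets; SCALE = 8 because each f64 is 8 bytes wide
///     let idx = _mm256_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14);
///     // gathers table[0], table[2], ..., table[14]
///     let _r = unsafe { _mm512_i32gather_pd::<8>(idx, table.as_ptr()) };
/// }
/// ```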
16573#[inline]
16574#[target_feature(enable = "avx512f")]
16575#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16576#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
16577#[rustc_legacy_const_generics(2)]
16578pub unsafe fn _mm512_i32gather_pd<const SCALE: i32>(
16579 offsets: __m256i,
16580 slice: *const f64,
16581) -> __m512d {
16582 static_assert_imm8_scale!(SCALE);
16583 let zero: f64x8 = f64x8::ZERO;
16584 let neg_one: i8 = -1;
16585 let slice: *const i8 = slice as *const i8;
16586 let offsets: i32x8 = offsets.as_i32x8();
    let r: f64x8 = vgatherdpd(zero, slice, offsets, neg_one, SCALE);
    transmute(r)
16589}
16590
16591/// Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16592///
16593/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_pd&expand=3003)
16594#[inline]
16595#[target_feature(enable = "avx512f")]
16596#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16597#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
16598#[rustc_legacy_const_generics(4)]
16599pub unsafe fn _mm512_mask_i32gather_pd<const SCALE: i32>(
16600 src: __m512d,
16601 mask: __mmask8,
16602 offsets: __m256i,
16603 slice: *const f64,
16604) -> __m512d {
16605 static_assert_imm8_scale!(SCALE);
16606 let src: f64x8 = src.as_f64x8();
16607 let slice: *const i8 = slice as *const i8;
16608 let offsets: i32x8 = offsets.as_i32x8();
16609 let r: f64x8 = vgatherdpd(src, slice, offsets, mask as i8, SCALE);
    transmute(r)
16611}
16612
16613/// Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16614///
16615/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_pd&expand=3092)
16616#[inline]
16617#[target_feature(enable = "avx512f")]
16618#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16619#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
16620#[rustc_legacy_const_generics(2)]
16621pub unsafe fn _mm512_i64gather_pd<const SCALE: i32>(
16622 offsets: __m512i,
16623 slice: *const f64,
16624) -> __m512d {
16625 static_assert_imm8_scale!(SCALE);
16626 let zero: f64x8 = f64x8::ZERO;
16627 let neg_one: i8 = -1;
16628 let slice: *const i8 = slice as *const i8;
16629 let offsets: i64x8 = offsets.as_i64x8();
    let r: f64x8 = vgatherqpd(zero, slice, offsets, neg_one, SCALE);
    transmute(r)
16632}
16633
16634/// Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16635///
16636/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_pd&expand=3093)
16637#[inline]
16638#[target_feature(enable = "avx512f")]
16639#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16640#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
16641#[rustc_legacy_const_generics(4)]
16642pub unsafe fn _mm512_mask_i64gather_pd<const SCALE: i32>(
16643 src: __m512d,
16644 mask: __mmask8,
16645 offsets: __m512i,
16646 slice: *const f64,
16647) -> __m512d {
16648 static_assert_imm8_scale!(SCALE);
16649 let src: f64x8 = src.as_f64x8();
16650 let slice: *const i8 = slice as *const i8;
16651 let offsets: i64x8 = offsets.as_i64x8();
16652 let r: f64x8 = vgatherqpd(src, slice, offsets, mask as i8, SCALE);
    transmute(r)
16654}
16655
16656/// Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16657///
16658/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_ps&expand=3100)
16659#[inline]
16660#[target_feature(enable = "avx512f")]
16661#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16662#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
16663#[rustc_legacy_const_generics(2)]
16664pub unsafe fn _mm512_i64gather_ps<const SCALE: i32>(offsets: __m512i, slice: *const f32) -> __m256 {
16665 static_assert_imm8_scale!(SCALE);
16666 let zero: f32x8 = f32x8::ZERO;
16667 let neg_one: i8 = -1;
16668 let slice: *const i8 = slice as *const i8;
16669 let offsets: i64x8 = offsets.as_i64x8();
    let r: f32x8 = vgatherqps(zero, slice, offsets, neg_one, SCALE);
    transmute(r)
16672}
16673
16674/// Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16675///
16676/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_ps&expand=3101)
16677#[inline]
16678#[target_feature(enable = "avx512f")]
16679#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16680#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
16681#[rustc_legacy_const_generics(4)]
16682pub unsafe fn _mm512_mask_i64gather_ps<const SCALE: i32>(
16683 src: __m256,
16684 mask: __mmask8,
16685 offsets: __m512i,
16686 slice: *const f32,
16687) -> __m256 {
16688 static_assert_imm8_scale!(SCALE);
16689 let src: f32x8 = src.as_f32x8();
16690 let slice: *const i8 = slice as *const i8;
16691 let offsets: i64x8 = offsets.as_i64x8();
16692 let r: f32x8 = vgatherqps(src, slice, offsets, mask as i8, SCALE);
    transmute(r)
16694}
16695
16696/// Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16697///
16698/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_ps&expand=3010)
16699#[inline]
16700#[target_feature(enable = "avx512f")]
16701#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16702#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
16703#[rustc_legacy_const_generics(2)]
16704pub unsafe fn _mm512_i32gather_ps<const SCALE: i32>(offsets: __m512i, slice: *const f32) -> __m512 {
16705 static_assert_imm8_scale!(SCALE);
16706 let zero: f32x16 = f32x16::ZERO;
16707 let neg_one: i16 = -1;
16708 let slice: *const i8 = slice as *const i8;
16709 let offsets: i32x16 = offsets.as_i32x16();
    let r: f32x16 = vgatherdps(zero, slice, offsets, neg_one, SCALE);
    transmute(r)
16712}
16713
16714/// Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16715///
16716/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_ps&expand=3011)
16717#[inline]
16718#[target_feature(enable = "avx512f")]
16719#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16720#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
16721#[rustc_legacy_const_generics(4)]
16722pub unsafe fn _mm512_mask_i32gather_ps<const SCALE: i32>(
16723 src: __m512,
16724 mask: __mmask16,
16725 offsets: __m512i,
16726 slice: *const f32,
16727) -> __m512 {
16728 static_assert_imm8_scale!(SCALE);
16729 let src: f32x16 = src.as_f32x16();
16730 let slice: *const i8 = slice as *const i8;
16731 let offsets: i32x16 = offsets.as_i32x16();
16732 let r: f32x16 = vgatherdps(src, slice, offsets, mask as i16, SCALE);
    transmute(r)
16734}
16735
16736/// Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16737///
16738/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_epi32&expand=2986)
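///
/// A minimal usage sketch (an illustrative addition; `demo` and `table` are
/// hypothetical and an AVX-512F target is assumed):
///
/// ```no_run
/// #[cfg(target_arch = "x86_64")]
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo(table: &[i32; 16]) {
///     use core::arch::x86_64::*;
///     let idx = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
///     // loads table[0] through table[15]; SCALE = 4 because each i32 is 4 bytes wide
///     let _r = unsafe { _mm512_i32gather_epi32::<4>(idx, table.as_ptr()) };
/// }
/// ```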
16739#[inline]
16740#[target_feature(enable = "avx512f")]
16741#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16742#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
16743#[rustc_legacy_const_generics(2)]
16744pub unsafe fn _mm512_i32gather_epi32<const SCALE: i32>(
16745 offsets: __m512i,
16746 slice: *const i32,
16747) -> __m512i {
16748 static_assert_imm8_scale!(SCALE);
16749 let zero: i32x16 = i32x16::ZERO;
16750 let neg_one: i16 = -1;
16751 let slice: *const i8 = slice as *const i8;
16752 let offsets: i32x16 = offsets.as_i32x16();
    let r: i32x16 = vpgatherdd(zero, slice, offsets, neg_one, SCALE);
    transmute(r)
16755}
16756
16757/// Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16758///
16759/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_epi32&expand=2987)
16760#[inline]
16761#[target_feature(enable = "avx512f")]
16762#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16763#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
16764#[rustc_legacy_const_generics(4)]
16765pub unsafe fn _mm512_mask_i32gather_epi32<const SCALE: i32>(
16766 src: __m512i,
16767 mask: __mmask16,
16768 offsets: __m512i,
16769 slice: *const i32,
16770) -> __m512i {
16771 static_assert_imm8_scale!(SCALE);
16772 let src: i32x16 = src.as_i32x16();
16773 let mask: i16 = mask as i16;
16774 let slice: *const i8 = slice as *const i8;
16775 let offsets: i32x16 = offsets.as_i32x16();
16776 let r: i32x16 = vpgatherdd(src, slice, offsets, mask, SCALE);
    transmute(r)
16778}
16779
16780/// Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16781///
16782/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_epi64&expand=2994)
16783#[inline]
16784#[target_feature(enable = "avx512f")]
16785#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16786#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
16787#[rustc_legacy_const_generics(2)]
16788pub unsafe fn _mm512_i32gather_epi64<const SCALE: i32>(
16789 offsets: __m256i,
16790 slice: *const i64,
16791) -> __m512i {
16792 static_assert_imm8_scale!(SCALE);
16793 let zero: i64x8 = i64x8::ZERO;
16794 let neg_one: i8 = -1;
16795 let slice: *const i8 = slice as *const i8;
16796 let offsets: i32x8 = offsets.as_i32x8();
    let r: i64x8 = vpgatherdq(zero, slice, offsets, neg_one, SCALE);
    transmute(r)
16799}
16800
16801/// Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16802///
16803/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_epi64&expand=2995)
16804#[inline]
16805#[target_feature(enable = "avx512f")]
16806#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16807#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
16808#[rustc_legacy_const_generics(4)]
16809pub unsafe fn _mm512_mask_i32gather_epi64<const SCALE: i32>(
16810 src: __m512i,
16811 mask: __mmask8,
16812 offsets: __m256i,
16813 slice: *const i64,
16814) -> __m512i {
16815 static_assert_imm8_scale!(SCALE);
16816 let src: i64x8 = src.as_i64x8();
16817 let mask: i8 = mask as i8;
16818 let slice: *const i8 = slice as *const i8;
16819 let offsets: i32x8 = offsets.as_i32x8();
16820 let r: i64x8 = vpgatherdq(src, slice, offsets, mask, SCALE);
    transmute(r)
16822}
16823
16824/// Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16825///
16826/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_epi64&expand=3084)
16827#[inline]
16828#[target_feature(enable = "avx512f")]
16829#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16830#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
16831#[rustc_legacy_const_generics(2)]
16832pub unsafe fn _mm512_i64gather_epi64<const SCALE: i32>(
16833 offsets: __m512i,
16834 slice: *const i64,
16835) -> __m512i {
16836 static_assert_imm8_scale!(SCALE);
16837 let zero: i64x8 = i64x8::ZERO;
16838 let neg_one: i8 = -1;
16839 let slice: *const i8 = slice as *const i8;
16840 let offsets: i64x8 = offsets.as_i64x8();
    let r: i64x8 = vpgatherqq(zero, slice, offsets, neg_one, SCALE);
    transmute(r)
16843}
16844
16845/// Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16846///
16847/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_epi64&expand=3085)
16848#[inline]
16849#[target_feature(enable = "avx512f")]
16850#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16851#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
16852#[rustc_legacy_const_generics(4)]
16853pub unsafe fn _mm512_mask_i64gather_epi64<const SCALE: i32>(
16854 src: __m512i,
16855 mask: __mmask8,
16856 offsets: __m512i,
16857 slice: *const i64,
16858) -> __m512i {
16859 static_assert_imm8_scale!(SCALE);
16860 let src: i64x8 = src.as_i64x8();
16861 let mask: i8 = mask as i8;
16862 let slice: *const i8 = slice as *const i8;
16863 let offsets: i64x8 = offsets.as_i64x8();
16864 let r: i64x8 = vpgatherqq(src, slice, offsets, mask, SCALE);
    transmute(r)
16866}
16867
16868/// Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16869///
16870/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_epi32&expand=3074)
16871#[inline]
16872#[target_feature(enable = "avx512f")]
16873#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16874#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
16875#[rustc_legacy_const_generics(2)]
16876pub unsafe fn _mm512_i64gather_epi32<const SCALE: i32>(
16877 offsets: __m512i,
16878 slice: *const i32,
16879) -> __m256i {
16880 static_assert_imm8_scale!(SCALE);
16881 let zeros: i32x8 = i32x8::ZERO;
16882 let neg_one: i8 = -1;
16883 let slice: *const i8 = slice as *const i8;
16884 let offsets: i64x8 = offsets.as_i64x8();
    let r: i32x8 = vpgatherqd(zeros, slice, offsets, neg_one, SCALE);
    transmute(r)
16887}
16888
16889/// Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16890///
16891/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_epi32&expand=3075)
16892#[inline]
16893#[target_feature(enable = "avx512f")]
16894#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16895#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
16896#[rustc_legacy_const_generics(4)]
16897pub unsafe fn _mm512_mask_i64gather_epi32<const SCALE: i32>(
16898 src: __m256i,
16899 mask: __mmask8,
16900 offsets: __m512i,
16901 slice: *const i32,
16902) -> __m256i {
16903 static_assert_imm8_scale!(SCALE);
16904 let src: i32x8 = src.as_i32x8();
16905 let mask: i8 = mask as i8;
16906 let slice: *const i8 = slice as *const i8;
16907 let offsets: i64x8 = offsets.as_i64x8();
16908 let r: i32x8 = vpgatherqd(src, slice, offsets, mask, SCALE);
    transmute(r)
16910}
16911
16912/// Scatter double-precision (64-bit) floating-point elements from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
16913///
16914/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_pd&expand=3044)
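///
/// A minimal usage sketch (an illustrative addition; `demo` and `out` are
/// hypothetical and an AVX-512F target is assumed):
///
/// ```no_run
/// #[cfg(target_arch = "x86_64")]
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo(out: &mut [f64; 16]) {
///     use core::arch::x86_64::*;
///     let idx = _mm256_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14);
///     let src = _mm512_set1_pd(1.0);
///     // writes 1.0 to out[0], out[2], ..., out[14]; SCALE = 8 (each f64 is 8 bytes wide)
///     unsafe { _mm512_i32scatter_pd::<8>(out.as_mut_ptr(), idx, src) };
/// }
/// ```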
16915#[inline]
16916#[target_feature(enable = "avx512f")]
16917#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16918#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
16919#[rustc_legacy_const_generics(3)]
16920pub unsafe fn _mm512_i32scatter_pd<const SCALE: i32>(
16921 slice: *mut f64,
16922 offsets: __m256i,
16923 src: __m512d,
16924) {
16925 static_assert_imm8_scale!(SCALE);
16926 let src: f64x8 = src.as_f64x8();
16927 let neg_one: i8 = -1;
16928 let slice: *mut i8 = slice as *mut i8;
16929 let offsets: i32x8 = offsets.as_i32x8();
    vscatterdpd(slice, neg_one, offsets, src, SCALE);
16931}
16932
16933/// Scatter double-precision (64-bit) floating-point elements from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16934///
16935/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_pd&expand=3045)
16936#[inline]
16937#[target_feature(enable = "avx512f")]
16938#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16939#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
16940#[rustc_legacy_const_generics(4)]
16941pub unsafe fn _mm512_mask_i32scatter_pd<const SCALE: i32>(
16942 slice: *mut f64,
16943 mask: __mmask8,
16944 offsets: __m256i,
16945 src: __m512d,
16946) {
16947 static_assert_imm8_scale!(SCALE);
16948 let src: f64x8 = src.as_f64x8();
16949 let slice: *mut i8 = slice as *mut i8;
16950 let offsets: i32x8 = offsets.as_i32x8();
16951 vscatterdpd(slice, mask as i8, offsets, src, SCALE);
16952}
16953
16954/// Scatter double-precision (64-bit) floating-point elements from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
16955///
16956/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_pd&expand=3122)
16957#[inline]
16958#[target_feature(enable = "avx512f")]
16959#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16960#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
16961#[rustc_legacy_const_generics(3)]
16962pub unsafe fn _mm512_i64scatter_pd<const SCALE: i32>(
16963 slice: *mut f64,
16964 offsets: __m512i,
16965 src: __m512d,
16966) {
16967 static_assert_imm8_scale!(SCALE);
16968 let src: f64x8 = src.as_f64x8();
16969 let neg_one: i8 = -1;
16970 let slice: *mut i8 = slice as *mut i8;
16971 let offsets: i64x8 = offsets.as_i64x8();
    vscatterqpd(slice, neg_one, offsets, src, SCALE);
16973}
16974
16975/// Scatter double-precision (64-bit) floating-point elements from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16976///
16977/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_pd&expand=3123)
16978#[inline]
16979#[target_feature(enable = "avx512f")]
16980#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
16981#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
16982#[rustc_legacy_const_generics(4)]
16983pub unsafe fn _mm512_mask_i64scatter_pd<const SCALE: i32>(
16984 slice: *mut f64,
16985 mask: __mmask8,
16986 offsets: __m512i,
16987 src: __m512d,
16988) {
16989 static_assert_imm8_scale!(SCALE);
16990 let src: f64x8 = src.as_f64x8();
16991 let slice: *mut i8 = slice as *mut i8;
16992 let offsets: i64x8 = offsets.as_i64x8();
16993 vscatterqpd(slice, mask as i8, offsets, src, SCALE);
16994}
16995
16996/// Scatter single-precision (32-bit) floating-point elements from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
16997///
16998/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_ps&expand=3050)
16999#[inline]
17000#[target_feature(enable = "avx512f")]
17001#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17002#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
17003#[rustc_legacy_const_generics(3)]
17004pub unsafe fn _mm512_i32scatter_ps<const SCALE: i32>(
17005 slice: *mut f32,
17006 offsets: __m512i,
17007 src: __m512,
17008) {
17009 static_assert_imm8_scale!(SCALE);
17010 let src: f32x16 = src.as_f32x16();
17011 let neg_one: i16 = -1;
17012 let slice: *mut i8 = slice as *mut i8;
17013 let offsets: i32x16 = offsets.as_i32x16();
    vscatterdps(slice, neg_one, offsets, src, SCALE);
17015}
17016
17017/// Scatter single-precision (32-bit) floating-point elements from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17018///
17019/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_ps&expand=3051)
17020#[inline]
17021#[target_feature(enable = "avx512f")]
17022#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17023#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
17024#[rustc_legacy_const_generics(4)]
17025pub unsafe fn _mm512_mask_i32scatter_ps<const SCALE: i32>(
17026 slice: *mut f32,
17027 mask: __mmask16,
17028 offsets: __m512i,
17029 src: __m512,
17030) {
17031 static_assert_imm8_scale!(SCALE);
17032 let src: f32x16 = src.as_f32x16();
17033 let slice: *mut i8 = slice as *mut i8;
17034 let offsets: i32x16 = offsets.as_i32x16();
17035 vscatterdps(slice, mask as i16, offsets, src, SCALE);
17036}
17037
/// Scatter single-precision (32-bit) floating-point elements from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17039///
17040/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_ps&expand=3128)
17041#[inline]
17042#[target_feature(enable = "avx512f")]
17043#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17044#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
17045#[rustc_legacy_const_generics(3)]
17046pub unsafe fn _mm512_i64scatter_ps<const SCALE: i32>(
17047 slice: *mut f32,
17048 offsets: __m512i,
17049 src: __m256,
17050) {
17051 static_assert_imm8_scale!(SCALE);
17052 let src: f32x8 = src.as_f32x8();
17053 let neg_one: i8 = -1;
17054 let slice: *mut i8 = slice as *mut i8;
17055 let offsets: i64x8 = offsets.as_i64x8();
    vscatterqps(slice, neg_one, offsets, src, SCALE);
17057}
17058
17059/// Scatter single-precision (32-bit) floating-point elements from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17060///
17061/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_ps&expand=3129)
17062#[inline]
17063#[target_feature(enable = "avx512f")]
17064#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17065#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
17066#[rustc_legacy_const_generics(4)]
17067pub unsafe fn _mm512_mask_i64scatter_ps<const SCALE: i32>(
17068 slice: *mut f32,
17069 mask: __mmask8,
17070 offsets: __m512i,
17071 src: __m256,
17072) {
17073 static_assert_imm8_scale!(SCALE);
17074 let src: f32x8 = src.as_f32x8();
17075 let slice: *mut i8 = slice as *mut i8;
17076 let offsets: i64x8 = offsets.as_i64x8();
17077 vscatterqps(slice, mask as i8, offsets, src, SCALE);
17078}
17079
17080/// Scatter 64-bit integers from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17081///
17082/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_epi64&expand=3038)
17083#[inline]
17084#[target_feature(enable = "avx512f")]
17085#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17086#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17087#[rustc_legacy_const_generics(3)]
17088pub unsafe fn _mm512_i32scatter_epi64<const SCALE: i32>(
17089 slice: *mut i64,
17090 offsets: __m256i,
17091 src: __m512i,
17092) {
17093 static_assert_imm8_scale!(SCALE);
17094 let src: i64x8 = src.as_i64x8();
17095 let neg_one: i8 = -1;
17096 let slice: *mut i8 = slice as *mut i8;
17097 let offsets: i32x8 = offsets.as_i32x8();
    vpscatterdq(slice, neg_one, offsets, src, SCALE);
17099}
17100
17101/// Scatter 64-bit integers from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17102///
17103/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_epi64&expand=3039)
17104#[inline]
17105#[target_feature(enable = "avx512f")]
17106#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17107#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17108#[rustc_legacy_const_generics(4)]
17109pub unsafe fn _mm512_mask_i32scatter_epi64<const SCALE: i32>(
17110 slice: *mut i64,
17111 mask: __mmask8,
17112 offsets: __m256i,
17113 src: __m512i,
17114) {
17115 static_assert_imm8_scale!(SCALE);
17116 let src: i64x8 = src.as_i64x8();
17117 let mask: i8 = mask as i8;
17118 let slice: *mut i8 = slice as *mut i8;
17119 let offsets: i32x8 = offsets.as_i32x8();
17120 vpscatterdq(slice, mask, offsets, src, SCALE);
17121}
17122
17123/// Scatter 64-bit integers from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17124///
17125/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_epi64&expand=3116)
17126#[inline]
17127#[target_feature(enable = "avx512f")]
17128#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17129#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
17130#[rustc_legacy_const_generics(3)]
17131pub unsafe fn _mm512_i64scatter_epi64<const SCALE: i32>(
17132 slice: *mut i64,
17133 offsets: __m512i,
17134 src: __m512i,
17135) {
17136 static_assert_imm8_scale!(SCALE);
17137 let src: i64x8 = src.as_i64x8();
17138 let neg_one: i8 = -1;
17139 let slice: *mut i8 = slice as *mut i8;
17140 let offsets: i64x8 = offsets.as_i64x8();
    vpscatterqq(slice, neg_one, offsets, src, SCALE);
17142}
17143
17144/// Scatter 64-bit integers from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17145///
17146/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_epi64&expand=3117)
17147#[inline]
17148#[target_feature(enable = "avx512f")]
17149#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17150#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
17151#[rustc_legacy_const_generics(4)]
17152pub unsafe fn _mm512_mask_i64scatter_epi64<const SCALE: i32>(
17153 slice: *mut i64,
17154 mask: __mmask8,
17155 offsets: __m512i,
17156 src: __m512i,
17157) {
17158 static_assert_imm8_scale!(SCALE);
17159 let src: i64x8 = src.as_i64x8();
17160 let mask: i8 = mask as i8;
17161 let slice: *mut i8 = slice as *mut i8;
17162 let offsets: i64x8 = offsets.as_i64x8();
17163 vpscatterqq(slice, mask, offsets, src, SCALE);
17164}
17165
17166/// Scatter 32-bit integers from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17167///
17168/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_epi32&expand=3032)
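///
/// A minimal usage sketch (an illustrative addition; `demo` and `out` are
/// hypothetical and an AVX-512F target is assumed):
///
/// ```no_run
/// #[cfg(target_arch = "x86_64")]
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo(out: &mut [i32; 16]) {
///     use core::arch::x86_64::*;
///     let idx = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
///     let src = _mm512_set1_epi32(7);
///     // fills out[0] through out[15] with 7; SCALE = 4 (each i32 is 4 bytes wide)
///     unsafe { _mm512_i32scatter_epi32::<4>(out.as_mut_ptr(), idx, src) };
/// }
/// ```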
17169#[inline]
17170#[target_feature(enable = "avx512f")]
17171#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17172#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
17173#[rustc_legacy_const_generics(3)]
17174pub unsafe fn _mm512_i32scatter_epi32<const SCALE: i32>(
17175 slice: *mut i32,
17176 offsets: __m512i,
17177 src: __m512i,
17178) {
17179 static_assert_imm8_scale!(SCALE);
17180 let src: i32x16 = src.as_i32x16();
17181 let neg_one: i16 = -1;
17182 let slice: *mut i8 = slice as *mut i8;
17183 let offsets: i32x16 = offsets.as_i32x16();
    vpscatterdd(slice, neg_one, offsets, src, SCALE);
17185}
17186
17187/// Scatter 32-bit integers from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17188///
17189/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_epi32&expand=3033)
17190#[inline]
17191#[target_feature(enable = "avx512f")]
17192#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17193#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
17194#[rustc_legacy_const_generics(4)]
17195pub unsafe fn _mm512_mask_i32scatter_epi32<const SCALE: i32>(
17196 slice: *mut i32,
17197 mask: __mmask16,
17198 offsets: __m512i,
17199 src: __m512i,
17200) {
17201 static_assert_imm8_scale!(SCALE);
17202 let src: i32x16 = src.as_i32x16();
17203 let mask: i16 = mask as i16;
17204 let slice: *mut i8 = slice as *mut i8;
17205 let offsets: i32x16 = offsets.as_i32x16();
17206 vpscatterdd(slice, mask, offsets, src, SCALE);
17207}
17208
17209/// Scatter 32-bit integers from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17210///
17211/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_epi32&expand=3108)
17212#[inline]
17213#[target_feature(enable = "avx512f")]
17214#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17215#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
17216#[rustc_legacy_const_generics(3)]
17217pub unsafe fn _mm512_i64scatter_epi32<const SCALE: i32>(
17218 slice: *mut i32,
17219 offsets: __m512i,
17220 src: __m256i,
17221) {
17222 static_assert_imm8_scale!(SCALE);
17223 let src: i32x8 = src.as_i32x8();
17224 let neg_one: i8 = -1;
17225 let slice: *mut i8 = slice as *mut i8;
17226 let offsets: i64x8 = offsets.as_i64x8();
    vpscatterqd(slice, neg_one, offsets, src, SCALE);
17228}
17229
17230/// Scatter 32-bit integers from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17231///
17232/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_epi32&expand=3109)
17233#[inline]
17234#[target_feature(enable = "avx512f")]
17235#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17236#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
17237#[rustc_legacy_const_generics(4)]
17238pub unsafe fn _mm512_mask_i64scatter_epi32<const SCALE: i32>(
17239 slice: *mut i32,
17240 mask: __mmask8,
17241 offsets: __m512i,
17242 src: __m256i,
17243) {
17244 static_assert_imm8_scale!(SCALE);
17245 let src: i32x8 = src.as_i32x8();
17246 let mask: i8 = mask as i8;
17247 let slice: *mut i8 = slice as *mut i8;
17248 let offsets: i64x8 = offsets.as_i64x8();
17249 vpscatterqd(slice, mask, offsets, src, SCALE);
17250}
17251
17252/// Loads 8 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
17253/// indices stored in the lower half of vindex scaled by scale and stores them in dst.
17254///
17255/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32logather_epi64)
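///
/// # Examples
///
/// A minimal sketch (fenced as `ignore`, not run as a doctest); it assumes `avx512f` is
/// available. Only the eight indices in the lower 256 bits of `vindex` participate.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let table: [i64; 8] = [10, 11, 12, 13, 14, 15, 16, 17];
/// // The upper eight 32-bit lanes of vindex are ignored.
/// let vindex = _mm512_setr_epi32(7, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0);
/// let r = unsafe { _mm512_i32logather_epi64::<8>(vindex, table.as_ptr()) };
/// // r now holds [17, 16, 15, 14, 13, 12, 11, 10].
/// ```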
17256#[inline]
17257#[target_feature(enable = "avx512f")]
17258#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
17259#[rustc_legacy_const_generics(2)]
17260#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17261pub unsafe fn _mm512_i32logather_epi64<const SCALE: i32>(
17262 vindex: __m512i,
17263 base_addr: *const i64,
17264) -> __m512i {
    _mm512_i32gather_epi64::<SCALE>(_mm512_castsi512_si256(vindex), base_addr)
17266}
17267
17268/// Loads 8 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
17269/// indices stored in the lower half of vindex scaled by scale and stores them in dst using writemask k
17270/// (elements are copied from src when the corresponding mask bit is not set).
17271///
17272/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32logather_epi64)
17273#[inline]
17274#[target_feature(enable = "avx512f")]
17275#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
17276#[rustc_legacy_const_generics(4)]
17277#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17278pub unsafe fn _mm512_mask_i32logather_epi64<const SCALE: i32>(
17279 src: __m512i,
17280 k: __mmask8,
17281 vindex: __m512i,
17282 base_addr: *const i64,
17283) -> __m512i {
    _mm512_mask_i32gather_epi64::<SCALE>(src, k, _mm512_castsi512_si256(vindex), base_addr)
17285}
17286
17287/// Loads 8 double-precision (64-bit) floating-point elements from memory starting at location base_addr
17288/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale and stores them in dst.
17289///
17290/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32logather_pd)
17291#[inline]
17292#[target_feature(enable = "avx512f")]
17293#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
17294#[rustc_legacy_const_generics(2)]
17295#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17296pub unsafe fn _mm512_i32logather_pd<const SCALE: i32>(
17297 vindex: __m512i,
17298 base_addr: *const f64,
17299) -> __m512d {
    _mm512_i32gather_pd::<SCALE>(_mm512_castsi512_si256(vindex), base_addr)
17301}
17302
17303/// Loads 8 double-precision (64-bit) floating-point elements from memory starting at location base_addr
17304/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale and stores them in dst
17305/// using writemask k (elements are copied from src when the corresponding mask bit is not set).
17306///
17307/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32logather_pd)
17308#[inline]
17309#[target_feature(enable = "avx512f")]
17310#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
17311#[rustc_legacy_const_generics(4)]
17312#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17313pub unsafe fn _mm512_mask_i32logather_pd<const SCALE: i32>(
17314 src: __m512d,
17315 k: __mmask8,
17316 vindex: __m512i,
17317 base_addr: *const f64,
17318) -> __m512d {
    _mm512_mask_i32gather_pd::<SCALE>(src, k, _mm512_castsi512_si256(vindex), base_addr)
17320}
17321
17322/// Stores 8 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17323/// indices stored in the lower half of vindex scaled by scale.
17324///
17325/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32loscatter_epi64)
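///
/// # Examples
///
/// A minimal sketch (fenced as `ignore`, not run as a doctest); it assumes `avx512f` is
/// available and the destination buffer covers every scaled index.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let mut dst = [0i64; 8];
/// // Only the lower eight 32-bit indices of vindex are used.
/// let vindex = _mm512_setr_epi32(7, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0);
/// let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
/// unsafe {
///     _mm512_i32loscatter_epi64::<8>(dst.as_mut_ptr(), vindex, a);
/// }
/// assert_eq!(dst, [7, 6, 5, 4, 3, 2, 1, 0]);
/// ```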
17326#[inline]
17327#[target_feature(enable = "avx512f")]
17328#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17329#[rustc_legacy_const_generics(3)]
17330#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17331pub unsafe fn _mm512_i32loscatter_epi64<const SCALE: i32>(
17332 base_addr: *mut i64,
17333 vindex: __m512i,
17334 a: __m512i,
17335) {
    _mm512_i32scatter_epi64::<SCALE>(base_addr, _mm512_castsi512_si256(vindex), a)
17337}
17338
17339/// Stores 8 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17340/// indices stored in the lower half of vindex scaled by scale using writemask k (elements whose corresponding
17341/// mask bit is not set are not written to memory).
17342///
17343/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32loscatter_epi64)
17344#[inline]
17345#[target_feature(enable = "avx512f")]
17346#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17347#[rustc_legacy_const_generics(4)]
17348#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17349pub unsafe fn _mm512_mask_i32loscatter_epi64<const SCALE: i32>(
17350 base_addr: *mut i64,
17351 k: __mmask8,
17352 vindex: __m512i,
17353 a: __m512i,
17354) {
    _mm512_mask_i32scatter_epi64::<SCALE>(base_addr, k, _mm512_castsi512_si256(vindex), a)
17356}
17357
17358/// Stores 8 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
17359/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale.
17360///
17361/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32loscatter_pd)
17362#[inline]
17363#[target_feature(enable = "avx512f")]
17364#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
17365#[rustc_legacy_const_generics(3)]
17366#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17367pub unsafe fn _mm512_i32loscatter_pd<const SCALE: i32>(
17368 base_addr: *mut f64,
17369 vindex: __m512i,
17370 a: __m512d,
17371) {
    _mm512_i32scatter_pd::<SCALE>(base_addr, _mm512_castsi512_si256(vindex), a)
17373}
17374
17375/// Stores 8 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
17376/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale using writemask k
17377/// (elements whose corresponding mask bit is not set are not written to memory).
17378///
17379/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32loscatter_pd)
17380#[inline]
17381#[target_feature(enable = "avx512f")]
17382#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
17383#[rustc_legacy_const_generics(4)]
17384#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17385pub unsafe fn _mm512_mask_i32loscatter_pd<const SCALE: i32>(
17386 base_addr: *mut f64,
17387 k: __mmask8,
17388 vindex: __m512i,
17389 a: __m512d,
17390) {
    _mm512_mask_i32scatter_pd::<SCALE>(base_addr, k, _mm512_castsi512_si256(vindex), a)
17392}
17393
17394/// Stores 8 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17395/// indices stored in vindex scaled by scale
17396///
17397/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32scatter_epi32)
17398#[inline]
17399#[target_feature(enable = "avx512f,avx512vl")]
17400#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
17401#[rustc_legacy_const_generics(3)]
17402#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17403pub unsafe fn _mm256_i32scatter_epi32<const SCALE: i32>(
17404 base_addr: *mut i32,
17405 vindex: __m256i,
17406 a: __m256i,
17407) {
17408 static_assert_imm8_scale!(SCALE);
    vpscatterdd_256(base_addr as _, 0xff, vindex.as_i32x8(), a.as_i32x8(), SCALE)
17410}
17411
17412/// Stores 8 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17413/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
17414/// are not written to memory).
17415///
17416/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_epi32)
17417#[inline]
17418#[target_feature(enable = "avx512f,avx512vl")]
17419#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
17420#[rustc_legacy_const_generics(4)]
17421#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17422pub unsafe fn _mm256_mask_i32scatter_epi32<const SCALE: i32>(
17423 base_addr: *mut i32,
17424 k: __mmask8,
17425 vindex: __m256i,
17426 a: __m256i,
17427) {
17428 static_assert_imm8_scale!(SCALE);
    vpscatterdd_256(base_addr as _, k, vindex.as_i32x8(), a.as_i32x8(), SCALE)
17430}
17431
17432/// Scatter 64-bit integers from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17433///
17434/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i32scatter_epi64&expand=4099)
17435#[inline]
17436#[target_feature(enable = "avx512f,avx512vl")]
17437#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17438#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17439#[rustc_legacy_const_generics(3)]
17440pub unsafe fn _mm256_i32scatter_epi64<const SCALE: i32>(
17441 slice: *mut i64,
17442 offsets: __m128i,
17443 src: __m256i,
17444) {
17445 static_assert_imm8_scale!(SCALE);
17446 let src: i64x4 = src.as_i64x4();
17447 let slice: *mut i8 = slice as *mut i8;
17448 let offsets: i32x4 = offsets.as_i32x4();
    vpscatterdq_256(slice, 0xff, offsets, src, SCALE);
17450}
17451
17452/// Stores 4 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17453/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
17454/// are not written to memory).
17455///
17456/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_epi64)
17457#[inline]
17458#[target_feature(enable = "avx512f,avx512vl")]
17459#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17460#[rustc_legacy_const_generics(4)]
17461#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17462pub unsafe fn _mm256_mask_i32scatter_epi64<const SCALE: i32>(
17463 base_addr: *mut i64,
17464 k: __mmask8,
17465 vindex: __m128i,
17466 a: __m256i,
17467) {
17468 static_assert_imm8_scale!(SCALE);
    vpscatterdq_256(base_addr as _, k, vindex.as_i32x4(), a.as_i64x4(), SCALE)
17470}
17471
17472/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
17473/// at packed 32-bit integer indices stored in vindex scaled by scale
17474///
17475/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32scatter_pd)
17476#[inline]
17477#[target_feature(enable = "avx512f,avx512vl")]
17478#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
17479#[rustc_legacy_const_generics(3)]
17480#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17481pub unsafe fn _mm256_i32scatter_pd<const SCALE: i32>(
17482 base_addr: *mut f64,
17483 vindex: __m128i,
17484 a: __m256d,
17485) {
17486 static_assert_imm8_scale!(SCALE);
    vscatterdpd_256(base_addr as _, 0xff, vindex.as_i32x4(), a.as_f64x4(), SCALE)
17488}
17489
17490/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
17491/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
17492/// mask bit is not set are not written to memory).
17493///
17494/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_pd)
17495#[inline]
17496#[target_feature(enable = "avx512f,avx512vl")]
17497#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
17498#[rustc_legacy_const_generics(4)]
17499#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17500pub unsafe fn _mm256_mask_i32scatter_pd<const SCALE: i32>(
17501 base_addr: *mut f64,
17502 k: __mmask8,
17503 vindex: __m128i,
17504 a: __m256d,
17505) {
17506 static_assert_imm8_scale!(SCALE);
    vscatterdpd_256(base_addr as _, k, vindex.as_i32x4(), a.as_f64x4(), SCALE)
17508}
17509
17510/// Stores 8 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
17511/// at packed 32-bit integer indices stored in vindex scaled by scale
17512///
17513/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32scatter_ps)
17514#[inline]
17515#[target_feature(enable = "avx512f,avx512vl")]
17516#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
17517#[rustc_legacy_const_generics(3)]
17518#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17519pub unsafe fn _mm256_i32scatter_ps<const SCALE: i32>(
17520 base_addr: *mut f32,
17521 vindex: __m256i,
17522 a: __m256,
17523) {
17524 static_assert_imm8_scale!(SCALE);
    vscatterdps_256(base_addr as _, 0xff, vindex.as_i32x8(), a.as_f32x8(), SCALE)
17526}
17527
17528/// Stores 8 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
17529/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
17530/// mask bit is not set are not written to memory).
17531///
17532/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_ps)
17533#[inline]
17534#[target_feature(enable = "avx512f,avx512vl")]
17535#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
17536#[rustc_legacy_const_generics(4)]
17537#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17538pub unsafe fn _mm256_mask_i32scatter_ps<const SCALE: i32>(
17539 base_addr: *mut f32,
17540 k: __mmask8,
17541 vindex: __m256i,
17542 a: __m256,
17543) {
17544 static_assert_imm8_scale!(SCALE);
    vscatterdps_256(base_addr as _, k, vindex.as_i32x8(), a.as_f32x8(), SCALE)
17546}
17547
17548/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
17549/// indices stored in vindex scaled by scale
17550///
17551/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_epi32)
17552#[inline]
17553#[target_feature(enable = "avx512f,avx512vl")]
17554#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
17555#[rustc_legacy_const_generics(3)]
17556#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17557pub unsafe fn _mm256_i64scatter_epi32<const SCALE: i32>(
17558 base_addr: *mut i32,
17559 vindex: __m256i,
17560 a: __m128i,
17561) {
17562 static_assert_imm8_scale!(SCALE);
    vpscatterqd_256(base_addr as _, 0xff, vindex.as_i64x4(), a.as_i32x4(), SCALE)
17564}
17565
17566/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
17567/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
17568/// are not written to memory).
17569///
17570/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_epi32)
17571#[inline]
17572#[target_feature(enable = "avx512f,avx512vl")]
17573#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
17574#[rustc_legacy_const_generics(4)]
17575#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17576pub unsafe fn _mm256_mask_i64scatter_epi32<const SCALE: i32>(
17577 base_addr: *mut i32,
17578 k: __mmask8,
17579 vindex: __m256i,
17580 a: __m128i,
17581) {
17582 static_assert_imm8_scale!(SCALE);
    vpscatterqd_256(base_addr as _, k, vindex.as_i64x4(), a.as_i32x4(), SCALE)
17584}
17585
17586/// Stores 4 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
17587/// indices stored in vindex scaled by scale
17588///
17589/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_epi64)
17590#[inline]
17591#[target_feature(enable = "avx512f,avx512vl")]
17592#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
17593#[rustc_legacy_const_generics(3)]
17594#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17595pub unsafe fn _mm256_i64scatter_epi64<const SCALE: i32>(
17596 base_addr: *mut i64,
17597 vindex: __m256i,
17598 a: __m256i,
17599) {
17600 static_assert_imm8_scale!(SCALE);
    vpscatterqq_256(base_addr as _, 0xff, vindex.as_i64x4(), a.as_i64x4(), SCALE)
17602}
17603
17604/// Stores 4 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
17605/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
17606/// are not written to memory).
17607///
17608/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_epi64)
17609#[inline]
17610#[target_feature(enable = "avx512f,avx512vl")]
17611#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
17612#[rustc_legacy_const_generics(4)]
17613#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17614pub unsafe fn _mm256_mask_i64scatter_epi64<const SCALE: i32>(
17615 base_addr: *mut i64,
17616 k: __mmask8,
17617 vindex: __m256i,
17618 a: __m256i,
17619) {
17620 static_assert_imm8_scale!(SCALE);
    vpscatterqq_256(base_addr as _, k, vindex.as_i64x4(), a.as_i64x4(), SCALE)
17622}
17623
17624/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
17625/// at packed 64-bit integer indices stored in vindex scaled by scale
17626///
17627/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_pd)
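///
/// # Examples
///
/// A minimal sketch (fenced as `ignore`, not run as a doctest); it assumes `avx512f` and
/// `avx512vl` are available.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let mut dst = [0.0f64; 4];
/// let vindex = _mm256_setr_epi64x(0, 1, 2, 3);
/// let a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
/// unsafe {
///     // SCALE = 8 because each double is 8 bytes wide.
///     _mm256_i64scatter_pd::<8>(dst.as_mut_ptr(), vindex, a);
/// }
/// assert_eq!(dst, [1.0, 2.0, 3.0, 4.0]);
/// ```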
17628#[inline]
17629#[target_feature(enable = "avx512f,avx512vl")]
17630#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
17631#[rustc_legacy_const_generics(3)]
17632#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17633pub unsafe fn _mm256_i64scatter_pd<const SCALE: i32>(
17634 base_addr: *mut f64,
17635 vindex: __m256i,
17636 a: __m256d,
17637) {
17638 static_assert_imm8_scale!(SCALE);
    vscatterqpd_256(base_addr as _, 0xff, vindex.as_i64x4(), a.as_f64x4(), SCALE)
17640}
17641
17642/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
17643/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
17644/// mask bit is not set are not written to memory).
17645///
17646/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_pd)
17647#[inline]
17648#[target_feature(enable = "avx512f,avx512vl")]
17649#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
17650#[rustc_legacy_const_generics(4)]
17651#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17652pub unsafe fn _mm256_mask_i64scatter_pd<const SCALE: i32>(
17653 base_addr: *mut f64,
17654 k: __mmask8,
17655 vindex: __m256i,
17656 a: __m256d,
17657) {
17658 static_assert_imm8_scale!(SCALE);
    vscatterqpd_256(base_addr as _, k, vindex.as_i64x4(), a.as_f64x4(), SCALE)
17660}
17661
17662/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
17663/// at packed 64-bit integer indices stored in vindex scaled by scale
17664///
17665/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_ps)
17666#[inline]
17667#[target_feature(enable = "avx512f,avx512vl")]
17668#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
17669#[rustc_legacy_const_generics(3)]
17670#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17671pub unsafe fn _mm256_i64scatter_ps<const SCALE: i32>(
17672 base_addr: *mut f32,
17673 vindex: __m256i,
17674 a: __m128,
17675) {
17676 static_assert_imm8_scale!(SCALE);
    vscatterqps_256(base_addr as _, 0xff, vindex.as_i64x4(), a.as_f32x4(), SCALE)
17678}
17679
17680/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
17681/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
17682/// mask bit is not set are not written to memory).
17683///
17684/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_ps)
17685#[inline]
17686#[target_feature(enable = "avx512f,avx512vl")]
17687#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
17688#[rustc_legacy_const_generics(4)]
17689#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17690pub unsafe fn _mm256_mask_i64scatter_ps<const SCALE: i32>(
17691 base_addr: *mut f32,
17692 k: __mmask8,
17693 vindex: __m256i,
17694 a: __m128,
17695) {
17696 static_assert_imm8_scale!(SCALE);
    vscatterqps_256(base_addr as _, k, vindex.as_i64x4(), a.as_f32x4(), SCALE)
17698}
17699
17700/// Loads 8 32-bit integer elements from memory starting at location base_addr at packed 32-bit integer
17701/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
17702/// mask bit is not set).
17703///
17704/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_epi32)
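///
/// # Examples
///
/// A minimal sketch (fenced as `ignore`, not run as a doctest); it assumes `avx512f` and
/// `avx512vl` are available.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let table: [i32; 8] = [10, 20, 30, 40, 50, 60, 70, 80];
/// let src = _mm256_set1_epi32(-1);
/// let vindex = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
/// // Only the four lanes selected by the mask are gathered; the rest come from src.
/// let r = unsafe { _mm256_mmask_i32gather_epi32::<4>(src, 0b0000_1111, vindex, table.as_ptr()) };
/// // r now holds [10, 20, 30, 40, -1, -1, -1, -1].
/// ```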
17705#[inline]
17706#[target_feature(enable = "avx512f,avx512vl")]
17707#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
17708#[rustc_legacy_const_generics(4)]
17709#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17710pub unsafe fn _mm256_mmask_i32gather_epi32<const SCALE: i32>(
17711 src: __m256i,
17712 k: __mmask8,
17713 vindex: __m256i,
17714 base_addr: *const i32,
17715) -> __m256i {
17716 static_assert_imm8_scale!(SCALE);
    transmute(vpgatherdd_256(
        src.as_i32x8(),
        base_addr as _,
        vindex.as_i32x8(),
        k,
        SCALE,
    ))
17724}
17725
17726/// Loads 4 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
17727/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
17728/// mask bit is not set).
17729///
17730/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_epi64)
17731#[inline]
17732#[target_feature(enable = "avx512f,avx512vl")]
17733#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
17734#[rustc_legacy_const_generics(4)]
17735#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17736pub unsafe fn _mm256_mmask_i32gather_epi64<const SCALE: i32>(
17737 src: __m256i,
17738 k: __mmask8,
17739 vindex: __m128i,
17740 base_addr: *const i64,
17741) -> __m256i {
17742 static_assert_imm8_scale!(SCALE);
    transmute(vpgatherdq_256(
        src.as_i64x4(),
        base_addr as _,
        vindex.as_i32x4(),
        k,
        SCALE,
    ))
17750}
17751
17752/// Loads 4 double-precision (64-bit) floating-point elements from memory starting at location base_addr
17753/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
17754/// from src when the corresponding mask bit is not set).
17755///
17756/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_pd)
17757#[inline]
17758#[target_feature(enable = "avx512f,avx512vl")]
17759#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
17760#[rustc_legacy_const_generics(4)]
17761#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17762pub unsafe fn _mm256_mmask_i32gather_pd<const SCALE: i32>(
17763 src: __m256d,
17764 k: __mmask8,
17765 vindex: __m128i,
17766 base_addr: *const f64,
17767) -> __m256d {
17768 static_assert_imm8_scale!(SCALE);
    transmute(vgatherdpd_256(
        src.as_f64x4(),
        base_addr as _,
        vindex.as_i32x4(),
        k,
        SCALE,
    ))
17776}
17777
17778/// Loads 8 single-precision (32-bit) floating-point elements from memory starting at location base_addr
17779/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
17780/// from src when the corresponding mask bit is not set).
17781///
17782/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_ps)
17783#[inline]
17784#[target_feature(enable = "avx512f,avx512vl")]
17785#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
17786#[rustc_legacy_const_generics(4)]
17787#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17788pub unsafe fn _mm256_mmask_i32gather_ps<const SCALE: i32>(
17789 src: __m256,
17790 k: __mmask8,
17791 vindex: __m256i,
17792 base_addr: *const f32,
17793) -> __m256 {
17794 static_assert_imm8_scale!(SCALE);
    transmute(vgatherdps_256(
        src.as_f32x8(),
        base_addr as _,
        vindex.as_i32x8(),
        k,
        SCALE,
    ))
17802}
17803
17804/// Loads 4 32-bit integer elements from memory starting at location base_addr at packed 64-bit integer
17805/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
17806/// mask bit is not set).
17807///
17808/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_epi32)
17809#[inline]
17810#[target_feature(enable = "avx512f,avx512vl")]
17811#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
17812#[rustc_legacy_const_generics(4)]
17813#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17814pub unsafe fn _mm256_mmask_i64gather_epi32<const SCALE: i32>(
17815 src: __m128i,
17816 k: __mmask8,
17817 vindex: __m256i,
17818 base_addr: *const i32,
17819) -> __m128i {
17820 static_assert_imm8_scale!(SCALE);
    transmute(vpgatherqd_256(
        src.as_i32x4(),
        base_addr as _,
        vindex.as_i64x4(),
        k,
        SCALE,
    ))
17828}
17829
17830/// Loads 4 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
17831/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
17832/// mask bit is not set).
17833///
17834/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_epi64)
17835#[inline]
17836#[target_feature(enable = "avx512f,avx512vl")]
17837#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
17838#[rustc_legacy_const_generics(4)]
17839#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17840pub unsafe fn _mm256_mmask_i64gather_epi64<const SCALE: i32>(
17841 src: __m256i,
17842 k: __mmask8,
17843 vindex: __m256i,
17844 base_addr: *const i64,
17845) -> __m256i {
17846 static_assert_imm8_scale!(SCALE);
    transmute(vpgatherqq_256(
        src.as_i64x4(),
        base_addr as _,
        vindex.as_i64x4(),
        k,
        SCALE,
    ))
17854}
17855
17856/// Loads 4 double-precision (64-bit) floating-point elements from memory starting at location base_addr
17857/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
17858/// from src when the corresponding mask bit is not set).
17859///
17860/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_pd)
17861#[inline]
17862#[target_feature(enable = "avx512f,avx512vl")]
17863#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
17864#[rustc_legacy_const_generics(4)]
17865#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17866pub unsafe fn _mm256_mmask_i64gather_pd<const SCALE: i32>(
17867 src: __m256d,
17868 k: __mmask8,
17869 vindex: __m256i,
17870 base_addr: *const f64,
17871) -> __m256d {
17872 static_assert_imm8_scale!(SCALE);
    transmute(vgatherqpd_256(
        src.as_f64x4(),
        base_addr as _,
        vindex.as_i64x4(),
        k,
        SCALE,
    ))
17880}
17881
17882/// Loads 4 single-precision (32-bit) floating-point elements from memory starting at location base_addr
17883/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
17884/// from src when the corresponding mask bit is not set).
17885///
17886/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_ps)
17887#[inline]
17888#[target_feature(enable = "avx512f,avx512vl")]
17889#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
17890#[rustc_legacy_const_generics(4)]
17891#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17892pub unsafe fn _mm256_mmask_i64gather_ps<const SCALE: i32>(
17893 src: __m128,
17894 k: __mmask8,
17895 vindex: __m256i,
17896 base_addr: *const f32,
17897) -> __m128 {
17898 static_assert_imm8_scale!(SCALE);
    transmute(vgatherqps_256(
        src.as_f32x4(),
        base_addr as _,
        vindex.as_i64x4(),
        k,
        SCALE,
    ))
17906}
17907
17908/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17909/// indices stored in vindex scaled by scale
17910///
17911/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_epi32)
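///
/// # Examples
///
/// A minimal sketch (fenced as `ignore`, not run as a doctest); it assumes `avx512f` and
/// `avx512vl` are available.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let mut dst = [0i32; 4];
/// let vindex = _mm_setr_epi32(3, 2, 1, 0);
/// let a = _mm_setr_epi32(10, 20, 30, 40);
/// unsafe {
///     // SCALE = 4 because each stored element is 4 bytes wide.
///     _mm_i32scatter_epi32::<4>(dst.as_mut_ptr(), vindex, a);
/// }
/// assert_eq!(dst, [40, 30, 20, 10]);
/// ```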
17912#[inline]
17913#[target_feature(enable = "avx512f,avx512vl")]
17914#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
17915#[rustc_legacy_const_generics(3)]
17916#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17917pub unsafe fn _mm_i32scatter_epi32<const SCALE: i32>(
17918 base_addr: *mut i32,
17919 vindex: __m128i,
17920 a: __m128i,
17921) {
17922 static_assert_imm8_scale!(SCALE);
    vpscatterdd_128(base_addr as _, 0xff, vindex.as_i32x4(), a.as_i32x4(), SCALE)
17924}
17925
17926/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17927/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
17928/// are not written to memory).
17929///
17930/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_epi32)
17931#[inline]
17932#[target_feature(enable = "avx512f,avx512vl")]
17933#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
17934#[rustc_legacy_const_generics(4)]
17935#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17936pub unsafe fn _mm_mask_i32scatter_epi32<const SCALE: i32>(
17937 base_addr: *mut i32,
17938 k: __mmask8,
17939 vindex: __m128i,
17940 a: __m128i,
17941) {
17942 static_assert_imm8_scale!(SCALE);
    vpscatterdd_128(base_addr as _, k, vindex.as_i32x4(), a.as_i32x4(), SCALE)
17944}
17945
17946/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17947/// indices stored in vindex scaled by scale
17948///
17949/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_epi64)
17950#[inline]
17951#[target_feature(enable = "avx512f,avx512vl")]
17952#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17953#[rustc_legacy_const_generics(3)]
17954#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17955pub unsafe fn _mm_i32scatter_epi64<const SCALE: i32>(
17956 base_addr: *mut i64,
17957 vindex: __m128i,
17958 a: __m128i,
17959) {
17960 static_assert_imm8_scale!(SCALE);
    vpscatterdq_128(base_addr as _, 0xff, vindex.as_i32x4(), a.as_i64x2(), SCALE)
17962}
17963
17964/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
17965/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
17966/// are not written to memory).
17967///
17968/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_epi64)
17969#[inline]
17970#[target_feature(enable = "avx512f,avx512vl")]
17971#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17972#[rustc_legacy_const_generics(4)]
17973#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17974pub unsafe fn _mm_mask_i32scatter_epi64<const SCALE: i32>(
17975 base_addr: *mut i64,
17976 k: __mmask8,
17977 vindex: __m128i,
17978 a: __m128i,
17979) {
17980 static_assert_imm8_scale!(SCALE);
    vpscatterdq_128(base_addr as _, k, vindex.as_i32x4(), a.as_i64x2(), SCALE)
17982}
17983
17984/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
17985/// at packed 32-bit integer indices stored in vindex scaled by scale
17986///
17987/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_pd)
17988#[inline]
17989#[target_feature(enable = "avx512f,avx512vl")]
17990#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
17991#[rustc_legacy_const_generics(3)]
17992#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
17993pub unsafe fn _mm_i32scatter_pd<const SCALE: i32>(
17994 base_addr: *mut f64,
17995 vindex: __m128i,
17996 a: __m128d,
17997) {
17998 static_assert_imm8_scale!(SCALE);
    vscatterdpd_128(base_addr as _, 0xff, vindex.as_i32x4(), a.as_f64x2(), SCALE)
18000}
18001
18002/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
18003/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
18004/// mask bit is not set are not written to memory).
18005///
18006/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_pd)
18007#[inline]
18008#[target_feature(enable = "avx512f,avx512vl")]
18009#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
18010#[rustc_legacy_const_generics(4)]
18011#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18012pub unsafe fn _mm_mask_i32scatter_pd<const SCALE: i32>(
18013 base_addr: *mut f64,
18014 k: __mmask8,
18015 vindex: __m128i,
18016 a: __m128d,
18017) {
18018 static_assert_imm8_scale!(SCALE);
    vscatterdpd_128(base_addr as _, k, vindex.as_i32x4(), a.as_f64x2(), SCALE)
18020}
18021
18022/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
18023/// at packed 32-bit integer indices stored in vindex scaled by scale
18024///
18025/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_ps)
18026#[inline]
18027#[target_feature(enable = "avx512f,avx512vl")]
18028#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
18029#[rustc_legacy_const_generics(3)]
18030#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18031pub unsafe fn _mm_i32scatter_ps<const SCALE: i32>(base_addr: *mut f32, vindex: __m128i, a: __m128) {
18032 static_assert_imm8_scale!(SCALE);
    vscatterdps_128(base_addr as _, 0xff, vindex.as_i32x4(), a.as_f32x4(), SCALE)
18034}
18035
18036/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
18037/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
18038/// mask bit is not set are not written to memory).
18039///
18040/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_ps)
18041#[inline]
18042#[target_feature(enable = "avx512f,avx512vl")]
18043#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
18044#[rustc_legacy_const_generics(4)]
18045#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18046pub unsafe fn _mm_mask_i32scatter_ps<const SCALE: i32>(
18047 base_addr: *mut f32,
18048 k: __mmask8,
18049 vindex: __m128i,
18050 a: __m128,
18051) {
18052 static_assert_imm8_scale!(SCALE);
    vscatterdps_128(base_addr as _, k, vindex.as_i32x4(), a.as_f32x4(), SCALE)
18054}
18055
18056/// Stores 2 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
18057/// indices stored in vindex scaled by scale
18058///
18059/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_epi32)
18060#[inline]
18061#[target_feature(enable = "avx512f,avx512vl")]
18062#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
18063#[rustc_legacy_const_generics(3)]
18064#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18065pub unsafe fn _mm_i64scatter_epi32<const SCALE: i32>(
18066 base_addr: *mut i32,
18067 vindex: __m128i,
18068 a: __m128i,
18069) {
18070 static_assert_imm8_scale!(SCALE);
    vpscatterqd_128(base_addr as _, 0xff, vindex.as_i64x2(), a.as_i32x4(), SCALE)
18072}
18073
18074/// Stores 2 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
18075/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
18076/// are not written to memory).
18077///
18078/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_epi32)
18079#[inline]
18080#[target_feature(enable = "avx512f,avx512vl")]
18081#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
18082#[rustc_legacy_const_generics(4)]
18083#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18084pub unsafe fn _mm_mask_i64scatter_epi32<const SCALE: i32>(
18085 base_addr: *mut i32,
18086 k: __mmask8,
18087 vindex: __m128i,
18088 a: __m128i,
18089) {
18090 static_assert_imm8_scale!(SCALE);
    vpscatterqd_128(base_addr as _, k, vindex.as_i64x2(), a.as_i32x4(), SCALE)
18092}
18093
18094/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
18095/// indices stored in vindex scaled by scale
18096///
18097/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_epi64)
18098#[inline]
18099#[target_feature(enable = "avx512f,avx512vl")]
18100#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
18101#[rustc_legacy_const_generics(3)]
18102#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18103pub unsafe fn _mm_i64scatter_epi64<const SCALE: i32>(
18104 base_addr: *mut i64,
18105 vindex: __m128i,
18106 a: __m128i,
18107) {
18108 static_assert_imm8_scale!(SCALE);
    vpscatterqq_128(base_addr as _, 0xff, vindex.as_i64x2(), a.as_i64x2(), SCALE)
18110}
18111
18112/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
18113/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
18114/// are not written to memory).
18115///
18116/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_epi64)
18117#[inline]
18118#[target_feature(enable = "avx512f,avx512vl")]
18119#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
18120#[rustc_legacy_const_generics(4)]
18121#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18122pub unsafe fn _mm_mask_i64scatter_epi64<const SCALE: i32>(
18123 base_addr: *mut i64,
18124 k: __mmask8,
18125 vindex: __m128i,
18126 a: __m128i,
18127) {
18128 static_assert_imm8_scale!(SCALE);
    vpscatterqq_128(base_addr as _, k, vindex.as_i64x2(), a.as_i64x2(), SCALE)
18130}
18131
18132/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
18133/// at packed 64-bit integer indices stored in vindex scaled by scale
18134///
18135/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_pd)
18136#[inline]
18137#[target_feature(enable = "avx512f,avx512vl")]
18138#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
18139#[rustc_legacy_const_generics(3)]
18140#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18141pub unsafe fn _mm_i64scatter_pd<const SCALE: i32>(
18142 base_addr: *mut f64,
18143 vindex: __m128i,
18144 a: __m128d,
18145) {
18146 static_assert_imm8_scale!(SCALE);
    vscatterqpd_128(base_addr as _, 0xff, vindex.as_i64x2(), a.as_f64x2(), SCALE)
18148}
18149
18150/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
18151/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
18152/// mask bit is not set are not written to memory).
18153///
18154/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_pd)
18155#[inline]
18156#[target_feature(enable = "avx512f,avx512vl")]
18157#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
18158#[rustc_legacy_const_generics(4)]
18159#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18160pub unsafe fn _mm_mask_i64scatter_pd<const SCALE: i32>(
18161 base_addr: *mut f64,
18162 k: __mmask8,
18163 vindex: __m128i,
18164 a: __m128d,
18165) {
18166 static_assert_imm8_scale!(SCALE);
    vscatterqpd_128(base_addr as _, k, vindex.as_i64x2(), a.as_f64x2(), SCALE)
18168}
18169
18170/// Stores 2 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
18171/// at packed 64-bit integer indices stored in vindex scaled by scale
18172///
18173/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_ps)
18174#[inline]
18175#[target_feature(enable = "avx512f,avx512vl")]
18176#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
18177#[rustc_legacy_const_generics(3)]
18178#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18179pub unsafe fn _mm_i64scatter_ps<const SCALE: i32>(base_addr: *mut f32, vindex: __m128i, a: __m128) {
18180 static_assert_imm8_scale!(SCALE);
    vscatterqps_128(base_addr as _, 0xff, vindex.as_i64x2(), a.as_f32x4(), SCALE)
18182}
18183
18184/// Stores 2 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
/// mask bit is not set are not written to memory).
///
18187/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_ps)
18188#[inline]
18189#[target_feature(enable = "avx512f,avx512vl")]
18190#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
18191#[rustc_legacy_const_generics(4)]
18192#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18193pub unsafe fn _mm_mask_i64scatter_ps<const SCALE: i32>(
18194 base_addr: *mut f32,
18195 k: __mmask8,
18196 vindex: __m128i,
18197 a: __m128,
18198) {
18199 static_assert_imm8_scale!(SCALE);
    vscatterqps_128(base_addr as _, k, vindex.as_i64x2(), a.as_f32x4(), SCALE)
18201}
18202
18203/// Loads 4 32-bit integer elements from memory starting at location base_addr at packed 32-bit integer
18204/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
18205/// mask bit is not set).
18206///
18207/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_epi32)
18208#[inline]
18209#[target_feature(enable = "avx512f,avx512vl")]
18210#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
18211#[rustc_legacy_const_generics(4)]
18212#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18213pub unsafe fn _mm_mmask_i32gather_epi32<const SCALE: i32>(
18214 src: __m128i,
18215 k: __mmask8,
18216 vindex: __m128i,
18217 base_addr: *const i32,
18218) -> __m128i {
18219 static_assert_imm8_scale!(SCALE);
    transmute(vpgatherdd_128(
        src.as_i32x4(),
        base_addr as _,
        vindex.as_i32x4(),
        k,
        SCALE,
    ))
18227}
18228
18229/// Loads 2 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
18230/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
18231/// mask bit is not set).
18232///
18233/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_epi64)
18234#[inline]
18235#[target_feature(enable = "avx512f,avx512vl")]
18236#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
18237#[rustc_legacy_const_generics(4)]
18238#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18239pub unsafe fn _mm_mmask_i32gather_epi64<const SCALE: i32>(
18240 src: __m128i,
18241 k: __mmask8,
18242 vindex: __m128i,
18243 base_addr: *const i64,
18244) -> __m128i {
18245 static_assert_imm8_scale!(SCALE);
    transmute(vpgatherdq_128(
        src.as_i64x2(),
        base_addr as _,
        vindex.as_i32x4(),
        k,
        SCALE,
    ))
18253}
18254
18255/// Loads 2 double-precision (64-bit) floating-point elements from memory starting at location base_addr
18256/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
18257/// from src when the corresponding mask bit is not set).
18258///
18259/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_pd)
18260#[inline]
18261#[target_feature(enable = "avx512f,avx512vl")]
18262#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
18263#[rustc_legacy_const_generics(4)]
18264#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18265pub unsafe fn _mm_mmask_i32gather_pd<const SCALE: i32>(
18266 src: __m128d,
18267 k: __mmask8,
18268 vindex: __m128i,
18269 base_addr: *const f64,
18270) -> __m128d {
18271 static_assert_imm8_scale!(SCALE);
    transmute(vgatherdpd_128(
        src.as_f64x2(),
        base_addr as _,
        vindex.as_i32x4(),
        k,
        SCALE,
    ))
18279}
18280
18281/// Loads 4 single-precision (32-bit) floating-point elements from memory starting at location base_addr
18282/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
18283/// from src when the corresponding mask bit is not set).
18284///
18285/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_ps)
18286#[inline]
18287#[target_feature(enable = "avx512f,avx512vl")]
18288#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
18289#[rustc_legacy_const_generics(4)]
18290#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18291pub unsafe fn _mm_mmask_i32gather_ps<const SCALE: i32>(
18292 src: __m128,
18293 k: __mmask8,
18294 vindex: __m128i,
18295 base_addr: *const f32,
18296) -> __m128 {
18297 static_assert_imm8_scale!(SCALE);
    transmute(vgatherdps_128(
        src.as_f32x4(),
        base_addr as _,
        vindex.as_i32x4(),
        k,
        SCALE,
    ))
18305}
18306
18307/// Loads 2 32-bit integer elements from memory starting at location base_addr at packed 64-bit integer
18308/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
18309/// mask bit is not set).
18310///
18311/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_epi32)
18312#[inline]
18313#[target_feature(enable = "avx512f,avx512vl")]
18314#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
18315#[rustc_legacy_const_generics(4)]
18316#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18317pub unsafe fn _mm_mmask_i64gather_epi32<const SCALE: i32>(
18318 src: __m128i,
18319 k: __mmask8,
18320 vindex: __m128i,
18321 base_addr: *const i32,
18322) -> __m128i {
18323 static_assert_imm8_scale!(SCALE);
    transmute(vpgatherqd_128(
        src.as_i32x4(),
        base_addr as _,
        vindex.as_i64x2(),
        k,
        SCALE,
    ))
18331}
18332
18333/// Loads 2 64-bit integer elements from memory starting at location base_addr at packed 64-bit integer
18334/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
18335/// mask bit is not set).
18336///
18337/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_epi64)
18338#[inline]
18339#[target_feature(enable = "avx512f,avx512vl")]
18340#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
18341#[rustc_legacy_const_generics(4)]
18342#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18343pub unsafe fn _mm_mmask_i64gather_epi64<const SCALE: i32>(
18344 src: __m128i,
18345 k: __mmask8,
18346 vindex: __m128i,
18347 base_addr: *const i64,
18348) -> __m128i {
18349 static_assert_imm8_scale!(SCALE);
    transmute(vpgatherqq_128(
        src.as_i64x2(),
        base_addr as _,
        vindex.as_i64x2(),
        k,
        SCALE,
    ))
18357}
18358
18359/// Loads 2 double-precision (64-bit) floating-point elements from memory starting at location base_addr
18360/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
18361/// from src when the corresponding mask bit is not set).
18362///
18363/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_pd)
18364#[inline]
18365#[target_feature(enable = "avx512f,avx512vl")]
18366#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
18367#[rustc_legacy_const_generics(4)]
18368#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18369pub unsafe fn _mm_mmask_i64gather_pd<const SCALE: i32>(
18370 src: __m128d,
18371 k: __mmask8,
18372 vindex: __m128i,
18373 base_addr: *const f64,
18374) -> __m128d {
18375 static_assert_imm8_scale!(SCALE);
    transmute(vgatherqpd_128(
        src.as_f64x2(),
        base_addr as _,
        vindex.as_i64x2(),
        k,
        SCALE,
    ))
18383}
18384
18385/// Loads 2 single-precision (32-bit) floating-point elements from memory starting at location base_addr
18386/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
18387/// from src when the corresponding mask bit is not set).
18388///
18389/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_ps)
18390#[inline]
18391#[target_feature(enable = "avx512f,avx512vl")]
18392#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
18393#[rustc_legacy_const_generics(4)]
18394#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18395pub unsafe fn _mm_mmask_i64gather_ps<const SCALE: i32>(
18396 src: __m128,
18397 k: __mmask8,
18398 vindex: __m128i,
18399 base_addr: *const f32,
18400) -> __m128 {
18401 static_assert_imm8_scale!(SCALE);
    transmute(vgatherqps_128(
        src.as_f32x4(),
        base_addr as _,
        vindex.as_i64x2(),
        k,
        SCALE,
    ))
18409}
18410
18411/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18412///
18413/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_epi32&expand=1198)
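///
/// # Examples
///
/// A minimal sketch (fenced as `ignore`, not run as a doctest); it assumes `avx512f` is
/// available.
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let src = _mm512_set1_epi32(-1);
/// let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
/// // The eight active (even-numbered) lanes are packed into the low elements of the
/// // result; the remaining high elements are passed through from src.
/// let r = _mm512_mask_compress_epi32(src, 0b0101_0101_0101_0101, a);
/// // r now holds [0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1, -1].
/// ```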
18414#[inline]
18415#[target_feature(enable = "avx512f")]
18416#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18417#[cfg_attr(test, assert_instr(vpcompressd))]
18418pub fn _mm512_mask_compress_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
    unsafe { transmute(vpcompressd(a.as_i32x16(), src.as_i32x16(), k)) }
18420}
18421
18422/// Contiguously store the active 32-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18423///
18424/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_epi32&expand=1199)
18425#[inline]
18426#[target_feature(enable = "avx512f")]
18427#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18428#[cfg_attr(test, assert_instr(vpcompressd))]
18429pub fn _mm512_maskz_compress_epi32(k: __mmask16, a: __m512i) -> __m512i {
    unsafe { transmute(vpcompressd(a.as_i32x16(), i32x16::ZERO, k)) }
18431}
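
// Illustrative sketch, not part of the upstream source: `_mm512_mask_compress_epi32`
// packs the selected lanes toward element 0 and fills the tail from `src`, while the
// maskz form zeroes the tail instead. The helper name `compress_epi32_example` is
// hypothetical and the code assumes an AVX-512F-capable CPU.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn compress_epi32_example() {
    let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    let src = _mm512_set1_epi32(-1);
    // Keep the even-numbered lanes: 0, 2, 4, ..., 14.
    let r = _mm512_mask_compress_epi32(src, 0b0101_0101_0101_0101, a);
    let e = _mm512_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1, -1);
    assert_eq!(_mm512_cmpeq_epi32_mask(r, e), 0xffff);
}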
18432
18433/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18434///
18435/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_epi32&expand=1196)
18436#[inline]
18437#[target_feature(enable = "avx512f,avx512vl")]
18438#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18439#[cfg_attr(test, assert_instr(vpcompressd))]
18440pub fn _mm256_mask_compress_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe { transmute(vpcompressd256(a.as_i32x8(), src.as_i32x8(), k)) }
18442}
18443
18444/// Contiguously store the active 32-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18445///
18446/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_epi32&expand=1197)
18447#[inline]
18448#[target_feature(enable = "avx512f,avx512vl")]
18449#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18450#[cfg_attr(test, assert_instr(vpcompressd))]
18451pub fn _mm256_maskz_compress_epi32(k: __mmask8, a: __m256i) -> __m256i {
    unsafe { transmute(vpcompressd256(a.as_i32x8(), i32x8::ZERO, k)) }
18453}
18454
18455/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18456///
18457/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_epi32&expand=1194)
18458#[inline]
18459#[target_feature(enable = "avx512f,avx512vl")]
18460#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18461#[cfg_attr(test, assert_instr(vpcompressd))]
18462pub fn _mm_mask_compress_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpcompressd128(a.as_i32x4(), src.as_i32x4(), k)) }
18464}
18465
18466/// Contiguously store the active 32-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18467///
18468/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_epi32&expand=1195)
18469#[inline]
18470#[target_feature(enable = "avx512f,avx512vl")]
18471#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18472#[cfg_attr(test, assert_instr(vpcompressd))]
18473pub fn _mm_maskz_compress_epi32(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpcompressd128(a.as_i32x4(), i32x4::ZERO, k)) }
18475}
18476
18477/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18478///
18479/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_epi64&expand=1204)
18480#[inline]
18481#[target_feature(enable = "avx512f")]
18482#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18483#[cfg_attr(test, assert_instr(vpcompressq))]
18484pub fn _mm512_mask_compress_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
    unsafe { transmute(vpcompressq(a.as_i64x8(), src.as_i64x8(), k)) }
18486}
18487
18488/// Contiguously store the active 64-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18489///
18490/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_epi64&expand=1205)
18491#[inline]
18492#[target_feature(enable = "avx512f")]
18493#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18494#[cfg_attr(test, assert_instr(vpcompressq))]
18495pub fn _mm512_maskz_compress_epi64(k: __mmask8, a: __m512i) -> __m512i {
    unsafe { transmute(vpcompressq(a.as_i64x8(), i64x8::ZERO, k)) }
18497}
18498
18499/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18500///
18501/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_epi64&expand=1202)
18502#[inline]
18503#[target_feature(enable = "avx512f,avx512vl")]
18504#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18505#[cfg_attr(test, assert_instr(vpcompressq))]
18506pub fn _mm256_mask_compress_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe { transmute(vpcompressq256(a.as_i64x4(), src.as_i64x4(), k)) }
18508}
18509
18510/// Contiguously store the active 64-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18511///
18512/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_epi64&expand=1203)
18513#[inline]
18514#[target_feature(enable = "avx512f,avx512vl")]
18515#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18516#[cfg_attr(test, assert_instr(vpcompressq))]
18517pub fn _mm256_maskz_compress_epi64(k: __mmask8, a: __m256i) -> __m256i {
    unsafe { transmute(vpcompressq256(a.as_i64x4(), i64x4::ZERO, k)) }
18519}
18520
18521/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18522///
18523/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_epi64&expand=1200)
18524#[inline]
18525#[target_feature(enable = "avx512f,avx512vl")]
18526#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18527#[cfg_attr(test, assert_instr(vpcompressq))]
18528pub fn _mm_mask_compress_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpcompressq128(a.as_i64x2(), src.as_i64x2(), k)) }
18530}
18531
18532/// Contiguously store the active 64-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18533///
18534/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_epi64&expand=1201)
18535#[inline]
18536#[target_feature(enable = "avx512f,avx512vl")]
18537#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18538#[cfg_attr(test, assert_instr(vpcompressq))]
18539pub fn _mm_maskz_compress_epi64(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpcompressq128(a.as_i64x2(), i64x2::ZERO, k)) }
18541}
18542
18543/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18544///
18545/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_ps&expand=1222)
18546#[inline]
18547#[target_feature(enable = "avx512f")]
18548#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18549#[cfg_attr(test, assert_instr(vcompressps))]
18550pub fn _mm512_mask_compress_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    unsafe { transmute(vcompressps(a.as_f32x16(), src.as_f32x16(), k)) }
18552}
18553
18554/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18555///
18556/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_ps&expand=1223)
18557#[inline]
18558#[target_feature(enable = "avx512f")]
18559#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18560#[cfg_attr(test, assert_instr(vcompressps))]
18561pub fn _mm512_maskz_compress_ps(k: __mmask16, a: __m512) -> __m512 {
    unsafe { transmute(vcompressps(a.as_f32x16(), f32x16::ZERO, k)) }
18563}
18564
18565/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18566///
18567/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_ps&expand=1220)
18568#[inline]
18569#[target_feature(enable = "avx512f,avx512vl")]
18570#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18571#[cfg_attr(test, assert_instr(vcompressps))]
18572pub fn _mm256_mask_compress_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
    unsafe { transmute(vcompressps256(a.as_f32x8(), src.as_f32x8(), k)) }
18574}
18575
18576/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18577///
18578/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_ps&expand=1221)
18579#[inline]
18580#[target_feature(enable = "avx512f,avx512vl")]
18581#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18582#[cfg_attr(test, assert_instr(vcompressps))]
18583pub fn _mm256_maskz_compress_ps(k: __mmask8, a: __m256) -> __m256 {
    unsafe { transmute(vcompressps256(a.as_f32x8(), f32x8::ZERO, k)) }
18585}
18586
18587/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18588///
18589/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_ps&expand=1218)
18590#[inline]
18591#[target_feature(enable = "avx512f,avx512vl")]
18592#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18593#[cfg_attr(test, assert_instr(vcompressps))]
18594pub fn _mm_mask_compress_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    unsafe { transmute(vcompressps128(a.as_f32x4(), src.as_f32x4(), k)) }
18596}
18597
18598/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18599///
18600/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_ps&expand=1219)
18601#[inline]
18602#[target_feature(enable = "avx512f,avx512vl")]
18603#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18604#[cfg_attr(test, assert_instr(vcompressps))]
18605pub fn _mm_maskz_compress_ps(k: __mmask8, a: __m128) -> __m128 {
    unsafe { transmute(vcompressps128(a.as_f32x4(), f32x4::ZERO, k)) }
18607}
18608
18609/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18610///
18611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_pd&expand=1216)
18612#[inline]
18613#[target_feature(enable = "avx512f")]
18614#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18615#[cfg_attr(test, assert_instr(vcompresspd))]
18616pub fn _mm512_mask_compress_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    unsafe { transmute(vcompresspd(a.as_f64x8(), src.as_f64x8(), k)) }
18618}
18619
18620/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18621///
18622/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_pd&expand=1217)
18623#[inline]
18624#[target_feature(enable = "avx512f")]
18625#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18626#[cfg_attr(test, assert_instr(vcompresspd))]
18627pub fn _mm512_maskz_compress_pd(k: __mmask8, a: __m512d) -> __m512d {
    unsafe { transmute(vcompresspd(a.as_f64x8(), f64x8::ZERO, k)) }
18629}
18630
18631/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18632///
18633/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_pd&expand=1214)
18634#[inline]
18635#[target_feature(enable = "avx512f,avx512vl")]
18636#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18637#[cfg_attr(test, assert_instr(vcompresspd))]
18638pub fn _mm256_mask_compress_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
    unsafe { transmute(vcompresspd256(a.as_f64x4(), src.as_f64x4(), k)) }
18640}
18641
18642/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18643///
18644/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_pd&expand=1215)
18645#[inline]
18646#[target_feature(enable = "avx512f,avx512vl")]
18647#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18648#[cfg_attr(test, assert_instr(vcompresspd))]
18649pub fn _mm256_maskz_compress_pd(k: __mmask8, a: __m256d) -> __m256d {
    unsafe { transmute(vcompresspd256(a.as_f64x4(), f64x4::ZERO, k)) }
18651}
18652
18653/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18654///
18655/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_pd&expand=1212)
18656#[inline]
18657#[target_feature(enable = "avx512f,avx512vl")]
18658#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18659#[cfg_attr(test, assert_instr(vcompresspd))]
18660pub fn _mm_mask_compress_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
    unsafe { transmute(vcompresspd128(a.as_f64x2(), src.as_f64x2(), k)) }
18662}
18663
18664/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18665///
18666/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_pd&expand=1213)
18667#[inline]
18668#[target_feature(enable = "avx512f,avx512vl")]
18669#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18670#[cfg_attr(test, assert_instr(vcompresspd))]
18671pub fn _mm_maskz_compress_pd(k: __mmask8, a: __m128d) -> __m128d {
    unsafe { transmute(vcompresspd128(a.as_f64x2(), f64x2::ZERO, k)) }
18673}
18674
18675/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18676///
18677/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_epi32)
18678#[inline]
18679#[target_feature(enable = "avx512f")]
18680#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18681#[cfg_attr(test, assert_instr(vpcompressd))]
18682pub unsafe fn _mm512_mask_compressstoreu_epi32(base_addr: *mut i32, k: __mmask16, a: __m512i) {
    vcompressstored(base_addr as *mut _, a.as_i32x16(), k)
18684}
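
// Illustrative sketch, not part of the upstream source: compressstoreu writes only the
// active lanes, contiguously and unaligned, starting at `base_addr`; memory beyond the
// popcount of the mask is left untouched. The helper name `compressstoreu_epi32_example`
// is hypothetical and the code assumes an AVX-512F-capable CPU.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn compressstoreu_epi32_example() {
    let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    let mut out = [-1i32; 16];
    // Four mask bits are set (lanes 4..8), so exactly four i32 values are stored.
    _mm512_mask_compressstoreu_epi32(out.as_mut_ptr(), 0b0000_0000_1111_0000, a);
    assert_eq!(out[..4], [4, 5, 6, 7]);
    assert_eq!(out[4..], [-1; 12]);
}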
18685
18686/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18687///
18688/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_epi32)
18689#[inline]
18690#[target_feature(enable = "avx512f,avx512vl")]
18691#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18692#[cfg_attr(test, assert_instr(vpcompressd))]
18693pub unsafe fn _mm256_mask_compressstoreu_epi32(base_addr: *mut i32, k: __mmask8, a: __m256i) {
    vcompressstored256(base_addr as *mut _, a.as_i32x8(), k)
18695}
18696
18697/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18698///
18699/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_epi32)
18700#[inline]
18701#[target_feature(enable = "avx512f,avx512vl")]
18702#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18703#[cfg_attr(test, assert_instr(vpcompressd))]
18704pub unsafe fn _mm_mask_compressstoreu_epi32(base_addr: *mut i32, k: __mmask8, a: __m128i) {
    vcompressstored128(base_addr as *mut _, a.as_i32x4(), k)
18706}
18707
18708/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18709///
18710/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_epi64)
18711#[inline]
18712#[target_feature(enable = "avx512f")]
18713#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18714#[cfg_attr(test, assert_instr(vpcompressq))]
18715pub unsafe fn _mm512_mask_compressstoreu_epi64(base_addr: *mut i64, k: __mmask8, a: __m512i) {
    vcompressstoreq(base_addr as *mut _, a.as_i64x8(), k)
18717}
18718
18719/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18720///
18721/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_epi64)
18722#[inline]
18723#[target_feature(enable = "avx512f,avx512vl")]
18724#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18725#[cfg_attr(test, assert_instr(vpcompressq))]
18726pub unsafe fn _mm256_mask_compressstoreu_epi64(base_addr: *mut i64, k: __mmask8, a: __m256i) {
    vcompressstoreq256(base_addr as *mut _, a.as_i64x4(), k)
18728}
18729
18730/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18731///
18732/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_epi64)
18733#[inline]
18734#[target_feature(enable = "avx512f,avx512vl")]
18735#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18736#[cfg_attr(test, assert_instr(vpcompressq))]
18737pub unsafe fn _mm_mask_compressstoreu_epi64(base_addr: *mut i64, k: __mmask8, a: __m128i) {
    vcompressstoreq128(base_addr as *mut _, a.as_i64x2(), k)
18739}
18740
18741/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18742///
18743/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_ps)
18744#[inline]
18745#[target_feature(enable = "avx512f")]
18746#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18747#[cfg_attr(test, assert_instr(vcompressps))]
18748pub unsafe fn _mm512_mask_compressstoreu_ps(base_addr: *mut f32, k: __mmask16, a: __m512) {
    vcompressstoreps(base_addr as *mut _, a.as_f32x16(), k)
18750}
18751
18752/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18753///
18754/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_ps)
18755#[inline]
18756#[target_feature(enable = "avx512f,avx512vl")]
18757#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18758#[cfg_attr(test, assert_instr(vcompressps))]
18759pub unsafe fn _mm256_mask_compressstoreu_ps(base_addr: *mut f32, k: __mmask8, a: __m256) {
    vcompressstoreps256(base_addr as *mut _, a.as_f32x8(), k)
18761}
18762
18763/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18764///
18765/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_ps)
18766#[inline]
18767#[target_feature(enable = "avx512f,avx512vl")]
18768#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18769#[cfg_attr(test, assert_instr(vcompressps))]
18770pub unsafe fn _mm_mask_compressstoreu_ps(base_addr: *mut f32, k: __mmask8, a: __m128) {
    vcompressstoreps128(base_addr as *mut _, a.as_f32x4(), k)
18772}
18773
18774/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18775///
18776/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_pd)
18777#[inline]
18778#[target_feature(enable = "avx512f")]
18779#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18780#[cfg_attr(test, assert_instr(vcompresspd))]
18781pub unsafe fn _mm512_mask_compressstoreu_pd(base_addr: *mut f64, k: __mmask8, a: __m512d) {
    vcompressstorepd(base_addr as *mut _, a.as_f64x8(), k)
18783}
18784
18785/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18786///
18787/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_pd)
18788#[inline]
18789#[target_feature(enable = "avx512f,avx512vl")]
18790#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18791#[cfg_attr(test, assert_instr(vcompresspd))]
18792pub unsafe fn _mm256_mask_compressstoreu_pd(base_addr: *mut f64, k: __mmask8, a: __m256d) {
    vcompressstorepd256(base_addr as *mut _, a.as_f64x4(), k)
18794}
18795
18796/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18797///
18798/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_pd)
18799#[inline]
18800#[target_feature(enable = "avx512f,avx512vl")]
18801#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18802#[cfg_attr(test, assert_instr(vcompresspd))]
18803pub unsafe fn _mm_mask_compressstoreu_pd(base_addr: *mut f64, k: __mmask8, a: __m128d) {
    vcompressstorepd128(base_addr as *mut _, a.as_f64x2(), k)
18805}
18806
18807/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18808///
18809/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_epi32&expand=2316)
18810#[inline]
18811#[target_feature(enable = "avx512f")]
18812#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18813#[cfg_attr(test, assert_instr(vpexpandd))]
18814pub fn _mm512_mask_expand_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
    unsafe { transmute(vpexpandd(a.as_i32x16(), src.as_i32x16(), k)) }
18816}
18817
18818/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18819///
18820/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_epi32&expand=2317)
18821#[inline]
18822#[target_feature(enable = "avx512f")]
18823#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18824#[cfg_attr(test, assert_instr(vpexpandd))]
18825pub fn _mm512_maskz_expand_epi32(k: __mmask16, a: __m512i) -> __m512i {
    unsafe { transmute(vpexpandd(a.as_i32x16(), i32x16::ZERO, k)) }
18827}
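
// Illustrative sketch, not part of the upstream source: expand is the inverse of
// compress. Consecutive low lanes of `a` are scattered into the positions whose mask
// bit is set; the other lanes come from `src` (or are zeroed by the maskz form). The
// helper name `expand_epi32_example` is hypothetical and assumes an AVX-512F CPU.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn expand_epi32_example() {
    let a = _mm512_setr_epi32(10, 11, 12, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
    let src = _mm512_set1_epi32(-1);
    // Four mask bits are set, so lanes 0, 2, 4 and 6 receive 10, 11, 12, 13 in order.
    let r = _mm512_mask_expand_epi32(src, 0b0000_0000_0101_0101, a);
    let e = _mm512_setr_epi32(10, -1, 11, -1, 12, -1, 13, -1, -1, -1, -1, -1, -1, -1, -1, -1);
    assert_eq!(_mm512_cmpeq_epi32_mask(r, e), 0xffff);
}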
18828
18829/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18830///
18831/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_epi32&expand=2314)
18832#[inline]
18833#[target_feature(enable = "avx512f,avx512vl")]
18834#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18835#[cfg_attr(test, assert_instr(vpexpandd))]
18836pub fn _mm256_mask_expand_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe { transmute(vpexpandd256(a.as_i32x8(), src.as_i32x8(), k)) }
18838}
18839
18840/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18841///
18842/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_epi32&expand=2315)
18843#[inline]
18844#[target_feature(enable = "avx512f,avx512vl")]
18845#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18846#[cfg_attr(test, assert_instr(vpexpandd))]
18847pub fn _mm256_maskz_expand_epi32(k: __mmask8, a: __m256i) -> __m256i {
    unsafe { transmute(vpexpandd256(a.as_i32x8(), i32x8::ZERO, k)) }
18849}
18850
18851/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18852///
18853/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_epi32&expand=2312)
18854#[inline]
18855#[target_feature(enable = "avx512f,avx512vl")]
18856#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18857#[cfg_attr(test, assert_instr(vpexpandd))]
18858pub fn _mm_mask_expand_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpexpandd128(a.as_i32x4(), src.as_i32x4(), k)) }
18860}
18861
18862/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18863///
18864/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_epi32&expand=2313)
18865#[inline]
18866#[target_feature(enable = "avx512f,avx512vl")]
18867#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18868#[cfg_attr(test, assert_instr(vpexpandd))]
18869pub fn _mm_maskz_expand_epi32(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpexpandd128(a.as_i32x4(), i32x4::ZERO, k)) }
18871}
18872
18873/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18874///
18875/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_epi64&expand=2322)
18876#[inline]
18877#[target_feature(enable = "avx512f")]
18878#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18879#[cfg_attr(test, assert_instr(vpexpandq))]
18880pub fn _mm512_mask_expand_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
    unsafe { transmute(vpexpandq(a.as_i64x8(), src.as_i64x8(), k)) }
18882}
18883
18884/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18885///
18886/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_epi64&expand=2323)
18887#[inline]
18888#[target_feature(enable = "avx512f")]
18889#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18890#[cfg_attr(test, assert_instr(vpexpandq))]
18891pub fn _mm512_maskz_expand_epi64(k: __mmask8, a: __m512i) -> __m512i {
    unsafe { transmute(vpexpandq(a.as_i64x8(), i64x8::ZERO, k)) }
18893}
18894
18895/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18896///
18897/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_epi64&expand=2320)
18898#[inline]
18899#[target_feature(enable = "avx512f,avx512vl")]
18900#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18901#[cfg_attr(test, assert_instr(vpexpandq))]
18902pub fn _mm256_mask_expand_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe { transmute(vpexpandq256(a.as_i64x4(), src.as_i64x4(), k)) }
18904}
18905
18906/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18907///
18908/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_epi64&expand=2321)
18909#[inline]
18910#[target_feature(enable = "avx512f,avx512vl")]
18911#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18912#[cfg_attr(test, assert_instr(vpexpandq))]
18913pub fn _mm256_maskz_expand_epi64(k: __mmask8, a: __m256i) -> __m256i {
    unsafe { transmute(vpexpandq256(a.as_i64x4(), i64x4::ZERO, k)) }
18915}
18916
18917/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18918///
18919/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_epi64&expand=2318)
18920#[inline]
18921#[target_feature(enable = "avx512f,avx512vl")]
18922#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18923#[cfg_attr(test, assert_instr(vpexpandq))]
18924pub fn _mm_mask_expand_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpexpandq128(a.as_i64x2(), src.as_i64x2(), k)) }
18926}
18927
18928/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18929///
18930/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_epi64&expand=2319)
18931#[inline]
18932#[target_feature(enable = "avx512f,avx512vl")]
18933#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18934#[cfg_attr(test, assert_instr(vpexpandq))]
18935pub fn _mm_maskz_expand_epi64(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpexpandq128(a.as_i64x2(), i64x2::ZERO, k)) }
18937}
18938
18939/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18940///
18941/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_ps&expand=2340)
18942#[inline]
18943#[target_feature(enable = "avx512f")]
18944#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18945#[cfg_attr(test, assert_instr(vexpandps))]
18946pub fn _mm512_mask_expand_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    unsafe { transmute(vexpandps(a.as_f32x16(), src.as_f32x16(), k)) }
18948}
18949
18950/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18951///
18952/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_ps&expand=2341)
18953#[inline]
18954#[target_feature(enable = "avx512f")]
18955#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18956#[cfg_attr(test, assert_instr(vexpandps))]
18957pub fn _mm512_maskz_expand_ps(k: __mmask16, a: __m512) -> __m512 {
    unsafe { transmute(vexpandps(a.as_f32x16(), f32x16::ZERO, k)) }
18959}
18960
18961/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18962///
18963/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_ps&expand=2338)
18964#[inline]
18965#[target_feature(enable = "avx512f,avx512vl")]
18966#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18967#[cfg_attr(test, assert_instr(vexpandps))]
18968pub fn _mm256_mask_expand_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
    unsafe { transmute(vexpandps256(a.as_f32x8(), src.as_f32x8(), k)) }
18970}
18971
18972/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18973///
18974/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_ps&expand=2339)
18975#[inline]
18976#[target_feature(enable = "avx512f,avx512vl")]
18977#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18978#[cfg_attr(test, assert_instr(vexpandps))]
18979pub fn _mm256_maskz_expand_ps(k: __mmask8, a: __m256) -> __m256 {
    unsafe { transmute(vexpandps256(a.as_f32x8(), f32x8::ZERO, k)) }
18981}
18982
18983/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18984///
18985/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_ps&expand=2336)
18986#[inline]
18987#[target_feature(enable = "avx512f,avx512vl")]
18988#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
18989#[cfg_attr(test, assert_instr(vexpandps))]
18990pub fn _mm_mask_expand_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    unsafe { transmute(vexpandps128(a.as_f32x4(), src.as_f32x4(), k)) }
18992}
18993
18994/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18995///
18996/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_ps&expand=2337)
18997#[inline]
18998#[target_feature(enable = "avx512f,avx512vl")]
18999#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19000#[cfg_attr(test, assert_instr(vexpandps))]
19001pub fn _mm_maskz_expand_ps(k: __mmask8, a: __m128) -> __m128 {
    unsafe { transmute(vexpandps128(a.as_f32x4(), f32x4::ZERO, k)) }
19003}
19004
19005/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19006///
19007/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_pd&expand=2334)
19008#[inline]
19009#[target_feature(enable = "avx512f")]
19010#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19011#[cfg_attr(test, assert_instr(vexpandpd))]
19012pub fn _mm512_mask_expand_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    unsafe { transmute(vexpandpd(a.as_f64x8(), src.as_f64x8(), k)) }
19014}
19015
19016/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19017///
19018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_pd&expand=2335)
19019#[inline]
19020#[target_feature(enable = "avx512f")]
19021#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19022#[cfg_attr(test, assert_instr(vexpandpd))]
19023pub fn _mm512_maskz_expand_pd(k: __mmask8, a: __m512d) -> __m512d {
    unsafe { transmute(vexpandpd(a.as_f64x8(), f64x8::ZERO, k)) }
19025}
19026
19027/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19028///
19029/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_pd&expand=2332)
19030#[inline]
19031#[target_feature(enable = "avx512f,avx512vl")]
19032#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19033#[cfg_attr(test, assert_instr(vexpandpd))]
19034pub fn _mm256_mask_expand_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
    unsafe { transmute(vexpandpd256(a.as_f64x4(), src.as_f64x4(), k)) }
19036}
19037
19038/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19039///
19040/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_pd&expand=2333)
19041#[inline]
19042#[target_feature(enable = "avx512f,avx512vl")]
19043#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19044#[cfg_attr(test, assert_instr(vexpandpd))]
19045pub fn _mm256_maskz_expand_pd(k: __mmask8, a: __m256d) -> __m256d {
    unsafe { transmute(vexpandpd256(a.as_f64x4(), f64x4::ZERO, k)) }
19047}
19048
19049/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19050///
19051/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_pd&expand=2330)
19052#[inline]
19053#[target_feature(enable = "avx512f,avx512vl")]
19054#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19055#[cfg_attr(test, assert_instr(vexpandpd))]
19056pub fn _mm_mask_expand_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
    unsafe { transmute(vexpandpd128(a.as_f64x2(), src.as_f64x2(), k)) }
19058}
19059
19060/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19061///
19062/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_pd&expand=2331)
19063#[inline]
19064#[target_feature(enable = "avx512f,avx512vl")]
19065#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19066#[cfg_attr(test, assert_instr(vexpandpd))]
19067pub fn _mm_maskz_expand_pd(k: __mmask8, a: __m128d) -> __m128d {
    unsafe { transmute(vexpandpd128(a.as_f64x2(), f64x2::ZERO, k)) }
19069}
19070
19071/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19072///
19073/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rol_epi32&expand=4685)
19074#[inline]
19075#[target_feature(enable = "avx512f")]
19076#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19077#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19078#[rustc_legacy_const_generics(1)]
19079pub fn _mm512_rol_epi32<const IMM8: i32>(a: __m512i) -> __m512i {
19080 unsafe {
19081 static_assert_uimm_bits!(IMM8, 8);
19082 let a: i32x16 = a.as_i32x16();
19083 let r: i32x16 = vprold(a, IMM8);
        transmute(r)
19085 }
19086}
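
// Illustrative sketch, not part of the upstream source: a left rotation by IMM8 moves
// the bits shifted out of the top back into the bottom, so 0x8000_0001 rotated left by
// one becomes 0x0000_0003 in every lane. The helper name `rol_epi32_example` is
// hypothetical and assumes an AVX-512F CPU.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn rol_epi32_example() {
    let a = _mm512_set1_epi32(0x8000_0001u32 as i32);
    let r = _mm512_rol_epi32::<1>(a);
    let e = _mm512_set1_epi32(0x0000_0003);
    assert_eq!(_mm512_cmpeq_epi32_mask(r, e), 0xffff);
}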
19087
19088/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19089///
19090/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rol_epi32&expand=4683)
19091#[inline]
19092#[target_feature(enable = "avx512f")]
19093#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19094#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19095#[rustc_legacy_const_generics(3)]
19096pub fn _mm512_mask_rol_epi32<const IMM8: i32>(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
19097 unsafe {
19098 static_assert_uimm_bits!(IMM8, 8);
19099 let a: i32x16 = a.as_i32x16();
19100 let r: i32x16 = vprold(a, IMM8);
        transmute(simd_select_bitmask(k, r, src.as_i32x16()))
19102 }
19103}
19104
19105/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19106///
19107/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rol_epi32&expand=4684)
19108#[inline]
19109#[target_feature(enable = "avx512f")]
19110#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19111#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19112#[rustc_legacy_const_generics(2)]
19113pub fn _mm512_maskz_rol_epi32<const IMM8: i32>(k: __mmask16, a: __m512i) -> __m512i {
19114 unsafe {
19115 static_assert_uimm_bits!(IMM8, 8);
19116 let a: i32x16 = a.as_i32x16();
19117 let r: i32x16 = vprold(a, IMM8);
        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
19119 }
19120}
19121
19122/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19123///
19124/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rol_epi32&expand=4682)
19125#[inline]
19126#[target_feature(enable = "avx512f,avx512vl")]
19127#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19128#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19129#[rustc_legacy_const_generics(1)]
19130pub fn _mm256_rol_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
19131 unsafe {
19132 static_assert_uimm_bits!(IMM8, 8);
19133 let a: i32x8 = a.as_i32x8();
19134 let r: i32x8 = vprold256(a, IMM8);
        transmute(r)
19136 }
19137}
19138
19139/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19140///
19141/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rol_epi32&expand=4680)
19142#[inline]
19143#[target_feature(enable = "avx512f,avx512vl")]
19144#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19145#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19146#[rustc_legacy_const_generics(3)]
19147pub fn _mm256_mask_rol_epi32<const IMM8: i32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19148 unsafe {
19149 static_assert_uimm_bits!(IMM8, 8);
19150 let a: i32x8 = a.as_i32x8();
19151 let r: i32x8 = vprold256(a, IMM8);
        transmute(simd_select_bitmask(k, r, src.as_i32x8()))
19153 }
19154}
19155
19156/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19157///
19158/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rol_epi32&expand=4681)
19159#[inline]
19160#[target_feature(enable = "avx512f,avx512vl")]
19161#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19162#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19163#[rustc_legacy_const_generics(2)]
19164pub fn _mm256_maskz_rol_epi32<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
19165 unsafe {
19166 static_assert_uimm_bits!(IMM8, 8);
19167 let a: i32x8 = a.as_i32x8();
19168 let r: i32x8 = vprold256(a, IMM8);
        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
19170 }
19171}
19172
19173/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19174///
19175/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rol_epi32&expand=4679)
19176#[inline]
19177#[target_feature(enable = "avx512f,avx512vl")]
19178#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19179#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19180#[rustc_legacy_const_generics(1)]
19181pub fn _mm_rol_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
19182 unsafe {
19183 static_assert_uimm_bits!(IMM8, 8);
19184 let a: i32x4 = a.as_i32x4();
19185 let r: i32x4 = vprold128(a, IMM8);
        transmute(r)
19187 }
19188}
19189
19190/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19191///
19192/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rol_epi32&expand=4677)
19193#[inline]
19194#[target_feature(enable = "avx512f,avx512vl")]
19195#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19196#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19197#[rustc_legacy_const_generics(3)]
19198pub fn _mm_mask_rol_epi32<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19199 unsafe {
19200 static_assert_uimm_bits!(IMM8, 8);
19201 let a: i32x4 = a.as_i32x4();
19202 let r: i32x4 = vprold128(a, IMM8);
        transmute(simd_select_bitmask(k, r, src.as_i32x4()))
19204 }
19205}
19206
19207/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19208///
19209/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rol_epi32&expand=4678)
19210#[inline]
19211#[target_feature(enable = "avx512f,avx512vl")]
19212#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19213#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19214#[rustc_legacy_const_generics(2)]
19215pub fn _mm_maskz_rol_epi32<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
19216 unsafe {
19217 static_assert_uimm_bits!(IMM8, 8);
19218 let a: i32x4 = a.as_i32x4();
19219 let r: i32x4 = vprold128(a, IMM8);
        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
19221 }
19222}
19223
19224/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19225///
19226/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ror_epi32&expand=4721)
19227#[inline]
19228#[target_feature(enable = "avx512f")]
19229#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19230#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19231#[rustc_legacy_const_generics(1)]
19232pub fn _mm512_ror_epi32<const IMM8: i32>(a: __m512i) -> __m512i {
19233 unsafe {
19234 static_assert_uimm_bits!(IMM8, 8);
19235 let a: i32x16 = a.as_i32x16();
19236 let r: i32x16 = vprord(a, IMM8);
        transmute(r)
19238 }
19239}
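
// Illustrative sketch, not part of the upstream source: a right rotation by IMM8 is
// equivalent to a left rotation by `32 - IMM8`, so rotating 0x0000_0003 right by one
// yields 0x8000_0001, the same result as `_mm512_rol_epi32::<31>`. The helper name
// `ror_epi32_example` is hypothetical and assumes an AVX-512F CPU.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn ror_epi32_example() {
    let a = _mm512_set1_epi32(0x0000_0003);
    let r = _mm512_ror_epi32::<1>(a);
    let e = _mm512_set1_epi32(0x8000_0001u32 as i32);
    assert_eq!(_mm512_cmpeq_epi32_mask(r, e), 0xffff);
    assert_eq!(_mm512_cmpeq_epi32_mask(r, _mm512_rol_epi32::<31>(a)), 0xffff);
}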
19240
19241/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19242///
19243/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ror_epi32&expand=4719)
19244#[inline]
19245#[target_feature(enable = "avx512f")]
19246#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19247#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19248#[rustc_legacy_const_generics(3)]
19249pub fn _mm512_mask_ror_epi32<const IMM8: i32>(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
19250 unsafe {
19251 static_assert_uimm_bits!(IMM8, 8);
19252 let a: i32x16 = a.as_i32x16();
19253 let r: i32x16 = vprord(a, IMM8);
        transmute(simd_select_bitmask(k, r, src.as_i32x16()))
19255 }
19256}
19257
19258/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19259///
19260/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ror_epi32&expand=4720)
19261#[inline]
19262#[target_feature(enable = "avx512f")]
19263#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19264#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19265#[rustc_legacy_const_generics(2)]
19266pub fn _mm512_maskz_ror_epi32<const IMM8: i32>(k: __mmask16, a: __m512i) -> __m512i {
19267 unsafe {
19268 static_assert_uimm_bits!(IMM8, 8);
19269 let a: i32x16 = a.as_i32x16();
19270 let r: i32x16 = vprord(a, IMM8);
        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
19272 }
19273}
19274
19275/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19276///
19277/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ror_epi32&expand=4718)
19278#[inline]
19279#[target_feature(enable = "avx512f,avx512vl")]
19280#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19281#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19282#[rustc_legacy_const_generics(1)]
19283pub fn _mm256_ror_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
19284 unsafe {
19285 static_assert_uimm_bits!(IMM8, 8);
19286 let a: i32x8 = a.as_i32x8();
19287 let r: i32x8 = vprord256(a, IMM8);
        transmute(r)
19289 }
19290}
19291
19292/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19293///
19294/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ror_epi32&expand=4716)
19295#[inline]
19296#[target_feature(enable = "avx512f,avx512vl")]
19297#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19298#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19299#[rustc_legacy_const_generics(3)]
19300pub fn _mm256_mask_ror_epi32<const IMM8: i32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19301 unsafe {
19302 static_assert_uimm_bits!(IMM8, 8);
19303 let a: i32x8 = a.as_i32x8();
19304 let r: i32x8 = vprord256(a, IMM8);
        transmute(simd_select_bitmask(k, r, src.as_i32x8()))
19306 }
19307}
19308
19309/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19310///
19311/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ror_epi32&expand=4717)
19312#[inline]
19313#[target_feature(enable = "avx512f,avx512vl")]
19314#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19315#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19316#[rustc_legacy_const_generics(2)]
19317pub fn _mm256_maskz_ror_epi32<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
19318 unsafe {
19319 static_assert_uimm_bits!(IMM8, 8);
19320 let a: i32x8 = a.as_i32x8();
19321 let r: i32x8 = vprord256(a, IMM8);
        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
19323 }
19324}
19325
19326/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19327///
19328/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ror_epi32&expand=4715)
19329#[inline]
19330#[target_feature(enable = "avx512f,avx512vl")]
19331#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19332#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19333#[rustc_legacy_const_generics(1)]
19334pub fn _mm_ror_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
19335 unsafe {
19336 static_assert_uimm_bits!(IMM8, 8);
19337 let a: i32x4 = a.as_i32x4();
19338 let r: i32x4 = vprord128(a, IMM8);
        transmute(r)
19340 }
19341}
19342
19343/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19344///
19345/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ror_epi32&expand=4713)
19346#[inline]
19347#[target_feature(enable = "avx512f,avx512vl")]
19348#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19349#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19350#[rustc_legacy_const_generics(3)]
19351pub fn _mm_mask_ror_epi32<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19352 unsafe {
19353 static_assert_uimm_bits!(IMM8, 8);
19354 let a: i32x4 = a.as_i32x4();
19355 let r: i32x4 = vprord128(a, IMM8);
        transmute(simd_select_bitmask(k, r, src.as_i32x4()))
19357 }
19358}
19359
19360/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19361///
19362/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ror_epi32&expand=4714)
19363#[inline]
19364#[target_feature(enable = "avx512f,avx512vl")]
19365#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19366#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19367#[rustc_legacy_const_generics(2)]
19368pub fn _mm_maskz_ror_epi32<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
19369 unsafe {
19370 static_assert_uimm_bits!(IMM8, 8);
19371 let a: i32x4 = a.as_i32x4();
19372 let r: i32x4 = vprord128(a, IMM8);
        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
19374 }
19375}
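
// Sketch of the writemask/zeromask distinction used throughout this file
// (illustrative only): bit i of `k` selects whether lane i receives the computed
// value; unselected lanes either keep `src` (mask_ variants) or become zero
// (maskz_ variants). Hypothetical usage, assuming AVX-512F + AVX-512VL:
//
//     // k = 0b0011: only lanes 0 and 1 are rotated.
//     let kept   = _mm_mask_ror_epi32::<8>(src, 0b0011, a);  // lanes 2,3 copied from `src`
//     let zeroed = _mm_maskz_ror_epi32::<8>(0b0011, a);      // lanes 2,3 are 0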
19376
19377/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19378///
19379/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rol_epi64&expand=4694)
19380#[inline]
19381#[target_feature(enable = "avx512f")]
19382#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19383#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19384#[rustc_legacy_const_generics(1)]
19385pub fn _mm512_rol_epi64<const IMM8: i32>(a: __m512i) -> __m512i {
19386 unsafe {
19387 static_assert_uimm_bits!(IMM8, 8);
19388 let a: i64x8 = a.as_i64x8();
19389 let r: i64x8 = vprolq(a, IMM8);
        transmute(r)
19391 }
19392}
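
// Illustrative sketch: a left rotation by one moves the top bit of each 64-bit
// lane into bit 0. Assuming an AVX-512F target, a hypothetical call such as
//
//     let x = _mm512_set1_epi64(0x8000_0000_0000_0001u64 as i64);
//     let r = _mm512_rol_epi64::<1>(x);
//
// leaves every lane of `r` equal to 0x0000_0000_0000_0003.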
19393
19394/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19395///
19396/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rol_epi64&expand=4692)
19397#[inline]
19398#[target_feature(enable = "avx512f")]
19399#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19400#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19401#[rustc_legacy_const_generics(3)]
19402pub fn _mm512_mask_rol_epi64<const IMM8: i32>(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
19403 unsafe {
19404 static_assert_uimm_bits!(IMM8, 8);
19405 let a: i64x8 = a.as_i64x8();
19406 let r: i64x8 = vprolq(a, IMM8);
        transmute(simd_select_bitmask(k, r, src.as_i64x8()))
19408 }
19409}
19410
19411/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19412///
19413/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rol_epi64&expand=4693)
19414#[inline]
19415#[target_feature(enable = "avx512f")]
19416#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19417#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19418#[rustc_legacy_const_generics(2)]
19419pub fn _mm512_maskz_rol_epi64<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m512i {
19420 unsafe {
19421 static_assert_uimm_bits!(IMM8, 8);
19422 let a: i64x8 = a.as_i64x8();
19423 let r: i64x8 = vprolq(a, IMM8);
        transmute(simd_select_bitmask(k, r, i64x8::ZERO))
19425 }
19426}
19427
19428/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19429///
19430/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rol_epi64&expand=4691)
19431#[inline]
19432#[target_feature(enable = "avx512f,avx512vl")]
19433#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19434#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19435#[rustc_legacy_const_generics(1)]
19436pub fn _mm256_rol_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
19437 unsafe {
19438 static_assert_uimm_bits!(IMM8, 8);
19439 let a: i64x4 = a.as_i64x4();
19440 let r: i64x4 = vprolq256(a, IMM8);
        transmute(r)
19442 }
19443}
19444
19445/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19446///
19447/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rol_epi64&expand=4689)
19448#[inline]
19449#[target_feature(enable = "avx512f,avx512vl")]
19450#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19451#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19452#[rustc_legacy_const_generics(3)]
19453pub fn _mm256_mask_rol_epi64<const IMM8: i32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19454 unsafe {
19455 static_assert_uimm_bits!(IMM8, 8);
19456 let a: i64x4 = a.as_i64x4();
19457 let r: i64x4 = vprolq256(a, IMM8);
        transmute(simd_select_bitmask(k, r, src.as_i64x4()))
19459 }
19460}
19461
19462/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19463///
19464/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rol_epi64&expand=4690)
19465#[inline]
19466#[target_feature(enable = "avx512f,avx512vl")]
19467#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19468#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19469#[rustc_legacy_const_generics(2)]
19470pub fn _mm256_maskz_rol_epi64<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
19471 unsafe {
19472 static_assert_uimm_bits!(IMM8, 8);
19473 let a: i64x4 = a.as_i64x4();
19474 let r: i64x4 = vprolq256(a, IMM8);
        transmute(simd_select_bitmask(k, r, i64x4::ZERO))
19476 }
19477}
19478
19479/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19480///
19481/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rol_epi64&expand=4688)
19482#[inline]
19483#[target_feature(enable = "avx512f,avx512vl")]
19484#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19485#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19486#[rustc_legacy_const_generics(1)]
19487pub fn _mm_rol_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
19488 unsafe {
19489 static_assert_uimm_bits!(IMM8, 8);
19490 let a: i64x2 = a.as_i64x2();
19491 let r: i64x2 = vprolq128(a, IMM8);
        transmute(r)
19493 }
19494}
19495
19496/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19497///
19498/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rol_epi64&expand=4686)
19499#[inline]
19500#[target_feature(enable = "avx512f,avx512vl")]
19501#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19502#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19503#[rustc_legacy_const_generics(3)]
19504pub fn _mm_mask_rol_epi64<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19505 unsafe {
19506 static_assert_uimm_bits!(IMM8, 8);
19507 let a: i64x2 = a.as_i64x2();
19508 let r: i64x2 = vprolq128(a, IMM8);
        transmute(simd_select_bitmask(k, r, src.as_i64x2()))
19510 }
19511}
19512
19513/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19514///
19515/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rol_epi64&expand=4687)
19516#[inline]
19517#[target_feature(enable = "avx512f,avx512vl")]
19518#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19519#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19520#[rustc_legacy_const_generics(2)]
19521pub fn _mm_maskz_rol_epi64<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
19522 unsafe {
19523 static_assert_uimm_bits!(IMM8, 8);
19524 let a: i64x2 = a.as_i64x2();
19525 let r: i64x2 = vprolq128(a, IMM8);
        transmute(simd_select_bitmask(k, r, i64x2::ZERO))
19527 }
19528}
19529
19530/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19531///
19532/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ror_epi64&expand=4730)
19533#[inline]
19534#[target_feature(enable = "avx512f")]
19535#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19536#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19537#[rustc_legacy_const_generics(1)]
19538pub fn _mm512_ror_epi64<const IMM8: i32>(a: __m512i) -> __m512i {
19539 unsafe {
19540 static_assert_uimm_bits!(IMM8, 8);
19541 let a: i64x8 = a.as_i64x8();
19542 let r: i64x8 = vprorq(a, IMM8);
        transmute(r)
19544 }
19545}
19546
19547/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19548///
19549/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ror_epi64&expand=4728)
19550#[inline]
19551#[target_feature(enable = "avx512f")]
19552#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19553#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19554#[rustc_legacy_const_generics(3)]
19555pub fn _mm512_mask_ror_epi64<const IMM8: i32>(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
19556 unsafe {
19557 static_assert_uimm_bits!(IMM8, 8);
19558 let a: i64x8 = a.as_i64x8();
19559 let r: i64x8 = vprorq(a, IMM8);
        transmute(simd_select_bitmask(k, r, src.as_i64x8()))
19561 }
19562}
19563
19564/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19565///
19566/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ror_epi64&expand=4729)
19567#[inline]
19568#[target_feature(enable = "avx512f")]
19569#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19570#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19571#[rustc_legacy_const_generics(2)]
19572pub fn _mm512_maskz_ror_epi64<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m512i {
19573 unsafe {
19574 static_assert_uimm_bits!(IMM8, 8);
19575 let a: i64x8 = a.as_i64x8();
19576 let r: i64x8 = vprorq(a, IMM8);
        transmute(simd_select_bitmask(k, r, i64x8::ZERO))
19578 }
19579}
19580
19581/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19582///
19583/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ror_epi64&expand=4727)
19584#[inline]
19585#[target_feature(enable = "avx512f,avx512vl")]
19586#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19587#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19588#[rustc_legacy_const_generics(1)]
19589pub fn _mm256_ror_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
19590 unsafe {
19591 static_assert_uimm_bits!(IMM8, 8);
19592 let a: i64x4 = a.as_i64x4();
19593 let r: i64x4 = vprorq256(a, IMM8);
        transmute(r)
19595 }
19596}
19597
19598/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19599///
19600/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ror_epi64&expand=4725)
19601#[inline]
19602#[target_feature(enable = "avx512f,avx512vl")]
19603#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19604#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19605#[rustc_legacy_const_generics(3)]
19606pub fn _mm256_mask_ror_epi64<const IMM8: i32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19607 unsafe {
19608 static_assert_uimm_bits!(IMM8, 8);
19609 let a: i64x4 = a.as_i64x4();
19610 let r: i64x4 = vprorq256(a, IMM8);
        transmute(simd_select_bitmask(k, r, src.as_i64x4()))
19612 }
19613}
19614
19615/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19616///
19617/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ror_epi64&expand=4726)
19618#[inline]
19619#[target_feature(enable = "avx512f,avx512vl")]
19620#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19621#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19622#[rustc_legacy_const_generics(2)]
19623pub fn _mm256_maskz_ror_epi64<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
19624 unsafe {
19625 static_assert_uimm_bits!(IMM8, 8);
19626 let a: i64x4 = a.as_i64x4();
19627 let r: i64x4 = vprorq256(a, IMM8);
        transmute(simd_select_bitmask(k, r, i64x4::ZERO))
19629 }
19630}
19631
19632/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19633///
19634/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ror_epi64&expand=4724)
19635#[inline]
19636#[target_feature(enable = "avx512f,avx512vl")]
19637#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19638#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19639#[rustc_legacy_const_generics(1)]
19640pub fn _mm_ror_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
19641 unsafe {
19642 static_assert_uimm_bits!(IMM8, 8);
19643 let a: i64x2 = a.as_i64x2();
19644 let r: i64x2 = vprorq128(a, IMM8);
        transmute(r)
19646 }
19647}
19648
19649/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19650///
19651/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ror_epi64&expand=4722)
19652#[inline]
19653#[target_feature(enable = "avx512f,avx512vl")]
19654#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19655#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19656#[rustc_legacy_const_generics(3)]
19657pub fn _mm_mask_ror_epi64<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19658 unsafe {
19659 static_assert_uimm_bits!(IMM8, 8);
19660 let a: i64x2 = a.as_i64x2();
19661 let r: i64x2 = vprorq128(a, IMM8);
        transmute(simd_select_bitmask(k, r, src.as_i64x2()))
19663 }
19664}
19665
19666/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19667///
19668/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ror_epi64&expand=4723)
19669#[inline]
19670#[target_feature(enable = "avx512f,avx512vl")]
19671#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19672#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19673#[rustc_legacy_const_generics(2)]
19674pub fn _mm_maskz_ror_epi64<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
19675 unsafe {
19676 static_assert_uimm_bits!(IMM8, 8);
19677 let a: i64x2 = a.as_i64x2();
19678 let r: i64x2 = vprorq128(a, IMM8);
        transmute(simd_select_bitmask(k, r, i64x2::ZERO))
19680 }
19681}
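
// Note (informal): for 0 < n < 64, rotating a 64-bit lane right by `n` is the
// same as rotating it left by `64 - n`, so the following two calls are expected
// to produce identical results (sketch, assuming AVX-512F + AVX-512VL):
//
//     let r1 = _mm_ror_epi64::<15>(x);
//     let r2 = _mm_rol_epi64::<49>(x);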
19682
19683/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst.
19684///
19685/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_slli_epi32&expand=5310)
19686#[inline]
19687#[target_feature(enable = "avx512f")]
19688#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19689#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
19690#[rustc_legacy_const_generics(1)]
19691pub fn _mm512_slli_epi32<const IMM8: u32>(a: __m512i) -> __m512i {
19692 unsafe {
19693 static_assert_uimm_bits!(IMM8, 8);
19694 if IMM8 >= 32 {
19695 _mm512_setzero_si512()
19696 } else {
            transmute(simd_shl(a.as_u32x16(), u32x16::splat(IMM8)))
19698 }
19699 }
19700}
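
// Behavioural sketch: unlike scalar `<<`, an out-of-range immediate does not wrap
// or trap; any count of 32 or more simply yields an all-zero vector, as the
// `IMM8 >= 32` branch above shows. Hypothetical example (AVX-512F assumed):
//
//     let ones = _mm512_set1_epi32(1);
//     let a = _mm512_slli_epi32::<4>(ones);   // every lane is 16
//     let b = _mm512_slli_epi32::<32>(ones);  // every lane is 0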
19701
19702/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19703///
19704/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_slli_epi32&expand=5308)
19705#[inline]
19706#[target_feature(enable = "avx512f")]
19707#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19708#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
19709#[rustc_legacy_const_generics(3)]
19710pub fn _mm512_mask_slli_epi32<const IMM8: u32>(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
19711 unsafe {
19712 static_assert_uimm_bits!(IMM8, 8);
19713 let shf: u32x16 = if IMM8 >= 32 {
19714 u32x16::ZERO
19715 } else {
            simd_shl(a.as_u32x16(), u32x16::splat(IMM8))
        };
        transmute(simd_select_bitmask(k, shf, src.as_u32x16()))
19719 }
19720}
19721
19722/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19723///
19724/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_slli_epi32&expand=5309)
19725#[inline]
19726#[target_feature(enable = "avx512f")]
19727#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19728#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
19729#[rustc_legacy_const_generics(2)]
19730pub fn _mm512_maskz_slli_epi32<const IMM8: u32>(k: __mmask16, a: __m512i) -> __m512i {
19731 unsafe {
19732 static_assert_uimm_bits!(IMM8, 8);
19733 if IMM8 >= 32 {
19734 _mm512_setzero_si512()
19735 } else {
            let shf: u32x16 = simd_shl(a.as_u32x16(), u32x16::splat(IMM8));
            transmute(simd_select_bitmask(k, shf, u32x16::ZERO))
19738 }
19739 }
19740}
19741
19742/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19743///
19744/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_slli_epi32&expand=5305)
19745#[inline]
19746#[target_feature(enable = "avx512f,avx512vl")]
19747#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19748#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
19749#[rustc_legacy_const_generics(3)]
19750pub fn _mm256_mask_slli_epi32<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19751 unsafe {
19752 static_assert_uimm_bits!(IMM8, 8);
19753 let r: u32x8 = if IMM8 >= 32 {
19754 u32x8::ZERO
19755 } else {
            simd_shl(a.as_u32x8(), u32x8::splat(IMM8))
        };
        transmute(simd_select_bitmask(k, r, src.as_u32x8()))
19759 }
19760}
19761
19762/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19763///
19764/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_slli_epi32&expand=5306)
19765#[inline]
19766#[target_feature(enable = "avx512f,avx512vl")]
19767#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19768#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
19769#[rustc_legacy_const_generics(2)]
19770pub fn _mm256_maskz_slli_epi32<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
19771 unsafe {
19772 static_assert_uimm_bits!(IMM8, 8);
19773 if IMM8 >= 32 {
19774 _mm256_setzero_si256()
19775 } else {
            let r: u32x8 = simd_shl(a.as_u32x8(), u32x8::splat(IMM8));
            transmute(simd_select_bitmask(k, r, u32x8::ZERO))
19778 }
19779 }
19780}
19781
19782/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19783///
19784/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_slli_epi32&expand=5302)
19785#[inline]
19786#[target_feature(enable = "avx512f,avx512vl")]
19787#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19788#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
19789#[rustc_legacy_const_generics(3)]
19790pub fn _mm_mask_slli_epi32<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19791 unsafe {
19792 static_assert_uimm_bits!(IMM8, 8);
19793 let r: u32x4 = if IMM8 >= 32 {
19794 u32x4::ZERO
19795 } else {
            simd_shl(a.as_u32x4(), u32x4::splat(IMM8))
        };
        transmute(simd_select_bitmask(k, r, src.as_u32x4()))
19799 }
19800}
19801
19802/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19803///
19804/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_slli_epi32&expand=5303)
19805#[inline]
19806#[target_feature(enable = "avx512f,avx512vl")]
19807#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19808#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
19809#[rustc_legacy_const_generics(2)]
19810pub fn _mm_maskz_slli_epi32<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
19811 unsafe {
19812 static_assert_uimm_bits!(IMM8, 8);
19813 if IMM8 >= 32 {
19814 _mm_setzero_si128()
19815 } else {
            let r: u32x4 = simd_shl(a.as_u32x4(), u32x4::splat(IMM8));
            transmute(simd_select_bitmask(k, r, u32x4::ZERO))
19818 }
19819 }
19820}
19821
19822/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst.
19823///
19824/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srli_epi32&expand=5522)
19825#[inline]
19826#[target_feature(enable = "avx512f")]
19827#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19828#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
19829#[rustc_legacy_const_generics(1)]
19830pub fn _mm512_srli_epi32<const IMM8: u32>(a: __m512i) -> __m512i {
19831 unsafe {
19832 static_assert_uimm_bits!(IMM8, 8);
19833 if IMM8 >= 32 {
19834 _mm512_setzero_si512()
19835 } else {
            transmute(simd_shr(a.as_u32x16(), u32x16::splat(IMM8)))
19837 }
19838 }
19839}
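
// Sketch: `srli` is a logical shift, so the sign bit is not propagated. Assuming
// AVX-512F, a hypothetical call like
//
//     let x = _mm512_set1_epi32(i32::MIN);    // each lane is 0x8000_0000
//     let r = _mm512_srli_epi32::<1>(x);      // each lane is 0x4000_0000
//
// shows zeros being shifted in from the left; the arithmetic-shift counterparts
// (`srai`) would instead replicate the sign bit.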
19840
19841/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19842///
19843/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srli_epi32&expand=5520)
19844#[inline]
19845#[target_feature(enable = "avx512f")]
19846#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19847#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
19848#[rustc_legacy_const_generics(3)]
19849pub fn _mm512_mask_srli_epi32<const IMM8: u32>(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
19850 unsafe {
19851 static_assert_uimm_bits!(IMM8, 8);
19852 let shf: u32x16 = if IMM8 >= 32 {
19853 u32x16::ZERO
19854 } else {
            simd_shr(a.as_u32x16(), u32x16::splat(IMM8))
        };
        transmute(simd_select_bitmask(k, shf, src.as_u32x16()))
19858 }
19859}
19860
19861/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19862///
19863/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srli_epi32&expand=5521)
19864#[inline]
19865#[target_feature(enable = "avx512f")]
19866#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19867#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
19868#[rustc_legacy_const_generics(2)]
19869pub fn _mm512_maskz_srli_epi32<const IMM8: u32>(k: __mmask16, a: __m512i) -> __m512i {
19870 unsafe {
19871 static_assert_uimm_bits!(IMM8, 8);
19872 if IMM8 >= 32 {
19873 _mm512_setzero_si512()
19874 } else {
            let shf: u32x16 = simd_shr(a.as_u32x16(), u32x16::splat(IMM8));
            transmute(simd_select_bitmask(k, shf, u32x16::ZERO))
19877 }
19878 }
19879}
19880
19881/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19882///
19883/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srli_epi32&expand=5517)
19884#[inline]
19885#[target_feature(enable = "avx512f,avx512vl")]
19886#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19887#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
19888#[rustc_legacy_const_generics(3)]
19889pub fn _mm256_mask_srli_epi32<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19890 unsafe {
19891 static_assert_uimm_bits!(IMM8, 8);
19892 let r: u32x8 = if IMM8 >= 32 {
19893 u32x8::ZERO
19894 } else {
            simd_shr(a.as_u32x8(), u32x8::splat(IMM8))
        };
        transmute(simd_select_bitmask(k, r, src.as_u32x8()))
19898 }
19899}
19900
19901/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19902///
19903/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srli_epi32&expand=5518)
19904#[inline]
19905#[target_feature(enable = "avx512f,avx512vl")]
19906#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19907#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
19908#[rustc_legacy_const_generics(2)]
19909pub fn _mm256_maskz_srli_epi32<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
19910 unsafe {
19911 static_assert_uimm_bits!(IMM8, 8);
19912 if IMM8 >= 32 {
19913 _mm256_setzero_si256()
19914 } else {
            let r: u32x8 = simd_shr(a.as_u32x8(), u32x8::splat(IMM8));
            transmute(simd_select_bitmask(k, r, u32x8::ZERO))
19917 }
19918 }
19919}
19920
19921/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19922///
19923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srli_epi32&expand=5514)
19924#[inline]
19925#[target_feature(enable = "avx512f,avx512vl")]
19926#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19927#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
19928#[rustc_legacy_const_generics(3)]
19929pub fn _mm_mask_srli_epi32<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19930 unsafe {
19931 static_assert_uimm_bits!(IMM8, 8);
19932 let r: u32x4 = if IMM8 >= 32 {
19933 u32x4::ZERO
19934 } else {
            simd_shr(a.as_u32x4(), u32x4::splat(IMM8))
        };
        transmute(simd_select_bitmask(k, r, src.as_u32x4()))
19938 }
19939}
19940
19941/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19942///
19943/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srli_epi32&expand=5515)
19944#[inline]
19945#[target_feature(enable = "avx512f,avx512vl")]
19946#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19947#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
19948#[rustc_legacy_const_generics(2)]
19949pub fn _mm_maskz_srli_epi32<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
19950 unsafe {
19951 static_assert_uimm_bits!(IMM8, 8);
19952 if IMM8 >= 32 {
19953 _mm_setzero_si128()
19954 } else {
            let r: u32x4 = simd_shr(a.as_u32x4(), u32x4::splat(IMM8));
            transmute(simd_select_bitmask(k, r, u32x4::ZERO))
19957 }
19958 }
19959}
19960
19961/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst.
19962///
19963/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_slli_epi64&expand=5319)
19964#[inline]
19965#[target_feature(enable = "avx512f")]
19966#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19967#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
19968#[rustc_legacy_const_generics(1)]
19969pub fn _mm512_slli_epi64<const IMM8: u32>(a: __m512i) -> __m512i {
19970 unsafe {
19971 static_assert_uimm_bits!(IMM8, 8);
19972 if IMM8 >= 64 {
19973 _mm512_setzero_si512()
19974 } else {
            transmute(simd_shl(a.as_u64x8(), u64x8::splat(IMM8 as u64)))
19976 }
19977 }
19978}
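
// Sketch: the shift count is still an 8-bit immediate, but it is widened with
// `IMM8 as u64` before being splatted across the 64-bit lanes; counts of 64 or
// more give an all-zero result. Hypothetical example (AVX-512F assumed):
//
//     let r = _mm512_slli_epi64::<40>(_mm512_set1_epi64(1));
//     // every 64-bit lane of `r` is 1 << 40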
19979
19980/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19981///
19982/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_slli_epi64&expand=5317)
19983#[inline]
19984#[target_feature(enable = "avx512f")]
19985#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
19986#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
19987#[rustc_legacy_const_generics(3)]
19988pub fn _mm512_mask_slli_epi64<const IMM8: u32>(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
19989 unsafe {
19990 static_assert_uimm_bits!(IMM8, 8);
19991 let shf: u64x8 = if IMM8 >= 64 {
19992 u64x8::ZERO
19993 } else {
            simd_shl(a.as_u64x8(), u64x8::splat(IMM8 as u64))
        };
        transmute(simd_select_bitmask(k, shf, src.as_u64x8()))
19997 }
19998}
19999
20000/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20001///
20002/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_slli_epi64&expand=5318)
20003#[inline]
20004#[target_feature(enable = "avx512f")]
20005#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20006#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
20007#[rustc_legacy_const_generics(2)]
20008pub fn _mm512_maskz_slli_epi64<const IMM8: u32>(k: __mmask8, a: __m512i) -> __m512i {
20009 unsafe {
20010 static_assert_uimm_bits!(IMM8, 8);
20011 if IMM8 >= 64 {
20012 _mm512_setzero_si512()
20013 } else {
            let shf: u64x8 = simd_shl(a.as_u64x8(), u64x8::splat(IMM8 as u64));
            transmute(simd_select_bitmask(k, shf, u64x8::ZERO))
20016 }
20017 }
20018}
20019
20020/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20021///
20022/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_slli_epi64&expand=5314)
20023#[inline]
20024#[target_feature(enable = "avx512f,avx512vl")]
20025#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20026#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
20027#[rustc_legacy_const_generics(3)]
20028pub fn _mm256_mask_slli_epi64<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
20029 unsafe {
20030 static_assert_uimm_bits!(IMM8, 8);
20031 let r: u64x4 = if IMM8 >= 64 {
20032 u64x4::ZERO
20033 } else {
            simd_shl(a.as_u64x4(), u64x4::splat(IMM8 as u64))
        };
        transmute(simd_select_bitmask(k, r, src.as_u64x4()))
20037 }
20038}
20039
20040/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20041///
20042/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_slli_epi64&expand=5315)
20043#[inline]
20044#[target_feature(enable = "avx512f,avx512vl")]
20045#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20046#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
20047#[rustc_legacy_const_generics(2)]
20048pub fn _mm256_maskz_slli_epi64<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
20049 unsafe {
20050 static_assert_uimm_bits!(IMM8, 8);
20051 if IMM8 >= 64 {
20052 _mm256_setzero_si256()
20053 } else {
            let r: u64x4 = simd_shl(a.as_u64x4(), u64x4::splat(IMM8 as u64));
            transmute(simd_select_bitmask(k, r, u64x4::ZERO))
20056 }
20057 }
20058}
20059
20060/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20061///
20062/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_slli_epi64&expand=5311)
20063#[inline]
20064#[target_feature(enable = "avx512f,avx512vl")]
20065#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20066#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
20067#[rustc_legacy_const_generics(3)]
20068pub fn _mm_mask_slli_epi64<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
20069 unsafe {
20070 static_assert_uimm_bits!(IMM8, 8);
20071 let r: u64x2 = if IMM8 >= 64 {
20072 u64x2::ZERO
20073 } else {
            simd_shl(a.as_u64x2(), u64x2::splat(IMM8 as u64))
        };
        transmute(simd_select_bitmask(k, r, src.as_u64x2()))
20077 }
20078}
20079
20080/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20081///
20082/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_slli_epi64&expand=5312)
20083#[inline]
20084#[target_feature(enable = "avx512f,avx512vl")]
20085#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20086#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
20087#[rustc_legacy_const_generics(2)]
20088pub fn _mm_maskz_slli_epi64<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
20089 unsafe {
20090 static_assert_uimm_bits!(IMM8, 8);
20091 if IMM8 >= 64 {
20092 _mm_setzero_si128()
20093 } else {
            let r: u64x2 = simd_shl(a.as_u64x2(), u64x2::splat(IMM8 as u64));
            transmute(simd_select_bitmask(k, r, u64x2::ZERO))
20096 }
20097 }
20098}
20099
20100/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst.
20101///
20102/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srli_epi64&expand=5531)
20103#[inline]
20104#[target_feature(enable = "avx512f")]
20105#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20106#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20107#[rustc_legacy_const_generics(1)]
20108pub fn _mm512_srli_epi64<const IMM8: u32>(a: __m512i) -> __m512i {
20109 unsafe {
20110 static_assert_uimm_bits!(IMM8, 8);
20111 if IMM8 >= 64 {
20112 _mm512_setzero_si512()
20113 } else {
            transmute(simd_shr(a.as_u64x8(), u64x8::splat(IMM8 as u64)))
20115 }
20116 }
20117}
20118
20119/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20120///
20121/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srli_epi64&expand=5529)
20122#[inline]
20123#[target_feature(enable = "avx512f")]
20124#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20125#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20126#[rustc_legacy_const_generics(3)]
20127pub fn _mm512_mask_srli_epi64<const IMM8: u32>(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
20128 unsafe {
20129 static_assert_uimm_bits!(IMM8, 8);
20130 let shf: u64x8 = if IMM8 >= 64 {
20131 u64x8::ZERO
20132 } else {
            simd_shr(a.as_u64x8(), u64x8::splat(IMM8 as u64))
        };
        transmute(simd_select_bitmask(k, shf, src.as_u64x8()))
20136 }
20137}
20138
20139/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20140///
20141/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srli_epi64&expand=5530)
20142#[inline]
20143#[target_feature(enable = "avx512f")]
20144#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20145#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20146#[rustc_legacy_const_generics(2)]
20147pub fn _mm512_maskz_srli_epi64<const IMM8: u32>(k: __mmask8, a: __m512i) -> __m512i {
20148 unsafe {
20149 static_assert_uimm_bits!(IMM8, 8);
20150 if IMM8 >= 64 {
20151 _mm512_setzero_si512()
20152 } else {
            let shf: u64x8 = simd_shr(a.as_u64x8(), u64x8::splat(IMM8 as u64));
            transmute(simd_select_bitmask(k, shf, u64x8::ZERO))
20155 }
20156 }
20157}
20158
20159/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20160///
20161/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srli_epi64&expand=5526)
20162#[inline]
20163#[target_feature(enable = "avx512f,avx512vl")]
20164#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20165#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20166#[rustc_legacy_const_generics(3)]
20167pub fn _mm256_mask_srli_epi64<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
20168 unsafe {
20169 static_assert_uimm_bits!(IMM8, 8);
20170 let r: u64x4 = if IMM8 >= 64 {
20171 u64x4::ZERO
20172 } else {
            simd_shr(a.as_u64x4(), u64x4::splat(IMM8 as u64))
        };
        transmute(simd_select_bitmask(k, r, src.as_u64x4()))
20176 }
20177}
20178
20179/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20180///
20181/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srli_epi64&expand=5527)
20182#[inline]
20183#[target_feature(enable = "avx512f,avx512vl")]
20184#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20185#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20186#[rustc_legacy_const_generics(2)]
20187pub fn _mm256_maskz_srli_epi64<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
20188 unsafe {
20189 static_assert_uimm_bits!(IMM8, 8);
20190 if IMM8 >= 64 {
20191 _mm256_setzero_si256()
20192 } else {
            let r: u64x4 = simd_shr(a.as_u64x4(), u64x4::splat(IMM8 as u64));
            transmute(simd_select_bitmask(k, r, u64x4::ZERO))
20195 }
20196 }
20197}
20198
20199/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20200///
20201/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srli_epi64&expand=5523)
20202#[inline]
20203#[target_feature(enable = "avx512f,avx512vl")]
20204#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20205#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20206#[rustc_legacy_const_generics(3)]
20207pub fn _mm_mask_srli_epi64<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
20208 unsafe {
20209 static_assert_uimm_bits!(IMM8, 8);
20210 let r: u64x2 = if IMM8 >= 64 {
20211 u64x2::ZERO
20212 } else {
            simd_shr(a.as_u64x2(), u64x2::splat(IMM8 as u64))
        };
        transmute(simd_select_bitmask(k, r, src.as_u64x2()))
20216 }
20217}
20218
20219/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20220///
20221/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srli_epi64&expand=5524)
20222#[inline]
20223#[target_feature(enable = "avx512f,avx512vl")]
20224#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20225#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20226#[rustc_legacy_const_generics(2)]
20227pub fn _mm_maskz_srli_epi64<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
20228 unsafe {
20229 static_assert_uimm_bits!(IMM8, 8);
20230 if IMM8 >= 64 {
20231 _mm_setzero_si128()
20232 } else {
            let r: u64x2 = simd_shr(a.as_u64x2(), u64x2::splat(IMM8 as u64));
            transmute(simd_select_bitmask(k, r, u64x2::ZERO))
20235 }
20236 }
20237}
20238
20239/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst.
20240///
20241/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sll_epi32&expand=5280)
20242#[inline]
20243#[target_feature(enable = "avx512f")]
20244#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20245#[cfg_attr(test, assert_instr(vpslld))]
20246pub fn _mm512_sll_epi32(a: __m512i, count: __m128i) -> __m512i {
    unsafe { transmute(vpslld(a.as_i32x16(), count.as_i32x4())) }
20248}
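
// Sketch of the vector-count form: the shift amount comes from the low 64 bits of
// `count`, and it applies to every lane; if that value exceeds 31, all lanes
// become zero. Hypothetical usage (AVX-512F assumed; `_mm_cvtsi32_si128` just
// places the count in the low element):
//
//     let a = _mm512_set1_epi32(3);
//     let r = _mm512_sll_epi32(a, _mm_cvtsi32_si128(4));   // every lane is 48
//     let z = _mm512_sll_epi32(a, _mm_cvtsi32_si128(40));  // every lane is 0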
20249
20250/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20251///
20252/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sll_epi32&expand=5278)
20253#[inline]
20254#[target_feature(enable = "avx512f")]
20255#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20256#[cfg_attr(test, assert_instr(vpslld))]
20257pub fn _mm512_mask_sll_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20258 unsafe {
20259 let shf: i32x16 = _mm512_sll_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
20261 }
20262}
20263
20264/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20265///
20266/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sll_epi32&expand=5279)
20267#[inline]
20268#[target_feature(enable = "avx512f")]
20269#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20270#[cfg_attr(test, assert_instr(vpslld))]
20271pub fn _mm512_maskz_sll_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20272 unsafe {
20273 let shf: i32x16 = _mm512_sll_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
20275 }
20276}
20277
20278/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20279///
20280/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sll_epi32&expand=5275)
20281#[inline]
20282#[target_feature(enable = "avx512f,avx512vl")]
20283#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20284#[cfg_attr(test, assert_instr(vpslld))]
20285pub fn _mm256_mask_sll_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20286 unsafe {
20287 let shf: i32x8 = _mm256_sll_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
20289 }
20290}
20291
20292/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20293///
20294/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sll_epi32&expand=5276)
20295#[inline]
20296#[target_feature(enable = "avx512f,avx512vl")]
20297#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20298#[cfg_attr(test, assert_instr(vpslld))]
20299pub fn _mm256_maskz_sll_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20300 unsafe {
20301 let shf: i32x8 = _mm256_sll_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
20303 }
20304}
20305
20306/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20307///
20308/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sll_epi32&expand=5272)
20309#[inline]
20310#[target_feature(enable = "avx512f,avx512vl")]
20311#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20312#[cfg_attr(test, assert_instr(vpslld))]
20313pub fn _mm_mask_sll_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20314 unsafe {
20315 let shf: i32x4 = _mm_sll_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
20317 }
20318}
20319
20320/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20321///
20322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sll_epi32&expand=5273)
20323#[inline]
20324#[target_feature(enable = "avx512f,avx512vl")]
20325#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20326#[cfg_attr(test, assert_instr(vpslld))]
20327pub fn _mm_maskz_sll_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20328 unsafe {
20329 let shf: i32x4 = _mm_sll_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
20331 }
20332}
20333
20334/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst.
20335///
20336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srl_epi32&expand=5492)
20337#[inline]
20338#[target_feature(enable = "avx512f")]
20339#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20340#[cfg_attr(test, assert_instr(vpsrld))]
20341pub fn _mm512_srl_epi32(a: __m512i, count: __m128i) -> __m512i {
    unsafe { transmute(vpsrld(a.as_i32x16(), count.as_i32x4())) }
20343}
20344
20345/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20346///
20347/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srl_epi32&expand=5490)
20348#[inline]
20349#[target_feature(enable = "avx512f")]
20350#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20351#[cfg_attr(test, assert_instr(vpsrld))]
20352pub fn _mm512_mask_srl_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20353 unsafe {
20354 let shf: i32x16 = _mm512_srl_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
20356 }
20357}
20358
20359/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20360///
20361/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srl_epi32&expand=5491)
20362#[inline]
20363#[target_feature(enable = "avx512f")]
20364#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20365#[cfg_attr(test, assert_instr(vpsrld))]
20366pub fn _mm512_maskz_srl_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20367 unsafe {
20368 let shf: i32x16 = _mm512_srl_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
20370 }
20371}
20372
20373/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20374///
20375/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srl_epi32&expand=5487)
20376#[inline]
20377#[target_feature(enable = "avx512f,avx512vl")]
20378#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20379#[cfg_attr(test, assert_instr(vpsrld))]
20380pub fn _mm256_mask_srl_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20381 unsafe {
20382 let shf: i32x8 = _mm256_srl_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
20384 }
20385}
20386
20387/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20388///
20389/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srl_epi32&expand=5488)
20390#[inline]
20391#[target_feature(enable = "avx512f,avx512vl")]
20392#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20393#[cfg_attr(test, assert_instr(vpsrld))]
20394pub fn _mm256_maskz_srl_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20395 unsafe {
20396 let shf: i32x8 = _mm256_srl_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
20398 }
20399}
20400
20401/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20402///
20403/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srl_epi32&expand=5484)
20404#[inline]
20405#[target_feature(enable = "avx512f,avx512vl")]
20406#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20407#[cfg_attr(test, assert_instr(vpsrld))]
20408pub fn _mm_mask_srl_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20409 unsafe {
20410 let shf: i32x4 = _mm_srl_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
20412 }
20413}
20414
20415/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20416///
20417/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srl_epi32&expand=5485)
20418#[inline]
20419#[target_feature(enable = "avx512f,avx512vl")]
20420#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20421#[cfg_attr(test, assert_instr(vpsrld))]
20422pub fn _mm_maskz_srl_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20423 unsafe {
20424 let shf: i32x4 = _mm_srl_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
20426 }
20427}
20428
20429/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst.
20430///
20431/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sll_epi64&expand=5289)
20432#[inline]
20433#[target_feature(enable = "avx512f")]
20434#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20435#[cfg_attr(test, assert_instr(vpsllq))]
20436pub fn _mm512_sll_epi64(a: __m512i, count: __m128i) -> __m512i {
    unsafe { transmute(vpsllq(a.as_i64x8(), count.as_i64x2())) }
20438}
20439
20440/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20441///
20442/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sll_epi64&expand=5287)
20443#[inline]
20444#[target_feature(enable = "avx512f")]
20445#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20446#[cfg_attr(test, assert_instr(vpsllq))]
20447pub fn _mm512_mask_sll_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20448 unsafe {
20449 let shf: i64x8 = _mm512_sll_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
20451 }
20452}
20453
20454/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20455///
20456/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sll_epi64&expand=5288)
20457#[inline]
20458#[target_feature(enable = "avx512f")]
20459#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20460#[cfg_attr(test, assert_instr(vpsllq))]
20461pub fn _mm512_maskz_sll_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20462 unsafe {
20463 let shf: i64x8 = _mm512_sll_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
20465 }
20466}
20467
20468/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20469///
20470/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sll_epi64&expand=5284)
20471#[inline]
20472#[target_feature(enable = "avx512f,avx512vl")]
20473#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20474#[cfg_attr(test, assert_instr(vpsllq))]
20475pub fn _mm256_mask_sll_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20476 unsafe {
20477 let shf: i64x4 = _mm256_sll_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
20479 }
20480}
20481
20482/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20483///
20484/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sll_epi64&expand=5285)
20485#[inline]
20486#[target_feature(enable = "avx512f,avx512vl")]
20487#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20488#[cfg_attr(test, assert_instr(vpsllq))]
20489pub fn _mm256_maskz_sll_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20490 unsafe {
20491 let shf: i64x4 = _mm256_sll_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
20493 }
20494}
20495
20496/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20497///
20498/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sll_epi64&expand=5281)
20499#[inline]
20500#[target_feature(enable = "avx512f,avx512vl")]
20501#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20502#[cfg_attr(test, assert_instr(vpsllq))]
20503pub fn _mm_mask_sll_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20504 unsafe {
20505 let shf: i64x2 = _mm_sll_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
20507 }
20508}
20509
20510/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20511///
20512/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sll_epi64&expand=5282)
20513#[inline]
20514#[target_feature(enable = "avx512f,avx512vl")]
20515#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20516#[cfg_attr(test, assert_instr(vpsllq))]
20517pub fn _mm_maskz_sll_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20518 unsafe {
20519 let shf: i64x2 = _mm_sll_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
20521 }
20522}
20523
20524/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst.
20525///
20526/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srl_epi64&expand=5501)
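///
/// A minimal usage sketch (illustrative values; assumes `avx512f` is available):
///
/// ```ignore
/// let a = _mm512_set1_epi64(-1); // all bits set
/// let count = _mm_set_epi64x(0, 63);
/// let r = _mm512_srl_epi64(a, count); // zeros shift in, so every lane becomes 1
/// ```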
20527#[inline]
20528#[target_feature(enable = "avx512f")]
20529#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20530#[cfg_attr(test, assert_instr(vpsrlq))]
20531pub fn _mm512_srl_epi64(a: __m512i, count: __m128i) -> __m512i {
    unsafe { transmute(vpsrlq(a.as_i64x8(), count.as_i64x2())) }
20533}
20534
20535/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20536///
20537/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srl_epi64&expand=5499)
20538#[inline]
20539#[target_feature(enable = "avx512f")]
20540#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20541#[cfg_attr(test, assert_instr(vpsrlq))]
20542pub fn _mm512_mask_srl_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20543 unsafe {
20544 let shf: i64x8 = _mm512_srl_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
20546 }
20547}
20548
20549/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20550///
20551/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srl_epi64&expand=5500)
20552#[inline]
20553#[target_feature(enable = "avx512f")]
20554#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20555#[cfg_attr(test, assert_instr(vpsrlq))]
20556pub fn _mm512_maskz_srl_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20557 unsafe {
20558 let shf: i64x8 = _mm512_srl_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
20560 }
20561}
20562
20563/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20564///
20565/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srl_epi64&expand=5496)
20566#[inline]
20567#[target_feature(enable = "avx512f,avx512vl")]
20568#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20569#[cfg_attr(test, assert_instr(vpsrlq))]
20570pub fn _mm256_mask_srl_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20571 unsafe {
20572 let shf: i64x4 = _mm256_srl_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
20574 }
20575}
20576
20577/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20578///
20579/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srl_epi64&expand=5497)
20580#[inline]
20581#[target_feature(enable = "avx512f,avx512vl")]
20582#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20583#[cfg_attr(test, assert_instr(vpsrlq))]
20584pub fn _mm256_maskz_srl_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20585 unsafe {
20586 let shf: i64x4 = _mm256_srl_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
20588 }
20589}
20590
20591/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20592///
20593/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srl_epi64&expand=5493)
20594#[inline]
20595#[target_feature(enable = "avx512f,avx512vl")]
20596#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20597#[cfg_attr(test, assert_instr(vpsrlq))]
20598pub fn _mm_mask_srl_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20599 unsafe {
20600 let shf: i64x2 = _mm_srl_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
20602 }
20603}
20604
20605/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20606///
20607/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srl_epi64&expand=5494)
20608#[inline]
20609#[target_feature(enable = "avx512f,avx512vl")]
20610#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20611#[cfg_attr(test, assert_instr(vpsrlq))]
20612pub fn _mm_maskz_srl_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20613 unsafe {
20614 let shf: i64x2 = _mm_srl_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
20616 }
20617}
20618
20619/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst.
20620///
20621/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sra_epi32&expand=5407)
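///
/// A minimal usage sketch (illustrative values; assumes `avx512f` is available):
///
/// ```ignore
/// let a = _mm512_set1_epi32(-16);
/// let count = _mm_set_epi64x(0, 2);
/// let r = _mm512_sra_epi32(a, count); // sign bits shift in, so every lane becomes -4
/// ```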
20622#[inline]
20623#[target_feature(enable = "avx512f")]
20624#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20625#[cfg_attr(test, assert_instr(vpsrad))]
20626pub fn _mm512_sra_epi32(a: __m512i, count: __m128i) -> __m512i {
    unsafe { transmute(vpsrad(a.as_i32x16(), count.as_i32x4())) }
20628}
20629
20630/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20631///
20632/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sra_epi32&expand=5405)
20633#[inline]
20634#[target_feature(enable = "avx512f")]
20635#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20636#[cfg_attr(test, assert_instr(vpsrad))]
20637pub fn _mm512_mask_sra_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20638 unsafe {
20639 let shf: i32x16 = _mm512_sra_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
20641 }
20642}
20643
20644/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20645///
20646/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sra_epi32&expand=5406)
20647#[inline]
20648#[target_feature(enable = "avx512f")]
20649#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20650#[cfg_attr(test, assert_instr(vpsrad))]
20651pub fn _mm512_maskz_sra_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20652 unsafe {
20653 let shf: i32x16 = _mm512_sra_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
20655 }
20656}
20657
20658/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20659///
20660/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sra_epi32&expand=5402)
20661#[inline]
20662#[target_feature(enable = "avx512f,avx512vl")]
20663#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20664#[cfg_attr(test, assert_instr(vpsrad))]
20665pub fn _mm256_mask_sra_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20666 unsafe {
20667 let shf: i32x8 = _mm256_sra_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
20669 }
20670}
20671
20672/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20673///
20674/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sra_epi32&expand=5403)
20675#[inline]
20676#[target_feature(enable = "avx512f,avx512vl")]
20677#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20678#[cfg_attr(test, assert_instr(vpsrad))]
20679pub fn _mm256_maskz_sra_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20680 unsafe {
20681 let shf: i32x8 = _mm256_sra_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
20683 }
20684}
20685
20686/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20687///
20688/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sra_epi32&expand=5399)
20689#[inline]
20690#[target_feature(enable = "avx512f,avx512vl")]
20691#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20692#[cfg_attr(test, assert_instr(vpsrad))]
20693pub fn _mm_mask_sra_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20694 unsafe {
20695 let shf: i32x4 = _mm_sra_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
20697 }
20698}
20699
20700/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20701///
20702/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sra_epi32&expand=5400)
20703#[inline]
20704#[target_feature(enable = "avx512f,avx512vl")]
20705#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20706#[cfg_attr(test, assert_instr(vpsrad))]
20707pub fn _mm_maskz_sra_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20708 unsafe {
20709 let shf: i32x4 = _mm_sra_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
20711 }
20712}
20713
20714/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst.
20715///
20716/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sra_epi64&expand=5416)
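///
/// A minimal usage sketch (illustrative values; assumes `avx512f` is available):
///
/// ```ignore
/// let a = _mm512_set1_epi64(i64::MIN);
/// let count = _mm_set_epi64x(0, 62);
/// let r = _mm512_sra_epi64(a, count); // sign bits shift in, so every lane becomes -2
/// ```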
20717#[inline]
20718#[target_feature(enable = "avx512f")]
20719#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20720#[cfg_attr(test, assert_instr(vpsraq))]
20721pub fn _mm512_sra_epi64(a: __m512i, count: __m128i) -> __m512i {
    unsafe { transmute(vpsraq(a.as_i64x8(), count.as_i64x2())) }
20723}
20724
20725/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20726///
20727/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sra_epi64&expand=5414)
20728#[inline]
20729#[target_feature(enable = "avx512f")]
20730#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20731#[cfg_attr(test, assert_instr(vpsraq))]
20732pub fn _mm512_mask_sra_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20733 unsafe {
20734 let shf: i64x8 = _mm512_sra_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
20736 }
20737}
20738
20739/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20740///
20741/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sra_epi64&expand=5415)
20742#[inline]
20743#[target_feature(enable = "avx512f")]
20744#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20745#[cfg_attr(test, assert_instr(vpsraq))]
20746pub fn _mm512_maskz_sra_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20747 unsafe {
20748 let shf: i64x8 = _mm512_sra_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
20750 }
20751}
20752
20753/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst.
20754///
20755/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sra_epi64&expand=5413)
20756#[inline]
20757#[target_feature(enable = "avx512f,avx512vl")]
20758#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20759#[cfg_attr(test, assert_instr(vpsraq))]
20760pub fn _mm256_sra_epi64(a: __m256i, count: __m128i) -> __m256i {
    unsafe { transmute(vpsraq256(a.as_i64x4(), count.as_i64x2())) }
20762}
20763
20764/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20765///
20766/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sra_epi64&expand=5411)
20767#[inline]
20768#[target_feature(enable = "avx512f,avx512vl")]
20769#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20770#[cfg_attr(test, assert_instr(vpsraq))]
20771pub fn _mm256_mask_sra_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20772 unsafe {
20773 let shf: i64x4 = _mm256_sra_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
20775 }
20776}
20777
20778/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20779///
20780/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sra_epi64&expand=5412)
20781#[inline]
20782#[target_feature(enable = "avx512f,avx512vl")]
20783#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20784#[cfg_attr(test, assert_instr(vpsraq))]
20785pub fn _mm256_maskz_sra_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20786 unsafe {
20787 let shf: i64x4 = _mm256_sra_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
20789 }
20790}
20791
20792/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst.
20793///
20794/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sra_epi64&expand=5410)
20795#[inline]
20796#[target_feature(enable = "avx512f,avx512vl")]
20797#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20798#[cfg_attr(test, assert_instr(vpsraq))]
20799pub fn _mm_sra_epi64(a: __m128i, count: __m128i) -> __m128i {
    unsafe { transmute(vpsraq128(a.as_i64x2(), count.as_i64x2())) }
20801}
20802
20803/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20804///
20805/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sra_epi64&expand=5408)
20806#[inline]
20807#[target_feature(enable = "avx512f,avx512vl")]
20808#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20809#[cfg_attr(test, assert_instr(vpsraq))]
20810pub fn _mm_mask_sra_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20811 unsafe {
20812 let shf: i64x2 = _mm_sra_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
20814 }
20815}
20816
20817/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20818///
20819/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sra_epi64&expand=5409)
20820#[inline]
20821#[target_feature(enable = "avx512f,avx512vl")]
20822#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20823#[cfg_attr(test, assert_instr(vpsraq))]
20824pub fn _mm_maskz_sra_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20825 unsafe {
20826 let shf: i64x2 = _mm_sra_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
20828 }
20829}
20830
20831/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
20832///
20833/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srai_epi32&expand=5436)
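///
/// A minimal usage sketch (illustrative values; assumes `avx512f` is available):
///
/// ```ignore
/// let a = _mm512_set1_epi32(-256);
/// let r = _mm512_srai_epi32::<4>(a); // every lane becomes -256 >> 4 == -16
/// ```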
20834#[inline]
20835#[target_feature(enable = "avx512f")]
20836#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20837#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
20838#[rustc_legacy_const_generics(1)]
20839pub fn _mm512_srai_epi32<const IMM8: u32>(a: __m512i) -> __m512i {
20840 unsafe {
20841 static_assert_uimm_bits!(IMM8, 8);
        transmute(simd_shr(a.as_i32x16(), i32x16::splat(IMM8.min(31) as i32)))
20843 }
20844}
20845
20846/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20847///
20848/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srai_epi32&expand=5434)
20849#[inline]
20850#[target_feature(enable = "avx512f")]
20851#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20852#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
20853#[rustc_legacy_const_generics(3)]
20854pub fn _mm512_mask_srai_epi32<const IMM8: u32>(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
20855 unsafe {
20856 static_assert_uimm_bits!(IMM8, 8);
        let r: i32x16 = simd_shr(a.as_i32x16(), i32x16::splat(IMM8.min(31) as i32));
        transmute(simd_select_bitmask(k, r, src.as_i32x16()))
20859 }
20860}
20861
20862/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20863///
20864/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srai_epi32&expand=5435)
20865#[inline]
20866#[target_feature(enable = "avx512f")]
20867#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20868#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
20869#[rustc_legacy_const_generics(2)]
20870pub fn _mm512_maskz_srai_epi32<const IMM8: u32>(k: __mmask16, a: __m512i) -> __m512i {
20871 unsafe {
20872 static_assert_uimm_bits!(IMM8, 8);
        let r: i32x16 = simd_shr(a.as_i32x16(), i32x16::splat(IMM8.min(31) as i32));
        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
20875 }
20876}
20877
20878/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20879///
20880/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srai_epi32&expand=5431)
20881#[inline]
20882#[target_feature(enable = "avx512f,avx512vl")]
20883#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20884#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
20885#[rustc_legacy_const_generics(3)]
20886pub fn _mm256_mask_srai_epi32<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
20887 unsafe {
        let r: i32x8 = simd_shr(a.as_i32x8(), i32x8::splat(IMM8.min(31) as i32));
        transmute(simd_select_bitmask(k, r, src.as_i32x8()))
20890 }
20891}
20892
20893/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20894///
20895/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srai_epi32&expand=5432)
20896#[inline]
20897#[target_feature(enable = "avx512f,avx512vl")]
20898#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20899#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
20900#[rustc_legacy_const_generics(2)]
20901pub fn _mm256_maskz_srai_epi32<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
20902 unsafe {
        let r: i32x8 = simd_shr(a.as_i32x8(), i32x8::splat(IMM8.min(31) as i32));
        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
20905 }
20906}
20907
20908/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20909///
20910/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srai_epi32&expand=5428)
20911#[inline]
20912#[target_feature(enable = "avx512f,avx512vl")]
20913#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20914#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
20915#[rustc_legacy_const_generics(3)]
20916pub fn _mm_mask_srai_epi32<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
20917 unsafe {
        let r: i32x4 = simd_shr(a.as_i32x4(), i32x4::splat(IMM8.min(31) as i32));
        transmute(simd_select_bitmask(k, r, src.as_i32x4()))
20920 }
20921}
20922
20923/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20924///
20925/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srai_epi32&expand=5429)
20926#[inline]
20927#[target_feature(enable = "avx512f,avx512vl")]
20928#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20929#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
20930#[rustc_legacy_const_generics(2)]
20931pub fn _mm_maskz_srai_epi32<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
20932 unsafe {
        let r: i32x4 = simd_shr(a.as_i32x4(), i32x4::splat(IMM8.min(31) as i32));
        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
20935 }
20936}
20937
20938/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
20939///
20940/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srai_epi64&expand=5445)
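///
/// A minimal usage sketch (illustrative values; assumes `avx512f` is available):
///
/// ```ignore
/// let a = _mm512_set1_epi64(-1024);
/// let r = _mm512_srai_epi64::<10>(a); // every lane becomes -1024 >> 10 == -1
/// ```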
20941#[inline]
20942#[target_feature(enable = "avx512f")]
20943#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20944#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
20945#[rustc_legacy_const_generics(1)]
20946pub fn _mm512_srai_epi64<const IMM8: u32>(a: __m512i) -> __m512i {
20947 unsafe {
20948 static_assert_uimm_bits!(IMM8, 8);
        transmute(simd_shr(a.as_i64x8(), i64x8::splat(IMM8.min(63) as i64)))
20950 }
20951}
20952
20953/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20954///
20955/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srai_epi64&expand=5443)
20956#[inline]
20957#[target_feature(enable = "avx512f")]
20958#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20959#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
20960#[rustc_legacy_const_generics(3)]
20961pub fn _mm512_mask_srai_epi64<const IMM8: u32>(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
20962 unsafe {
20963 static_assert_uimm_bits!(IMM8, 8);
        let shf: i64x8 = simd_shr(a.as_i64x8(), i64x8::splat(IMM8.min(63) as i64));
        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
20966 }
20967}
20968
20969/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20970///
20971/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srai_epi64&expand=5444)
20972#[inline]
20973#[target_feature(enable = "avx512f")]
20974#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20975#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
20976#[rustc_legacy_const_generics(2)]
20977pub fn _mm512_maskz_srai_epi64<const IMM8: u32>(k: __mmask8, a: __m512i) -> __m512i {
20978 unsafe {
20979 static_assert_uimm_bits!(IMM8, 8);
        let shf: i64x8 = simd_shr(a.as_i64x8(), i64x8::splat(IMM8.min(63) as i64));
        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
20982 }
20983}
20984
20985/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
20986///
20987/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srai_epi64&expand=5442)
20988#[inline]
20989#[target_feature(enable = "avx512f,avx512vl")]
20990#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
20991#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
20992#[rustc_legacy_const_generics(1)]
20993pub fn _mm256_srai_epi64<const IMM8: u32>(a: __m256i) -> __m256i {
20994 unsafe {
20995 static_assert_uimm_bits!(IMM8, 8);
        transmute(simd_shr(a.as_i64x4(), i64x4::splat(IMM8.min(63) as i64)))
20997 }
20998}
20999
21000/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21001///
21002/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srai_epi64&expand=5440)
21003#[inline]
21004#[target_feature(enable = "avx512f,avx512vl")]
21005#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21006#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
21007#[rustc_legacy_const_generics(3)]
21008pub fn _mm256_mask_srai_epi64<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
21009 unsafe {
21010 static_assert_uimm_bits!(IMM8, 8);
        let shf: i64x4 = simd_shr(a.as_i64x4(), i64x4::splat(IMM8.min(63) as i64));
        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
21013 }
21014}
21015
21016/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21017///
21018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srai_epi64&expand=5441)
21019#[inline]
21020#[target_feature(enable = "avx512f,avx512vl")]
21021#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21022#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
21023#[rustc_legacy_const_generics(2)]
21024pub fn _mm256_maskz_srai_epi64<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
21025 unsafe {
21026 static_assert_uimm_bits!(IMM8, 8);
        let shf: i64x4 = simd_shr(a.as_i64x4(), i64x4::splat(IMM8.min(63) as i64));
        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
21029 }
21030}
21031
21032/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
21033///
21034/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srai_epi64&expand=5439)
21035#[inline]
21036#[target_feature(enable = "avx512f,avx512vl")]
21037#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21038#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
21039#[rustc_legacy_const_generics(1)]
21040pub fn _mm_srai_epi64<const IMM8: u32>(a: __m128i) -> __m128i {
21041 unsafe {
21042 static_assert_uimm_bits!(IMM8, 8);
        transmute(simd_shr(a.as_i64x2(), i64x2::splat(IMM8.min(63) as i64)))
21044 }
21045}
21046
21047/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21048///
21049/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srai_epi64&expand=5437)
21050#[inline]
21051#[target_feature(enable = "avx512f,avx512vl")]
21052#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21053#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
21054#[rustc_legacy_const_generics(3)]
21055pub fn _mm_mask_srai_epi64<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
21056 unsafe {
21057 static_assert_uimm_bits!(IMM8, 8);
        let shf: i64x2 = simd_shr(a.as_i64x2(), i64x2::splat(IMM8.min(63) as i64));
        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
21060 }
21061}
21062
21063/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21064///
21065/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srai_epi64&expand=5438)
21066#[inline]
21067#[target_feature(enable = "avx512f,avx512vl")]
21068#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21069#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
21070#[rustc_legacy_const_generics(2)]
21071pub fn _mm_maskz_srai_epi64<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
21072 unsafe {
21073 static_assert_uimm_bits!(IMM8, 8);
        let shf: i64x2 = simd_shr(a.as_i64x2(), i64x2::splat(IMM8.min(63) as i64));
        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
21076 }
21077}
21078
21079/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
21080///
21081/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srav_epi32&expand=5465)
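///
/// A minimal usage sketch (illustrative values; assumes `avx512f` is available):
///
/// ```ignore
/// let a = _mm512_set1_epi32(-64);
/// let counts = _mm512_set1_epi32(3);
/// let r = _mm512_srav_epi32(a, counts); // each lane uses its own count: -64 >> 3 == -8
/// ```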
21082#[inline]
21083#[target_feature(enable = "avx512f")]
21084#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21085#[cfg_attr(test, assert_instr(vpsravd))]
21086pub fn _mm512_srav_epi32(a: __m512i, count: __m512i) -> __m512i {
    unsafe { transmute(vpsravd(a.as_i32x16(), count.as_i32x16())) }
21088}
21089
21090/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21091///
21092/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srav_epi32&expand=5463)
21093#[inline]
21094#[target_feature(enable = "avx512f")]
21095#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21096#[cfg_attr(test, assert_instr(vpsravd))]
21097pub fn _mm512_mask_srav_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
21098 unsafe {
21099 let shf: i32x16 = _mm512_srav_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
21101 }
21102}
21103
21104/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21105///
21106/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srav_epi32&expand=5464)
21107#[inline]
21108#[target_feature(enable = "avx512f")]
21109#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21110#[cfg_attr(test, assert_instr(vpsravd))]
21111pub fn _mm512_maskz_srav_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
21112 unsafe {
21113 let shf: i32x16 = _mm512_srav_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
21115 }
21116}
21117
21118/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21119///
21120/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srav_epi32&expand=5460)
21121#[inline]
21122#[target_feature(enable = "avx512f,avx512vl")]
21123#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21124#[cfg_attr(test, assert_instr(vpsravd))]
21125pub fn _mm256_mask_srav_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21126 unsafe {
21127 let shf: i32x8 = _mm256_srav_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
21129 }
21130}
21131
21132/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21133///
21134/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srav_epi32&expand=5461)
21135#[inline]
21136#[target_feature(enable = "avx512f,avx512vl")]
21137#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21138#[cfg_attr(test, assert_instr(vpsravd))]
21139pub fn _mm256_maskz_srav_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21140 unsafe {
21141 let shf: i32x8 = _mm256_srav_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
21143 }
21144}
21145
21146/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21147///
21148/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srav_epi32&expand=5457)
21149#[inline]
21150#[target_feature(enable = "avx512f,avx512vl")]
21151#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21152#[cfg_attr(test, assert_instr(vpsravd))]
21153pub fn _mm_mask_srav_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21154 unsafe {
21155 let shf: i32x4 = _mm_srav_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
21157 }
21158}
21159
21160/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21161///
21162/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srav_epi32&expand=5458)
21163#[inline]
21164#[target_feature(enable = "avx512f,avx512vl")]
21165#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21166#[cfg_attr(test, assert_instr(vpsravd))]
21167pub fn _mm_maskz_srav_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21168 unsafe {
21169 let shf: i32x4 = _mm_srav_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
21171 }
21172}
21173
21174/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
21175///
21176/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srav_epi64&expand=5474)
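///
/// A minimal usage sketch (illustrative values; assumes `avx512f` is available):
///
/// ```ignore
/// let a = _mm512_set1_epi64(-64);
/// let counts = _mm512_set1_epi64(5);
/// let r = _mm512_srav_epi64(a, counts); // every lane becomes -64 >> 5 == -2
/// ```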
21177#[inline]
21178#[target_feature(enable = "avx512f")]
21179#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21180#[cfg_attr(test, assert_instr(vpsravq))]
21181pub fn _mm512_srav_epi64(a: __m512i, count: __m512i) -> __m512i {
    unsafe { transmute(vpsravq(a.as_i64x8(), count.as_i64x8())) }
21183}
21184
21185/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21186///
21187/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srav_epi64&expand=5472)
21188#[inline]
21189#[target_feature(enable = "avx512f")]
21190#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21191#[cfg_attr(test, assert_instr(vpsravq))]
21192pub fn _mm512_mask_srav_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
21193 unsafe {
21194 let shf: i64x8 = _mm512_srav_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
21196 }
21197}
21198
21199/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21200///
21201/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srav_epi64&expand=5473)
21202#[inline]
21203#[target_feature(enable = "avx512f")]
21204#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21205#[cfg_attr(test, assert_instr(vpsravq))]
21206pub fn _mm512_maskz_srav_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
21207 unsafe {
21208 let shf: i64x8 = _mm512_srav_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
21210 }
21211}
21212
21213/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
21214///
21215/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srav_epi64&expand=5471)
21216#[inline]
21217#[target_feature(enable = "avx512f,avx512vl")]
21218#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21219#[cfg_attr(test, assert_instr(vpsravq))]
21220pub fn _mm256_srav_epi64(a: __m256i, count: __m256i) -> __m256i {
    unsafe { transmute(vpsravq256(a.as_i64x4(), count.as_i64x4())) }
21222}
21223
21224/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21225///
21226/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srav_epi64&expand=5469)
21227#[inline]
21228#[target_feature(enable = "avx512f,avx512vl")]
21229#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21230#[cfg_attr(test, assert_instr(vpsravq))]
21231pub fn _mm256_mask_srav_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21232 unsafe {
21233 let shf: i64x4 = _mm256_srav_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
21235 }
21236}
21237
21238/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21239///
21240/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srav_epi64&expand=5470)
21241#[inline]
21242#[target_feature(enable = "avx512f,avx512vl")]
21243#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21244#[cfg_attr(test, assert_instr(vpsravq))]
21245pub fn _mm256_maskz_srav_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21246 unsafe {
21247 let shf: i64x4 = _mm256_srav_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
21249 }
21250}
21251
21252/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
21253///
21254/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srav_epi64&expand=5468)
21255#[inline]
21256#[target_feature(enable = "avx512f,avx512vl")]
21257#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21258#[cfg_attr(test, assert_instr(vpsravq))]
21259pub fn _mm_srav_epi64(a: __m128i, count: __m128i) -> __m128i {
    unsafe { transmute(vpsravq128(a.as_i64x2(), count.as_i64x2())) }
21261}
21262
21263/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21264///
21265/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srav_epi64&expand=5466)
21266#[inline]
21267#[target_feature(enable = "avx512f,avx512vl")]
21268#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21269#[cfg_attr(test, assert_instr(vpsravq))]
21270pub fn _mm_mask_srav_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21271 unsafe {
21272 let shf: i64x2 = _mm_srav_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
21274 }
21275}
21276
21277/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21278///
21279/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srav_epi64&expand=5467)
21280#[inline]
21281#[target_feature(enable = "avx512f,avx512vl")]
21282#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21283#[cfg_attr(test, assert_instr(vpsravq))]
21284pub fn _mm_maskz_srav_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21285 unsafe {
21286 let shf: i64x2 = _mm_srav_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
21288 }
21289}
21290
21291/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
21292///
21293/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rolv_epi32&expand=4703)
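///
/// A minimal usage sketch (illustrative values; assumes `avx512f` is available):
///
/// ```ignore
/// let a = _mm512_set1_epi32(0x8000_0001u32 as i32);
/// let b = _mm512_set1_epi32(1);
/// let r = _mm512_rolv_epi32(a, b); // bit 31 wraps around: every lane becomes 0x0000_0003
/// ```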
21294#[inline]
21295#[target_feature(enable = "avx512f")]
21296#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21297#[cfg_attr(test, assert_instr(vprolvd))]
21298pub fn _mm512_rolv_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vprolvd(a.as_i32x16(), b.as_i32x16())) }
21300}
21301
21302/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21303///
21304/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rolv_epi32&expand=4701)
21305#[inline]
21306#[target_feature(enable = "avx512f")]
21307#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21308#[cfg_attr(test, assert_instr(vprolvd))]
21309pub fn _mm512_mask_rolv_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
21310 unsafe {
21311 let rol: i32x16 = _mm512_rolv_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, rol, src.as_i32x16()))
21313 }
21314}
21315
21316/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21317///
21318/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rolv_epi32&expand=4702)
21319#[inline]
21320#[target_feature(enable = "avx512f")]
21321#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21322#[cfg_attr(test, assert_instr(vprolvd))]
21323pub fn _mm512_maskz_rolv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
21324 unsafe {
21325 let rol: i32x16 = _mm512_rolv_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, rol, i32x16::ZERO))
21327 }
21328}
21329
21330/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
21331///
21332/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rolv_epi32&expand=4700)
21333#[inline]
21334#[target_feature(enable = "avx512f,avx512vl")]
21335#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21336#[cfg_attr(test, assert_instr(vprolvd))]
21337pub fn _mm256_rolv_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vprolvd256(a.as_i32x8(), b.as_i32x8())) }
21339}
21340
21341/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21342///
21343/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rolv_epi32&expand=4698)
21344#[inline]
21345#[target_feature(enable = "avx512f,avx512vl")]
21346#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21347#[cfg_attr(test, assert_instr(vprolvd))]
21348pub fn _mm256_mask_rolv_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21349 unsafe {
21350 let rol: i32x8 = _mm256_rolv_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, rol, src.as_i32x8()))
21352 }
21353}
21354
21355/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21356///
21357/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rolv_epi32&expand=4699)
21358#[inline]
21359#[target_feature(enable = "avx512f,avx512vl")]
21360#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21361#[cfg_attr(test, assert_instr(vprolvd))]
21362pub fn _mm256_maskz_rolv_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21363 unsafe {
21364 let rol: i32x8 = _mm256_rolv_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, rol, i32x8::ZERO))
21366 }
21367}
21368
21369/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
21370///
21371/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rolv_epi32&expand=4697)
21372#[inline]
21373#[target_feature(enable = "avx512f,avx512vl")]
21374#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21375#[cfg_attr(test, assert_instr(vprolvd))]
21376pub fn _mm_rolv_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vprolvd128(a.as_i32x4(), b.as_i32x4())) }
21378}
21379
21380/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21381///
21382/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rolv_epi32&expand=4695)
21383#[inline]
21384#[target_feature(enable = "avx512f,avx512vl")]
21385#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21386#[cfg_attr(test, assert_instr(vprolvd))]
21387pub fn _mm_mask_rolv_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21388 unsafe {
21389 let rol: i32x4 = _mm_rolv_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, rol, src.as_i32x4()))
21391 }
21392}
21393
21394/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21395///
21396/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rolv_epi32&expand=4696)
21397#[inline]
21398#[target_feature(enable = "avx512f,avx512vl")]
21399#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21400#[cfg_attr(test, assert_instr(vprolvd))]
21401pub fn _mm_maskz_rolv_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21402 unsafe {
21403 let rol: i32x4 = _mm_rolv_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, rol, i32x4::ZERO))
21405 }
21406}
21407
21408/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
21409///
21410/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rorv_epi32&expand=4739)
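///
/// A minimal usage sketch (illustrative values; assumes `avx512f` is available):
///
/// ```ignore
/// let a = _mm512_set1_epi32(0b0101);
/// let b = _mm512_set1_epi32(2);
/// let r = _mm512_rorv_epi32(a, b); // the low bits wrap to the top: every lane becomes 0x4000_0001
/// ```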
21411#[inline]
21412#[target_feature(enable = "avx512f")]
21413#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21414#[cfg_attr(test, assert_instr(vprorvd))]
21415pub fn _mm512_rorv_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vprorvd(a.as_i32x16(), b.as_i32x16())) }
21417}
21418
21419/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21420///
21421/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rorv_epi32&expand=4737)
21422#[inline]
21423#[target_feature(enable = "avx512f")]
21424#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21425#[cfg_attr(test, assert_instr(vprorvd))]
21426pub fn _mm512_mask_rorv_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
21427 unsafe {
21428 let ror: i32x16 = _mm512_rorv_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, ror, src.as_i32x16()))
21430 }
21431}
21432
21433/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21434///
21435/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rorv_epi32&expand=4738)
21436#[inline]
21437#[target_feature(enable = "avx512f")]
21438#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21439#[cfg_attr(test, assert_instr(vprorvd))]
21440pub fn _mm512_maskz_rorv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
21441 unsafe {
21442 let ror: i32x16 = _mm512_rorv_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, ror, i32x16::ZERO))
21444 }
21445}
21446
21447/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
21448///
21449/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rorv_epi32&expand=4736)
21450#[inline]
21451#[target_feature(enable = "avx512f,avx512vl")]
21452#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21453#[cfg_attr(test, assert_instr(vprorvd))]
21454pub fn _mm256_rorv_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vprorvd256(a.as_i32x8(), b.as_i32x8())) }
21456}
21457
21458/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21459///
21460/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rorv_epi32&expand=4734)
21461#[inline]
21462#[target_feature(enable = "avx512f,avx512vl")]
21463#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21464#[cfg_attr(test, assert_instr(vprorvd))]
21465pub fn _mm256_mask_rorv_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21466 unsafe {
21467 let ror: i32x8 = _mm256_rorv_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, ror, src.as_i32x8()))
21469 }
21470}
21471
21472/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21473///
21474/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rorv_epi32&expand=4735)
21475#[inline]
21476#[target_feature(enable = "avx512f,avx512vl")]
21477#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21478#[cfg_attr(test, assert_instr(vprorvd))]
21479pub fn _mm256_maskz_rorv_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21480 unsafe {
21481 let ror: i32x8 = _mm256_rorv_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, ror, i32x8::ZERO))
21483 }
21484}
21485
21486/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
21487///
21488/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rorv_epi32&expand=4733)
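///
/// # Examples
///
/// A minimal usage sketch (illustrative values only; assumes an `x86_64`
/// host where both `avx512f` and `avx512vl` are detected at runtime):
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512vl") {
///     unsafe {
///         let a = _mm_set_epi32(8, 4, 2, 1);
///         let b = _mm_set_epi32(3, 2, 1, 0);
///         // Each lane is rotated by its own per-lane count, so every lane becomes 1.
///         let r = _mm_rorv_epi32(a, b);
///         let mut out = [0i32; 4];
///         _mm_storeu_si128(out.as_mut_ptr().cast(), r);
///         assert_eq!(out, [1, 1, 1, 1]);
///     }
/// }
/// # }
/// ```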
21489#[inline]
21490#[target_feature(enable = "avx512f,avx512vl")]
21491#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21492#[cfg_attr(test, assert_instr(vprorvd))]
21493pub fn _mm_rorv_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vprorvd128(a.as_i32x4(), b.as_i32x4())) }
21495}
21496
21497/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21498///
21499/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rorv_epi32&expand=4731)
21500#[inline]
21501#[target_feature(enable = "avx512f,avx512vl")]
21502#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21503#[cfg_attr(test, assert_instr(vprorvd))]
21504pub fn _mm_mask_rorv_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21505 unsafe {
21506 let ror: i32x4 = _mm_rorv_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, ror, src.as_i32x4()))
21508 }
21509}
21510
21511/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21512///
21513/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rorv_epi32&expand=4732)
21514#[inline]
21515#[target_feature(enable = "avx512f,avx512vl")]
21516#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21517#[cfg_attr(test, assert_instr(vprorvd))]
21518pub fn _mm_maskz_rorv_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21519 unsafe {
21520 let ror: i32x4 = _mm_rorv_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, ror, i32x4::ZERO))
21522 }
21523}
21524
21525/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
21526///
21527/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rolv_epi64&expand=4712)
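///
/// # Examples
///
/// A minimal usage sketch (illustrative values only; assumes an `x86_64`
/// host where `avx512f` is detected at runtime):
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     unsafe {
///         // Rotating the top bit left by one wraps it around to bit 0.
///         let a = _mm512_set1_epi64(i64::MIN);
///         let b = _mm512_set1_epi64(1);
///         let r = _mm512_rolv_epi64(a, b);
///         let mut out = [0i64; 8];
///         _mm512_storeu_si512(out.as_mut_ptr().cast(), r);
///         assert_eq!(out, [1; 8]);
///     }
/// }
/// # }
/// ```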
21528#[inline]
21529#[target_feature(enable = "avx512f")]
21530#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21531#[cfg_attr(test, assert_instr(vprolvq))]
21532pub fn _mm512_rolv_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vprolvq(a.as_i64x8(), b.as_i64x8())) }
21534}
21535
21536/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21537///
21538/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rolv_epi64&expand=4710)
21539#[inline]
21540#[target_feature(enable = "avx512f")]
21541#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21542#[cfg_attr(test, assert_instr(vprolvq))]
21543pub fn _mm512_mask_rolv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
21544 unsafe {
21545 let rol: i64x8 = _mm512_rolv_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, rol, src.as_i64x8()))
21547 }
21548}
21549
21550/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21551///
21552/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rolv_epi64&expand=4711)
21553#[inline]
21554#[target_feature(enable = "avx512f")]
21555#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21556#[cfg_attr(test, assert_instr(vprolvq))]
21557pub fn _mm512_maskz_rolv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
21558 unsafe {
21559 let rol: i64x8 = _mm512_rolv_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, rol, i64x8::ZERO))
21561 }
21562}
21563
21564/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
21565///
21566/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rolv_epi64&expand=4709)
21567#[inline]
21568#[target_feature(enable = "avx512f,avx512vl")]
21569#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21570#[cfg_attr(test, assert_instr(vprolvq))]
21571pub fn _mm256_rolv_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vprolvq256(a.as_i64x4(), b.as_i64x4())) }
21573}
21574
21575/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21576///
21577/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rolv_epi64&expand=4707)
21578#[inline]
21579#[target_feature(enable = "avx512f,avx512vl")]
21580#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21581#[cfg_attr(test, assert_instr(vprolvq))]
21582pub fn _mm256_mask_rolv_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21583 unsafe {
21584 let rol: i64x4 = _mm256_rolv_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, rol, src.as_i64x4()))
21586 }
21587}
21588
21589/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21590///
21591/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rolv_epi64&expand=4708)
21592#[inline]
21593#[target_feature(enable = "avx512f,avx512vl")]
21594#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21595#[cfg_attr(test, assert_instr(vprolvq))]
21596pub fn _mm256_maskz_rolv_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21597 unsafe {
21598 let rol: i64x4 = _mm256_rolv_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, rol, i64x4::ZERO))
21600 }
21601}
21602
21603/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
21604///
21605/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rolv_epi64&expand=4706)
21606#[inline]
21607#[target_feature(enable = "avx512f,avx512vl")]
21608#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21609#[cfg_attr(test, assert_instr(vprolvq))]
21610pub fn _mm_rolv_epi64(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vprolvq128(a.as_i64x2(), b.as_i64x2())) }
21612}
21613
21614/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21615///
21616/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rolv_epi64&expand=4704)
21617#[inline]
21618#[target_feature(enable = "avx512f,avx512vl")]
21619#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21620#[cfg_attr(test, assert_instr(vprolvq))]
21621pub fn _mm_mask_rolv_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21622 unsafe {
21623 let rol: i64x2 = _mm_rolv_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, rol, src.as_i64x2()))
21625 }
21626}
21627
21628/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21629///
21630/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rolv_epi64&expand=4705)
21631#[inline]
21632#[target_feature(enable = "avx512f,avx512vl")]
21633#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21634#[cfg_attr(test, assert_instr(vprolvq))]
21635pub fn _mm_maskz_rolv_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21636 unsafe {
21637 let rol: i64x2 = _mm_rolv_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, rol, i64x2::ZERO))
21639 }
21640}
21641
21642/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
21643///
21644/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rorv_epi64&expand=4748)
21645#[inline]
21646#[target_feature(enable = "avx512f")]
21647#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21648#[cfg_attr(test, assert_instr(vprorvq))]
21649pub fn _mm512_rorv_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vprorvq(a.as_i64x8(), b.as_i64x8())) }
21651}
21652
21653/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21654///
21655/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rorv_epi64&expand=4746)
21656#[inline]
21657#[target_feature(enable = "avx512f")]
21658#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21659#[cfg_attr(test, assert_instr(vprorvq))]
21660pub fn _mm512_mask_rorv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
21661 unsafe {
21662 let ror: i64x8 = _mm512_rorv_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, ror, src.as_i64x8()))
21664 }
21665}
21666
21667/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21668///
21669/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rorv_epi64&expand=4747)
21670#[inline]
21671#[target_feature(enable = "avx512f")]
21672#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21673#[cfg_attr(test, assert_instr(vprorvq))]
21674pub fn _mm512_maskz_rorv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
21675 unsafe {
21676 let ror: i64x8 = _mm512_rorv_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, ror, i64x8::ZERO))
21678 }
21679}
21680
21681/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
21682///
21683/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rorv_epi64&expand=4745)
21684#[inline]
21685#[target_feature(enable = "avx512f,avx512vl")]
21686#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21687#[cfg_attr(test, assert_instr(vprorvq))]
21688pub fn _mm256_rorv_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vprorvq256(a.as_i64x4(), b.as_i64x4())) }
21690}
21691
21692/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21693///
21694/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rorv_epi64&expand=4743)
21695#[inline]
21696#[target_feature(enable = "avx512f,avx512vl")]
21697#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21698#[cfg_attr(test, assert_instr(vprorvq))]
21699pub fn _mm256_mask_rorv_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21700 unsafe {
21701 let ror: i64x4 = _mm256_rorv_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, ror, src.as_i64x4()))
21703 }
21704}
21705
21706/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21707///
21708/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rorv_epi64&expand=4744)
21709#[inline]
21710#[target_feature(enable = "avx512f,avx512vl")]
21711#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21712#[cfg_attr(test, assert_instr(vprorvq))]
21713pub fn _mm256_maskz_rorv_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21714 unsafe {
21715 let ror: i64x4 = _mm256_rorv_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, ror, i64x4::ZERO))
21717 }
21718}
21719
21720/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
21721///
21722/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rorv_epi64&expand=4742)
21723#[inline]
21724#[target_feature(enable = "avx512f,avx512vl")]
21725#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21726#[cfg_attr(test, assert_instr(vprorvq))]
21727pub fn _mm_rorv_epi64(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vprorvq128(a.as_i64x2(), b.as_i64x2())) }
21729}
21730
21731/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21732///
21733/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rorv_epi64&expand=4740)
21734#[inline]
21735#[target_feature(enable = "avx512f,avx512vl")]
21736#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21737#[cfg_attr(test, assert_instr(vprorvq))]
21738pub fn _mm_mask_rorv_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21739 unsafe {
21740 let ror: i64x2 = _mm_rorv_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, ror, src.as_i64x2()))
21742 }
21743}
21744
21745/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21746///
21747/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rorv_epi64&expand=4741)
21748#[inline]
21749#[target_feature(enable = "avx512f,avx512vl")]
21750#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21751#[cfg_attr(test, assert_instr(vprorvq))]
21752pub fn _mm_maskz_rorv_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21753 unsafe {
21754 let ror: i64x2 = _mm_rorv_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, ror, i64x2::ZERO))
21756 }
21757}
21758
21759/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
21760///
21761/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sllv_epi32&expand=5342)
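///
/// # Examples
///
/// A minimal usage sketch (illustrative values only; assumes an `x86_64`
/// host where `avx512f` is detected at runtime). Per-lane counts of 32 or
/// more do not wrap; they zero the lane:
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     unsafe {
///         let a = _mm512_set1_epi32(1);
///         let count = _mm512_set_epi32(32, 32, 32, 32, 32, 32, 32, 32, 4, 4, 4, 4, 4, 4, 4, 4);
///         let r = _mm512_sllv_epi32(a, count);
///         let mut out = [0i32; 16];
///         _mm512_storeu_si512(out.as_mut_ptr().cast(), r);
///         assert_eq!(out[0], 16); // 1 << 4
///         assert_eq!(out[15], 0); // a count >= 32 shifts the lane out entirely
///     }
/// }
/// # }
/// ```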
21762#[inline]
21763#[target_feature(enable = "avx512f")]
21764#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21765#[cfg_attr(test, assert_instr(vpsllvd))]
21766pub fn _mm512_sllv_epi32(a: __m512i, count: __m512i) -> __m512i {
    unsafe { transmute(vpsllvd(a.as_i32x16(), count.as_i32x16())) }
21768}
21769
21770/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21771///
21772/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sllv_epi32&expand=5340)
21773#[inline]
21774#[target_feature(enable = "avx512f")]
21775#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21776#[cfg_attr(test, assert_instr(vpsllvd))]
21777pub fn _mm512_mask_sllv_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
21778 unsafe {
21779 let shf: i32x16 = _mm512_sllv_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
21781 }
21782}
21783
21784/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21785///
21786/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sllv_epi32&expand=5341)
21787#[inline]
21788#[target_feature(enable = "avx512f")]
21789#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21790#[cfg_attr(test, assert_instr(vpsllvd))]
21791pub fn _mm512_maskz_sllv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
21792 unsafe {
21793 let shf: i32x16 = _mm512_sllv_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
21795 }
21796}
21797
21798/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21799///
21800/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sllv_epi32&expand=5337)
21801#[inline]
21802#[target_feature(enable = "avx512f,avx512vl")]
21803#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21804#[cfg_attr(test, assert_instr(vpsllvd))]
21805pub fn _mm256_mask_sllv_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21806 unsafe {
21807 let shf: i32x8 = _mm256_sllv_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
21809 }
21810}
21811
21812/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21813///
21814/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sllv_epi32&expand=5338)
21815#[inline]
21816#[target_feature(enable = "avx512f,avx512vl")]
21817#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21818#[cfg_attr(test, assert_instr(vpsllvd))]
21819pub fn _mm256_maskz_sllv_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21820 unsafe {
21821 let shf: i32x8 = _mm256_sllv_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
21823 }
21824}
21825
21826/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21827///
21828/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sllv_epi32&expand=5334)
21829#[inline]
21830#[target_feature(enable = "avx512f,avx512vl")]
21831#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21832#[cfg_attr(test, assert_instr(vpsllvd))]
21833pub fn _mm_mask_sllv_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21834 unsafe {
21835 let shf: i32x4 = _mm_sllv_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
21837 }
21838}
21839
21840/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21841///
21842/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sllv_epi32&expand=5335)
21843#[inline]
21844#[target_feature(enable = "avx512f,avx512vl")]
21845#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21846#[cfg_attr(test, assert_instr(vpsllvd))]
21847pub fn _mm_maskz_sllv_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21848 unsafe {
21849 let shf: i32x4 = _mm_sllv_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
21851 }
21852}
21853
21854/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
21855///
21856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srlv_epi32&expand=5554)
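///
/// # Examples
///
/// A minimal usage sketch (illustrative values only; assumes an `x86_64`
/// host where `avx512f` is detected at runtime). This is a logical shift,
/// so zeros are shifted in even when a lane's sign bit is set:
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     unsafe {
///         let a = _mm512_set1_epi32(-1);
///         let count = _mm512_set1_epi32(28);
///         let r = _mm512_srlv_epi32(a, count);
///         let mut out = [0i32; 16];
///         _mm512_storeu_si512(out.as_mut_ptr().cast(), r);
///         assert_eq!(out, [0xF; 16]);
///     }
/// }
/// # }
/// ```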
21857#[inline]
21858#[target_feature(enable = "avx512f")]
21859#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21860#[cfg_attr(test, assert_instr(vpsrlvd))]
21861pub fn _mm512_srlv_epi32(a: __m512i, count: __m512i) -> __m512i {
    unsafe { transmute(vpsrlvd(a.as_i32x16(), count.as_i32x16())) }
21863}
21864
21865/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21866///
21867/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srlv_epi32&expand=5552)
21868#[inline]
21869#[target_feature(enable = "avx512f")]
21870#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21871#[cfg_attr(test, assert_instr(vpsrlvd))]
21872pub fn _mm512_mask_srlv_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
21873 unsafe {
21874 let shf: i32x16 = _mm512_srlv_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
21876 }
21877}
21878
21879/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21880///
21881/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srlv_epi32&expand=5553)
21882#[inline]
21883#[target_feature(enable = "avx512f")]
21884#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21885#[cfg_attr(test, assert_instr(vpsrlvd))]
21886pub fn _mm512_maskz_srlv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
21887 unsafe {
21888 let shf: i32x16 = _mm512_srlv_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
21890 }
21891}
21892
21893/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21894///
21895/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srlv_epi32&expand=5549)
21896#[inline]
21897#[target_feature(enable = "avx512f,avx512vl")]
21898#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21899#[cfg_attr(test, assert_instr(vpsrlvd))]
21900pub fn _mm256_mask_srlv_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21901 unsafe {
21902 let shf: i32x8 = _mm256_srlv_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
21904 }
21905}
21906
21907/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21908///
21909/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srlv_epi32&expand=5550)
21910#[inline]
21911#[target_feature(enable = "avx512f,avx512vl")]
21912#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21913#[cfg_attr(test, assert_instr(vpsrlvd))]
21914pub fn _mm256_maskz_srlv_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21915 unsafe {
21916 let shf: i32x8 = _mm256_srlv_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
21918 }
21919}
21920
21921/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21922///
21923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srlv_epi32&expand=5546)
21924#[inline]
21925#[target_feature(enable = "avx512f,avx512vl")]
21926#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21927#[cfg_attr(test, assert_instr(vpsrlvd))]
21928pub fn _mm_mask_srlv_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21929 unsafe {
21930 let shf: i32x4 = _mm_srlv_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
21932 }
21933}
21934
21935/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21936///
21937/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srlv_epi32&expand=5547)
21938#[inline]
21939#[target_feature(enable = "avx512f,avx512vl")]
21940#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21941#[cfg_attr(test, assert_instr(vpsrlvd))]
21942pub fn _mm_maskz_srlv_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21943 unsafe {
21944 let shf: i32x4 = _mm_srlv_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
21946 }
21947}
21948
21949/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
21950///
21951/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sllv_epi64&expand=5351)
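///
/// # Examples
///
/// A minimal usage sketch (illustrative values only; assumes an `x86_64`
/// host where `avx512f` is detected at runtime):
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     unsafe {
///         let a = _mm512_set1_epi64(1);
///         // Each 64-bit lane is shifted left by its own count.
///         let count = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
///         let r = _mm512_sllv_epi64(a, count);
///         let mut out = [0i64; 8];
///         _mm512_storeu_si512(out.as_mut_ptr().cast(), r);
///         assert_eq!(out, [1, 2, 4, 8, 16, 32, 64, 128]);
///     }
/// }
/// # }
/// ```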
21952#[inline]
21953#[target_feature(enable = "avx512f")]
21954#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21955#[cfg_attr(test, assert_instr(vpsllvq))]
21956pub fn _mm512_sllv_epi64(a: __m512i, count: __m512i) -> __m512i {
    unsafe { transmute(vpsllvq(a.as_i64x8(), count.as_i64x8())) }
21958}
21959
21960/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21961///
21962/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sllv_epi64&expand=5349)
21963#[inline]
21964#[target_feature(enable = "avx512f")]
21965#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21966#[cfg_attr(test, assert_instr(vpsllvq))]
21967pub fn _mm512_mask_sllv_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
21968 unsafe {
21969 let shf: i64x8 = _mm512_sllv_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
21971 }
21972}
21973
21974/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21975///
21976/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sllv_epi64&expand=5350)
21977#[inline]
21978#[target_feature(enable = "avx512f")]
21979#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21980#[cfg_attr(test, assert_instr(vpsllvq))]
21981pub fn _mm512_maskz_sllv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
21982 unsafe {
21983 let shf: i64x8 = _mm512_sllv_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
21985 }
21986}
21987
21988/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21989///
21990/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sllv_epi64&expand=5346)
21991#[inline]
21992#[target_feature(enable = "avx512f,avx512vl")]
21993#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
21994#[cfg_attr(test, assert_instr(vpsllvq))]
21995pub fn _mm256_mask_sllv_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21996 unsafe {
21997 let shf: i64x4 = _mm256_sllv_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
21999 }
22000}
22001
22002/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22003///
22004/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sllv_epi64&expand=5347)
22005#[inline]
22006#[target_feature(enable = "avx512f,avx512vl")]
22007#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22008#[cfg_attr(test, assert_instr(vpsllvq))]
22009pub fn _mm256_maskz_sllv_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
22010 unsafe {
22011 let shf: i64x4 = _mm256_sllv_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
22013 }
22014}
22015
22016/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22017///
22018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sllv_epi64&expand=5343)
22019#[inline]
22020#[target_feature(enable = "avx512f,avx512vl")]
22021#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22022#[cfg_attr(test, assert_instr(vpsllvq))]
22023pub fn _mm_mask_sllv_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
22024 unsafe {
22025 let shf: i64x2 = _mm_sllv_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
22027 }
22028}
22029
22030/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22031///
22032/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sllv_epi64&expand=5344)
22033#[inline]
22034#[target_feature(enable = "avx512f,avx512vl")]
22035#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22036#[cfg_attr(test, assert_instr(vpsllvq))]
22037pub fn _mm_maskz_sllv_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
22038 unsafe {
22039 let shf: i64x2 = _mm_sllv_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
22041 }
22042}
22043
22044/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
22045///
22046/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srlv_epi64&expand=5563)
22047#[inline]
22048#[target_feature(enable = "avx512f")]
22049#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22050#[cfg_attr(test, assert_instr(vpsrlvq))]
22051pub fn _mm512_srlv_epi64(a: __m512i, count: __m512i) -> __m512i {
    unsafe { transmute(vpsrlvq(a.as_i64x8(), count.as_i64x8())) }
22053}
22054
22055/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22056///
22057/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srlv_epi64&expand=5561)
22058#[inline]
22059#[target_feature(enable = "avx512f")]
22060#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22061#[cfg_attr(test, assert_instr(vpsrlvq))]
22062pub fn _mm512_mask_srlv_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
22063 unsafe {
22064 let shf: i64x8 = _mm512_srlv_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
22066 }
22067}
22068
22069/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22070///
22071/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srlv_epi64&expand=5562)
22072#[inline]
22073#[target_feature(enable = "avx512f")]
22074#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22075#[cfg_attr(test, assert_instr(vpsrlvq))]
22076pub fn _mm512_maskz_srlv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
22077 unsafe {
22078 let shf: i64x8 = _mm512_srlv_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
22080 }
22081}
22082
22083/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22084///
22085/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srlv_epi64&expand=5558)
22086#[inline]
22087#[target_feature(enable = "avx512f,avx512vl")]
22088#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22089#[cfg_attr(test, assert_instr(vpsrlvq))]
22090pub fn _mm256_mask_srlv_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
22091 unsafe {
22092 let shf: i64x4 = _mm256_srlv_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
22094 }
22095}
22096
22097/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22098///
22099/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srlv_epi64&expand=5559)
22100#[inline]
22101#[target_feature(enable = "avx512f,avx512vl")]
22102#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22103#[cfg_attr(test, assert_instr(vpsrlvq))]
22104pub fn _mm256_maskz_srlv_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
22105 unsafe {
22106 let shf: i64x4 = _mm256_srlv_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
22108 }
22109}
22110
22111/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22112///
22113/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srlv_epi64&expand=5555)
22114#[inline]
22115#[target_feature(enable = "avx512f,avx512vl")]
22116#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22117#[cfg_attr(test, assert_instr(vpsrlvq))]
22118pub fn _mm_mask_srlv_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
22119 unsafe {
22120 let shf: i64x2 = _mm_srlv_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
22122 }
22123}
22124
22125/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22126///
22127/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srlv_epi64&expand=5556)
22128#[inline]
22129#[target_feature(enable = "avx512f,avx512vl")]
22130#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22131#[cfg_attr(test, assert_instr(vpsrlvq))]
22132pub fn _mm_maskz_srlv_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
22133 unsafe {
22134 let shf: i64x2 = _mm_srlv_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
22136 }
22137}
22138
22139/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst.
22140///
22141/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permute_ps&expand=4170)
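///
/// # Examples
///
/// A minimal usage sketch (illustrative values only; assumes an `x86_64`
/// host where `avx512f` is detected at runtime). Each 2-bit field of the
/// control, read from the low bits upwards, selects one element of the
/// 128-bit lane that the corresponding output element lives in:
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     unsafe {
///         let a = _mm512_setr_ps(
///             0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
///         );
///         // 0b00_01_10_11 reverses the four elements of every 128-bit lane.
///         let r = _mm512_permute_ps::<0b00_01_10_11>(a);
///         let mut out = [0.0f32; 16];
///         _mm512_storeu_ps(out.as_mut_ptr(), r);
///         assert_eq!(out[0], 3.0); // lane 0 took element 3 of its 128-bit group
///         assert_eq!(out[4], 7.0); // lane 4 took element 3 of the next group
///     }
/// }
/// # }
/// ```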
22142#[inline]
22143#[target_feature(enable = "avx512f")]
22144#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22145#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22146#[rustc_legacy_const_generics(1)]
22147pub fn _mm512_permute_ps<const MASK: i32>(a: __m512) -> __m512 {
22148 unsafe {
22149 static_assert_uimm_bits!(MASK, 8);
22150 simd_shuffle!(
22151 a,
22152 a,
22153 [
22154 MASK as u32 & 0b11,
22155 (MASK as u32 >> 2) & 0b11,
22156 ((MASK as u32 >> 4) & 0b11),
22157 ((MASK as u32 >> 6) & 0b11),
22158 (MASK as u32 & 0b11) + 4,
22159 ((MASK as u32 >> 2) & 0b11) + 4,
22160 ((MASK as u32 >> 4) & 0b11) + 4,
22161 ((MASK as u32 >> 6) & 0b11) + 4,
22162 (MASK as u32 & 0b11) + 8,
22163 ((MASK as u32 >> 2) & 0b11) + 8,
22164 ((MASK as u32 >> 4) & 0b11) + 8,
22165 ((MASK as u32 >> 6) & 0b11) + 8,
22166 (MASK as u32 & 0b11) + 12,
22167 ((MASK as u32 >> 2) & 0b11) + 12,
22168 ((MASK as u32 >> 4) & 0b11) + 12,
22169 ((MASK as u32 >> 6) & 0b11) + 12,
22170 ],
22171 )
22172 }
22173}
22174
22175/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22176///
22177/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permute_ps&expand=4168)
22178#[inline]
22179#[target_feature(enable = "avx512f")]
22180#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22181#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22182#[rustc_legacy_const_generics(3)]
22183pub fn _mm512_mask_permute_ps<const MASK: i32>(src: __m512, k: __mmask16, a: __m512) -> __m512 {
22184 unsafe {
22185 static_assert_uimm_bits!(MASK, 8);
22186 let r: __m512 = _mm512_permute_ps::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16()))
22188 }
22189}
22190
22191/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22192///
22193/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permute_ps&expand=4169)
22194#[inline]
22195#[target_feature(enable = "avx512f")]
22196#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22197#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22198#[rustc_legacy_const_generics(2)]
22199pub fn _mm512_maskz_permute_ps<const MASK: i32>(k: __mmask16, a: __m512) -> __m512 {
22200 unsafe {
22201 static_assert_uimm_bits!(MASK, 8);
22202 let r: __m512 = _mm512_permute_ps::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO))
22204 }
22205}
22206
22207/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22208///
22209/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permute_ps&expand=4165)
22210#[inline]
22211#[target_feature(enable = "avx512f,avx512vl")]
22212#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22213#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22214#[rustc_legacy_const_generics(3)]
22215pub fn _mm256_mask_permute_ps<const MASK: i32>(src: __m256, k: __mmask8, a: __m256) -> __m256 {
22216 unsafe {
22217 let r: __m256 = _mm256_permute_ps::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8()))
22219 }
22220}
22221
22222/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22223///
22224/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permute_ps&expand=4166)
22225#[inline]
22226#[target_feature(enable = "avx512f,avx512vl")]
22227#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22228#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22229#[rustc_legacy_const_generics(2)]
22230pub fn _mm256_maskz_permute_ps<const MASK: i32>(k: __mmask8, a: __m256) -> __m256 {
22231 unsafe {
22232 let r: __m256 = _mm256_permute_ps::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO))
22234 }
22235}
22236
22237/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22238///
22239/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permute_ps&expand=4162)
22240#[inline]
22241#[target_feature(enable = "avx512f,avx512vl")]
22242#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22243#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22244#[rustc_legacy_const_generics(3)]
22245pub fn _mm_mask_permute_ps<const MASK: i32>(src: __m128, k: __mmask8, a: __m128) -> __m128 {
22246 unsafe {
22247 let r: __m128 = _mm_permute_ps::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4()))
22249 }
22250}
22251
22252/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22253///
22254/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permute_ps&expand=4163)
22255#[inline]
22256#[target_feature(enable = "avx512f,avx512vl")]
22257#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22258#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22259#[rustc_legacy_const_generics(2)]
22260pub fn _mm_maskz_permute_ps<const MASK: i32>(k: __mmask8, a: __m128) -> __m128 {
22261 unsafe {
22262 let r: __m128 = _mm_permute_ps::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO))
22264 }
22265}
22266
22267/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst.
22268///
22269/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permute_pd&expand=4161)
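///
/// # Examples
///
/// A minimal usage sketch (illustrative values only; assumes an `x86_64`
/// host where `avx512f` is detected at runtime). Bit `i` of the control
/// picks the low (0) or high (1) element of the 128-bit pair that output
/// element `i` belongs to:
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     unsafe {
///         let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
///         // 0b0101_0101 swaps the two elements of every 128-bit pair.
///         let r = _mm512_permute_pd::<0b0101_0101>(a);
///         let mut out = [0.0f64; 8];
///         _mm512_storeu_pd(out.as_mut_ptr(), r);
///         assert_eq!(out, [1., 0., 3., 2., 5., 4., 7., 6.]);
///     }
/// }
/// # }
/// ```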
22270#[inline]
22271#[target_feature(enable = "avx512f")]
22272#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22273#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01_10_01))]
22274#[rustc_legacy_const_generics(1)]
22275pub fn _mm512_permute_pd<const MASK: i32>(a: __m512d) -> __m512d {
22276 unsafe {
22277 static_assert_uimm_bits!(MASK, 8);
22278 simd_shuffle!(
22279 a,
22280 a,
22281 [
22282 MASK as u32 & 0b1,
22283 ((MASK as u32 >> 1) & 0b1),
22284 ((MASK as u32 >> 2) & 0b1) + 2,
22285 ((MASK as u32 >> 3) & 0b1) + 2,
22286 ((MASK as u32 >> 4) & 0b1) + 4,
22287 ((MASK as u32 >> 5) & 0b1) + 4,
22288 ((MASK as u32 >> 6) & 0b1) + 6,
22289 ((MASK as u32 >> 7) & 0b1) + 6,
22290 ],
22291 )
22292 }
22293}
22294
22295/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22296///
22297/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permute_pd&expand=4159)
22298#[inline]
22299#[target_feature(enable = "avx512f")]
22300#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22301#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01_10_01))]
22302#[rustc_legacy_const_generics(3)]
22303pub fn _mm512_mask_permute_pd<const MASK: i32>(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
22304 unsafe {
22305 static_assert_uimm_bits!(MASK, 8);
22306 let r: __m512d = _mm512_permute_pd::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
22308 }
22309}
22310
22311/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22312///
22313/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permute_pd&expand=4160)
22314#[inline]
22315#[target_feature(enable = "avx512f")]
22316#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22317#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01_10_01))]
22318#[rustc_legacy_const_generics(2)]
22319pub fn _mm512_maskz_permute_pd<const MASK: i32>(k: __mmask8, a: __m512d) -> __m512d {
22320 unsafe {
22321 static_assert_uimm_bits!(MASK, 8);
22322 let r: __m512d = _mm512_permute_pd::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
22324 }
22325}
22326
22327/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22328///
22329/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permute_pd&expand=4156)
22330#[inline]
22331#[target_feature(enable = "avx512f,avx512vl")]
22332#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22333#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01))]
22334#[rustc_legacy_const_generics(3)]
22335pub fn _mm256_mask_permute_pd<const MASK: i32>(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
22336 unsafe {
22337 static_assert_uimm_bits!(MASK, 4);
22338 let r: __m256d = _mm256_permute_pd::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
22340 }
22341}
22342
22343/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22344///
22345/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permute_pd&expand=4157)
22346#[inline]
22347#[target_feature(enable = "avx512f,avx512vl")]
22348#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22349#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01))]
22350#[rustc_legacy_const_generics(2)]
22351pub fn _mm256_maskz_permute_pd<const MASK: i32>(k: __mmask8, a: __m256d) -> __m256d {
22352 unsafe {
22353 static_assert_uimm_bits!(MASK, 4);
22354 let r: __m256d = _mm256_permute_pd::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
22356 }
22357}
22358
22359/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22360///
22361/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permute_pd&expand=4153)
22362#[inline]
22363#[target_feature(enable = "avx512f,avx512vl")]
22364#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22365#[cfg_attr(test, assert_instr(vshufpd, IMM2 = 0b01))]
22366#[rustc_legacy_const_generics(3)]
22367pub fn _mm_mask_permute_pd<const IMM2: i32>(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
22368 unsafe {
22369 static_assert_uimm_bits!(IMM2, 2);
22370 let r: __m128d = _mm_permute_pd::<IMM2>(a);
        transmute(simd_select_bitmask(k, r.as_f64x2(), src.as_f64x2()))
22372 }
22373}
22374
22375/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22376///
22377/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permute_pd&expand=4154)
22378#[inline]
22379#[target_feature(enable = "avx512f,avx512vl")]
22380#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22381#[cfg_attr(test, assert_instr(vshufpd, IMM2 = 0b01))]
22382#[rustc_legacy_const_generics(2)]
22383pub fn _mm_maskz_permute_pd<const IMM2: i32>(k: __mmask8, a: __m128d) -> __m128d {
22384 unsafe {
22385 static_assert_uimm_bits!(IMM2, 2);
22386 let r: __m128d = _mm_permute_pd::<IMM2>(a);
        transmute(simd_select_bitmask(k, r.as_f64x2(), f64x2::ZERO))
22388 }
22389}
22390
22391/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst.
22392///
22393/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex_epi64&expand=4208)
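///
/// # Examples
///
/// A minimal usage sketch (illustrative values only; assumes an `x86_64`
/// host where `avx512f` is detected at runtime). Each 2-bit field of the
/// control selects one of the four 64-bit elements within the same 256-bit
/// half of the vector:
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use std::arch::x86_64::*;
///
/// if is_x86_feature_detected!("avx512f") {
///     unsafe {
///         let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
///         // 0b00_01_10_11 reverses the four elements of each 256-bit half.
///         let r = _mm512_permutex_epi64::<0b00_01_10_11>(a);
///         let mut out = [0i64; 8];
///         _mm512_storeu_si512(out.as_mut_ptr().cast(), r);
///         assert_eq!(out, [3, 2, 1, 0, 7, 6, 5, 4]);
///     }
/// }
/// # }
/// ```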
22394#[inline]
22395#[target_feature(enable = "avx512f")]
22396#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22397#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
22398#[rustc_legacy_const_generics(1)]
22399pub fn _mm512_permutex_epi64<const MASK: i32>(a: __m512i) -> __m512i {
22400 unsafe {
22401 static_assert_uimm_bits!(MASK, 8);
22402 simd_shuffle!(
22403 a,
22404 a,
22405 [
22406 MASK as u32 & 0b11,
22407 (MASK as u32 >> 2) & 0b11,
22408 ((MASK as u32 >> 4) & 0b11),
22409 ((MASK as u32 >> 6) & 0b11),
22410 (MASK as u32 & 0b11) + 4,
22411 ((MASK as u32 >> 2) & 0b11) + 4,
22412 ((MASK as u32 >> 4) & 0b11) + 4,
22413 ((MASK as u32 >> 6) & 0b11) + 4,
22414 ],
22415 )
22416 }
22417}
22418
22419/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22420///
22421/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex_epi64&expand=4206)
22422#[inline]
22423#[target_feature(enable = "avx512f")]
22424#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22425#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
22426#[rustc_legacy_const_generics(3)]
22427pub fn _mm512_mask_permutex_epi64<const MASK: i32>(
22428 src: __m512i,
22429 k: __mmask8,
22430 a: __m512i,
22431) -> __m512i {
22432 unsafe {
22433 static_assert_uimm_bits!(MASK, 8);
22434 let r: __m512i = _mm512_permutex_epi64::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8()))
22436 }
22437}
22438
22439/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22440///
22441/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex_epi64&expand=4207)
22442#[inline]
22443#[target_feature(enable = "avx512f")]
22444#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22445#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
22446#[rustc_legacy_const_generics(2)]
22447pub fn _mm512_maskz_permutex_epi64<const MASK: i32>(k: __mmask8, a: __m512i) -> __m512i {
22448 unsafe {
22449 static_assert_uimm_bits!(MASK, 8);
22450 let r: __m512i = _mm512_permutex_epi64::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO))
22452 }
22453}
22454
22455/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst.
22456///
22457/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex_epi64&expand=4205)
22458#[inline]
22459#[target_feature(enable = "avx512f,avx512vl")]
22460#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22461#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
22462#[rustc_legacy_const_generics(1)]
22463pub fn _mm256_permutex_epi64<const MASK: i32>(a: __m256i) -> __m256i {
22464 unsafe {
22465 static_assert_uimm_bits!(MASK, 8);
22466 simd_shuffle!(
22467 a,
22468 a,
22469 [
22470 MASK as u32 & 0b11,
22471 (MASK as u32 >> 2) & 0b11,
22472 ((MASK as u32 >> 4) & 0b11),
22473 ((MASK as u32 >> 6) & 0b11),
22474 ],
22475 )
22476 }
22477}
22478
22479/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22480///
22481/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex_epi64&expand=4203)
22482#[inline]
22483#[target_feature(enable = "avx512f,avx512vl")]
22484#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22485#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
22486#[rustc_legacy_const_generics(3)]
22487pub fn _mm256_mask_permutex_epi64<const MASK: i32>(
22488 src: __m256i,
22489 k: __mmask8,
22490 a: __m256i,
22491) -> __m256i {
22492 unsafe {
22493 static_assert_uimm_bits!(MASK, 8);
22494 let r: __m256i = _mm256_permutex_epi64::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4()))
22496 }
22497}
22498
22499/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22500///
22501/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex_epi64&expand=4204)
22502#[inline]
22503#[target_feature(enable = "avx512f,avx512vl")]
22504#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22505#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
22506#[rustc_legacy_const_generics(2)]
22507pub fn _mm256_maskz_permutex_epi64<const MASK: i32>(k: __mmask8, a: __m256i) -> __m256i {
22508 unsafe {
22509 static_assert_uimm_bits!(MASK, 8);
22510 let r: __m256i = _mm256_permutex_epi64::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO))
22512 }
22513}
22514
22515/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst.
22516///
22517/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex_pd&expand=4214)
22518#[inline]
22519#[target_feature(enable = "avx512f")]
22520#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22521#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
22522#[rustc_legacy_const_generics(1)]
22523pub fn _mm512_permutex_pd<const MASK: i32>(a: __m512d) -> __m512d {
22524 unsafe {
22525 static_assert_uimm_bits!(MASK, 8);
22526 simd_shuffle!(
22527 a,
22528 a,
22529 [
22530 MASK as u32 & 0b11,
22531 (MASK as u32 >> 2) & 0b11,
22532 ((MASK as u32 >> 4) & 0b11),
22533 ((MASK as u32 >> 6) & 0b11),
22534 (MASK as u32 & 0b11) + 4,
22535 ((MASK as u32 >> 2) & 0b11) + 4,
22536 ((MASK as u32 >> 4) & 0b11) + 4,
22537 ((MASK as u32 >> 6) & 0b11) + 4,
22538 ],
22539 )
22540 }
22541}
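// Usage sketch (illustrative, not from the original source): the same
// two-bits-per-lane encoding as the integer form, applied to doubles; the
// constant and values are assumptions.
//
//     let a = _mm512_setr_pd(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
//     // 0b01_00_11_10 reads lanes 2, 3, 0, 1 of each 256-bit half, so `r`
//     // holds [2.0, 3.0, 0.0, 1.0, 6.0, 7.0, 4.0, 5.0].
//     let r = _mm512_permutex_pd::<0b01_00_11_10>(a);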
22542
22543/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22544///
22545/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex_pd&expand=4212)
22546#[inline]
22547#[target_feature(enable = "avx512f")]
22548#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22549#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
22550#[rustc_legacy_const_generics(3)]
22551pub fn _mm512_mask_permutex_pd<const MASK: i32>(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
22552 unsafe {
22553 let r: __m512d = _mm512_permutex_pd::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
22555 }
22556}
22557
22558/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22559///
22560/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex_pd&expand=4213)
22561#[inline]
22562#[target_feature(enable = "avx512f")]
22563#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22564#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
22565#[rustc_legacy_const_generics(2)]
22566pub fn _mm512_maskz_permutex_pd<const MASK: i32>(k: __mmask8, a: __m512d) -> __m512d {
22567 unsafe {
22568 let r: __m512d = _mm512_permutex_pd::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
22570 }
22571}
22572
22573/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst.
22574///
22575/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex_pd&expand=4211)
22576#[inline]
22577#[target_feature(enable = "avx512f,avx512vl")]
22578#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22579#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
22580#[rustc_legacy_const_generics(1)]
22581pub fn _mm256_permutex_pd<const MASK: i32>(a: __m256d) -> __m256d {
22582 unsafe {
22583 static_assert_uimm_bits!(MASK, 8);
22584 simd_shuffle!(
22585 a,
22586 a,
22587 [
22588 MASK as u32 & 0b11,
22589 (MASK as u32 >> 2) & 0b11,
22590 ((MASK as u32 >> 4) & 0b11),
22591 ((MASK as u32 >> 6) & 0b11),
22592 ],
22593 )
22594 }
22595}
22596
22597/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22598///
22599/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex_pd&expand=4209)
22600#[inline]
22601#[target_feature(enable = "avx512f,avx512vl")]
22602#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22603#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
22604#[rustc_legacy_const_generics(3)]
22605pub fn _mm256_mask_permutex_pd<const MASK: i32>(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
22606 unsafe {
22607 static_assert_uimm_bits!(MASK, 8);
22608 let r: __m256d = _mm256_permutex_pd::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
22610 }
22611}
22612
22613/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22614///
22615/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex_pd&expand=4210)
22616#[inline]
22617#[target_feature(enable = "avx512f,avx512vl")]
22618#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22619#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
22620#[rustc_legacy_const_generics(2)]
22621pub fn _mm256_maskz_permutex_pd<const MASK: i32>(k: __mmask8, a: __m256d) -> __m256d {
22622 unsafe {
22623 static_assert_uimm_bits!(MASK, 8);
22624 let r: __m256d = _mm256_permutex_pd::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
22626 }
22627}
22628
22629/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst. Note that this intrinsic shuffles across 128-bit lanes, unlike past intrinsics that use the permutevar name. This intrinsic is identical to _mm512_permutexvar_epi32, and it is recommended that you use that intrinsic name.
22630///
22631/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutevar_epi32&expand=4182)
22632#[inline]
22633#[target_feature(enable = "avx512f")]
22634#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22635#[cfg_attr(test, assert_instr(vperm))] //should be vpermd
22636pub fn _mm512_permutevar_epi32(idx: __m512i, a: __m512i) -> __m512i {
    unsafe { transmute(vpermd(a.as_i32x16(), idx.as_i32x16())) }
22638}
22639
22640/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Note that this intrinsic shuffles across 128-bit lanes, unlike past intrinsics that use the permutevar name. This intrinsic is identical to _mm512_mask_permutexvar_epi32, and it is recommended that you use that intrinsic name.
22641///
22642/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutevar_epi32&expand=4181)
22643#[inline]
22644#[target_feature(enable = "avx512f")]
22645#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22646#[cfg_attr(test, assert_instr(vpermd))]
22647pub fn _mm512_mask_permutevar_epi32(
22648 src: __m512i,
22649 k: __mmask16,
22650 idx: __m512i,
22651 a: __m512i,
22652) -> __m512i {
22653 unsafe {
22654 let permute: i32x16 = _mm512_permutevar_epi32(idx, a).as_i32x16();
        transmute(simd_select_bitmask(k, permute, src.as_i32x16()))
22656 }
22657}
22658
22659/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst.
22660///
22661/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutevar_ps&expand=4200)
22662#[inline]
22663#[target_feature(enable = "avx512f")]
22664#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22665#[cfg_attr(test, assert_instr(vpermilps))]
22666pub fn _mm512_permutevar_ps(a: __m512, b: __m512i) -> __m512 {
    unsafe { transmute(vpermilps(a.as_f32x16(), b.as_i32x16())) }
22668}
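// Usage sketch (illustrative, not from the original source): only bits 1:0 of
// each 32-bit control element are used, and selection never crosses a 128-bit
// lane. The values are assumptions.
//
//     let a = _mm512_setr_ps(
//         0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0,
//         8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
//     );
//     let ctrl = _mm512_set1_epi32(3);
//     // Every element becomes element 3 of its own 128-bit lane:
//     // [3.0, 3.0, 3.0, 3.0, 7.0, ..., 15.0, 15.0, 15.0, 15.0].
//     let r = _mm512_permutevar_ps(a, ctrl);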
22669
22670/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22671///
22672/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutevar_ps&expand=4198)
22673#[inline]
22674#[target_feature(enable = "avx512f")]
22675#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22676#[cfg_attr(test, assert_instr(vpermilps))]
22677pub fn _mm512_mask_permutevar_ps(src: __m512, k: __mmask16, a: __m512, b: __m512i) -> __m512 {
22678 unsafe {
22679 let permute: f32x16 = _mm512_permutevar_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, permute, src.as_f32x16()))
22681 }
22682}
22683
22684/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22685///
22686/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutevar_ps&expand=4199)
22687#[inline]
22688#[target_feature(enable = "avx512f")]
22689#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22690#[cfg_attr(test, assert_instr(vpermilps))]
22691pub fn _mm512_maskz_permutevar_ps(k: __mmask16, a: __m512, b: __m512i) -> __m512 {
22692 unsafe {
22693 let permute: f32x16 = _mm512_permutevar_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, permute, f32x16::ZERO))
22695 }
22696}
22697
22698/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22699///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutevar_ps&expand=4195)
22701#[inline]
22702#[target_feature(enable = "avx512f,avx512vl")]
22703#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22704#[cfg_attr(test, assert_instr(vpermilps))]
22705pub fn _mm256_mask_permutevar_ps(src: __m256, k: __mmask8, a: __m256, b: __m256i) -> __m256 {
22706 unsafe {
22707 let permute: f32x8 = _mm256_permutevar_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, permute, src.as_f32x8()))
22709 }
22710}
22711
22712/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22713///
22714/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutevar_ps&expand=4196)
22715#[inline]
22716#[target_feature(enable = "avx512f,avx512vl")]
22717#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22718#[cfg_attr(test, assert_instr(vpermilps))]
22719pub fn _mm256_maskz_permutevar_ps(k: __mmask8, a: __m256, b: __m256i) -> __m256 {
22720 unsafe {
22721 let permute: f32x8 = _mm256_permutevar_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, permute, f32x8::ZERO))
22723 }
22724}
22725
22726/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22727///
22728/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutevar_ps&expand=4192)
22729#[inline]
22730#[target_feature(enable = "avx512f,avx512vl")]
22731#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22732#[cfg_attr(test, assert_instr(vpermilps))]
22733pub fn _mm_mask_permutevar_ps(src: __m128, k: __mmask8, a: __m128, b: __m128i) -> __m128 {
22734 unsafe {
22735 let permute: f32x4 = _mm_permutevar_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, permute, src.as_f32x4()))
22737 }
22738}
22739
22740/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22741///
22742/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutevar_ps&expand=4193)
22743#[inline]
22744#[target_feature(enable = "avx512f,avx512vl")]
22745#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22746#[cfg_attr(test, assert_instr(vpermilps))]
22747pub fn _mm_maskz_permutevar_ps(k: __mmask8, a: __m128, b: __m128i) -> __m128 {
22748 unsafe {
22749 let permute: f32x4 = _mm_permutevar_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, permute, f32x4::ZERO))
22751 }
22752}
22753
22754/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst.
22755///
22756/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutevar_pd&expand=4191)
22757#[inline]
22758#[target_feature(enable = "avx512f")]
22759#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22760#[cfg_attr(test, assert_instr(vpermilpd))]
22761pub fn _mm512_permutevar_pd(a: __m512d, b: __m512i) -> __m512d {
    unsafe { transmute(vpermilpd(a.as_f64x8(), b.as_i64x8())) }
22763}
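// Usage sketch (illustrative, not from the original source): for the `pd`
// form, bit 1 (not bit 0) of each 64-bit control element selects the lower (0)
// or upper (1) double of the same 128-bit lane. The values are assumptions.
//
//     let a = _mm512_setr_pd(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
//     let ctrl = _mm512_set1_epi64(0b10);
//     // Every element becomes the upper double of its lane:
//     // [1.0, 1.0, 3.0, 3.0, 5.0, 5.0, 7.0, 7.0].
//     let r = _mm512_permutevar_pd(a, ctrl);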
22764
22765/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22766///
22767/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutevar_pd&expand=4189)
22768#[inline]
22769#[target_feature(enable = "avx512f")]
22770#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22771#[cfg_attr(test, assert_instr(vpermilpd))]
22772pub fn _mm512_mask_permutevar_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512i) -> __m512d {
22773 unsafe {
22774 let permute: f64x8 = _mm512_permutevar_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, permute, src.as_f64x8()))
22776 }
22777}
22778
22779/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22780///
22781/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutevar_pd&expand=4190)
22782#[inline]
22783#[target_feature(enable = "avx512f")]
22784#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22785#[cfg_attr(test, assert_instr(vpermilpd))]
22786pub fn _mm512_maskz_permutevar_pd(k: __mmask8, a: __m512d, b: __m512i) -> __m512d {
22787 unsafe {
22788 let permute: f64x8 = _mm512_permutevar_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, permute, f64x8::ZERO))
22790 }
22791}
22792
22793/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22794///
22795/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutevar_pd&expand=4186)
22796#[inline]
22797#[target_feature(enable = "avx512f,avx512vl")]
22798#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22799#[cfg_attr(test, assert_instr(vpermilpd))]
22800pub fn _mm256_mask_permutevar_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256i) -> __m256d {
22801 unsafe {
22802 let permute: f64x4 = _mm256_permutevar_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, permute, src.as_f64x4()))
22804 }
22805}
22806
22807/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22808///
22809/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutevar_pd&expand=4187)
22810#[inline]
22811#[target_feature(enable = "avx512f,avx512vl")]
22812#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22813#[cfg_attr(test, assert_instr(vpermilpd))]
22814pub fn _mm256_maskz_permutevar_pd(k: __mmask8, a: __m256d, b: __m256i) -> __m256d {
22815 unsafe {
22816 let permute: f64x4 = _mm256_permutevar_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, permute, f64x4::ZERO))
22818 }
22819}
22820
22821/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22822///
22823/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutevar_pd&expand=4183)
22824#[inline]
22825#[target_feature(enable = "avx512f,avx512vl")]
22826#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22827#[cfg_attr(test, assert_instr(vpermilpd))]
22828pub fn _mm_mask_permutevar_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128i) -> __m128d {
22829 unsafe {
22830 let permute: f64x2 = _mm_permutevar_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, permute, src.as_f64x2()))
22832 }
22833}
22834
22835/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22836///
22837/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutevar_pd&expand=4184)
22838#[inline]
22839#[target_feature(enable = "avx512f,avx512vl")]
22840#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22841#[cfg_attr(test, assert_instr(vpermilpd))]
22842pub fn _mm_maskz_permutevar_pd(k: __mmask8, a: __m128d, b: __m128i) -> __m128d {
22843 unsafe {
22844 let permute: f64x2 = _mm_permutevar_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, permute, f64x2::ZERO))
22846 }
22847}
22848
22849/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
22850///
22851/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_epi32&expand=4301)
22852#[inline]
22853#[target_feature(enable = "avx512f")]
22854#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22855#[cfg_attr(test, assert_instr(vperm))] //should be vpermd
22856pub fn _mm512_permutexvar_epi32(idx: __m512i, a: __m512i) -> __m512i {
    unsafe { transmute(vpermd(a.as_i32x16(), idx.as_i32x16())) }
22858}
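// Usage sketch (illustrative, not from the original source): unlike the
// `permutevar` family, this permute crosses all 128-bit lanes; only the low
// four bits of each index are used. The values are assumptions.
//
//     let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
//     let idx = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
//     // `r` is `a` reversed: [15, 14, ..., 1, 0].
//     let r = _mm512_permutexvar_epi32(idx, a);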
22859
22860/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22861///
22862/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_epi32&expand=4299)
22863#[inline]
22864#[target_feature(enable = "avx512f")]
22865#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22866#[cfg_attr(test, assert_instr(vpermd))]
22867pub fn _mm512_mask_permutexvar_epi32(
22868 src: __m512i,
22869 k: __mmask16,
22870 idx: __m512i,
22871 a: __m512i,
22872) -> __m512i {
22873 unsafe {
22874 let permute: i32x16 = _mm512_permutexvar_epi32(idx, a).as_i32x16();
        transmute(simd_select_bitmask(k, permute, src.as_i32x16()))
22876 }
22877}
22878
22879/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22880///
22881/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_epi32&expand=4300)
22882#[inline]
22883#[target_feature(enable = "avx512f")]
22884#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22885#[cfg_attr(test, assert_instr(vpermd))]
22886pub fn _mm512_maskz_permutexvar_epi32(k: __mmask16, idx: __m512i, a: __m512i) -> __m512i {
22887 unsafe {
22888 let permute: i32x16 = _mm512_permutexvar_epi32(idx, a).as_i32x16();
        transmute(simd_select_bitmask(k, permute, i32x16::ZERO))
22890 }
22891}
22892
22893/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
22894///
22895/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_epi32&expand=4298)
22896#[inline]
22897#[target_feature(enable = "avx512f,avx512vl")]
22898#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22899#[cfg_attr(test, assert_instr(vperm))] //should be vpermd
22900pub fn _mm256_permutexvar_epi32(idx: __m256i, a: __m256i) -> __m256i {
    _mm256_permutevar8x32_epi32(a, idx) // LLVM uses llvm.x86.avx2.permd
22902}
22903
22904/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22905///
22906/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_epi32&expand=4296)
22907#[inline]
22908#[target_feature(enable = "avx512f,avx512vl")]
22909#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22910#[cfg_attr(test, assert_instr(vpermd))]
22911pub fn _mm256_mask_permutexvar_epi32(
22912 src: __m256i,
22913 k: __mmask8,
22914 idx: __m256i,
22915 a: __m256i,
22916) -> __m256i {
22917 unsafe {
22918 let permute: i32x8 = _mm256_permutexvar_epi32(idx, a).as_i32x8();
        transmute(simd_select_bitmask(k, permute, src.as_i32x8()))
22920 }
22921}
22922
22923/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22924///
22925/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_epi32&expand=4297)
22926#[inline]
22927#[target_feature(enable = "avx512f,avx512vl")]
22928#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22929#[cfg_attr(test, assert_instr(vpermd))]
22930pub fn _mm256_maskz_permutexvar_epi32(k: __mmask8, idx: __m256i, a: __m256i) -> __m256i {
22931 unsafe {
22932 let permute: i32x8 = _mm256_permutexvar_epi32(idx, a).as_i32x8();
        transmute(simd_select_bitmask(k, permute, i32x8::ZERO))
22934 }
22935}
22936
22937/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
22938///
22939/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_epi64&expand=4307)
22940#[inline]
22941#[target_feature(enable = "avx512f")]
22942#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22943#[cfg_attr(test, assert_instr(vperm))] //should be vpermq
22944pub fn _mm512_permutexvar_epi64(idx: __m512i, a: __m512i) -> __m512i {
    unsafe { transmute(vpermq(a.as_i64x8(), idx.as_i64x8())) }
22946}
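// Usage sketch (illustrative, not from the original source): only the low
// three bits of each 64-bit index are used, so an all-3 index vector
// broadcasts lane 3. The values are assumptions.
//
//     let a = _mm512_setr_epi64(10, 11, 12, 13, 14, 15, 16, 17);
//     let idx = _mm512_set1_epi64(3);
//     // `r` holds [13, 13, 13, 13, 13, 13, 13, 13].
//     let r = _mm512_permutexvar_epi64(idx, a);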
22947
22948/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22949///
22950/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_epi64&expand=4305)
22951#[inline]
22952#[target_feature(enable = "avx512f")]
22953#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22954#[cfg_attr(test, assert_instr(vpermq))]
22955pub fn _mm512_mask_permutexvar_epi64(
22956 src: __m512i,
22957 k: __mmask8,
22958 idx: __m512i,
22959 a: __m512i,
22960) -> __m512i {
22961 unsafe {
22962 let permute: i64x8 = _mm512_permutexvar_epi64(idx, a).as_i64x8();
        transmute(simd_select_bitmask(k, permute, src.as_i64x8()))
22964 }
22965}
22966
22967/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22968///
22969/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_epi64&expand=4306)
22970#[inline]
22971#[target_feature(enable = "avx512f")]
22972#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22973#[cfg_attr(test, assert_instr(vpermq))]
22974pub fn _mm512_maskz_permutexvar_epi64(k: __mmask8, idx: __m512i, a: __m512i) -> __m512i {
22975 unsafe {
22976 let permute: i64x8 = _mm512_permutexvar_epi64(idx, a).as_i64x8();
        transmute(simd_select_bitmask(k, permute, i64x8::ZERO))
22978 }
22979}
22980
22981/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
22982///
22983/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_epi64&expand=4304)
22984#[inline]
22985#[target_feature(enable = "avx512f,avx512vl")]
22986#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22987#[cfg_attr(test, assert_instr(vperm))] //should be vpermq
22988pub fn _mm256_permutexvar_epi64(idx: __m256i, a: __m256i) -> __m256i {
    unsafe { transmute(vpermq256(a.as_i64x4(), idx.as_i64x4())) }
22990}
22991
22992/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22993///
22994/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_epi64&expand=4302)
22995#[inline]
22996#[target_feature(enable = "avx512f,avx512vl")]
22997#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
22998#[cfg_attr(test, assert_instr(vpermq))]
22999pub fn _mm256_mask_permutexvar_epi64(
23000 src: __m256i,
23001 k: __mmask8,
23002 idx: __m256i,
23003 a: __m256i,
23004) -> __m256i {
23005 unsafe {
23006 let permute: i64x4 = _mm256_permutexvar_epi64(idx, a).as_i64x4();
        transmute(simd_select_bitmask(k, permute, src.as_i64x4()))
23008 }
23009}
23010
23011/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23012///
23013/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_epi64&expand=4303)
23014#[inline]
23015#[target_feature(enable = "avx512f,avx512vl")]
23016#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23017#[cfg_attr(test, assert_instr(vpermq))]
23018pub fn _mm256_maskz_permutexvar_epi64(k: __mmask8, idx: __m256i, a: __m256i) -> __m256i {
23019 unsafe {
23020 let permute: i64x4 = _mm256_permutexvar_epi64(idx, a).as_i64x4();
        transmute(simd_select_bitmask(k, permute, i64x4::ZERO))
23022 }
23023}
23024
/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst.
23026///
23027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_ps&expand=4200)
23028#[inline]
23029#[target_feature(enable = "avx512f")]
23030#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23031#[cfg_attr(test, assert_instr(vpermps))]
23032pub fn _mm512_permutexvar_ps(idx: __m512i, a: __m512) -> __m512 {
    unsafe { transmute(vpermps(a.as_f32x16(), idx.as_i32x16())) }
23034}
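// Usage sketch (illustrative, not from the original source): a full-width
// gather of single-precision lanes by index, here separating even- and
// odd-indexed elements. The values are assumptions.
//
//     let a = _mm512_setr_ps(
//         0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0,
//         8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
//     );
//     let idx = _mm512_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15);
//     // `r` holds the even-indexed elements followed by the odd-indexed ones.
//     let r = _mm512_permutexvar_ps(idx, a);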
23035
23036/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23037///
23038/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_ps&expand=4326)
23039#[inline]
23040#[target_feature(enable = "avx512f")]
23041#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23042#[cfg_attr(test, assert_instr(vpermps))]
23043pub fn _mm512_mask_permutexvar_ps(src: __m512, k: __mmask16, idx: __m512i, a: __m512) -> __m512 {
23044 unsafe {
23045 let permute: f32x16 = _mm512_permutexvar_ps(idx, a).as_f32x16();
        transmute(simd_select_bitmask(k, permute, src.as_f32x16()))
23047 }
23048}
23049
23050/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23051///
23052/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_ps&expand=4327)
23053#[inline]
23054#[target_feature(enable = "avx512f")]
23055#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23056#[cfg_attr(test, assert_instr(vpermps))]
23057pub fn _mm512_maskz_permutexvar_ps(k: __mmask16, idx: __m512i, a: __m512) -> __m512 {
23058 unsafe {
23059 let permute: f32x16 = _mm512_permutexvar_ps(idx, a).as_f32x16();
        transmute(simd_select_bitmask(k, permute, f32x16::ZERO))
23061 }
23062}
23063
/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst.
23065///
23066/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_ps&expand=4325)
23067#[inline]
23068#[target_feature(enable = "avx512f,avx512vl")]
23069#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23070#[cfg_attr(test, assert_instr(vpermps))]
23071pub fn _mm256_permutexvar_ps(idx: __m256i, a: __m256) -> __m256 {
23072 _mm256_permutevar8x32_ps(a, idx) //llvm.x86.avx2.permps
23073}
23074
23075/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23076///
23077/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_ps&expand=4323)
23078#[inline]
23079#[target_feature(enable = "avx512f,avx512vl")]
23080#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23081#[cfg_attr(test, assert_instr(vpermps))]
23082pub fn _mm256_mask_permutexvar_ps(src: __m256, k: __mmask8, idx: __m256i, a: __m256) -> __m256 {
23083 unsafe {
23084 let permute: f32x8 = _mm256_permutexvar_ps(idx, a).as_f32x8();
        transmute(simd_select_bitmask(k, permute, src.as_f32x8()))
23086 }
23087}
23088
23089/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23090///
23091/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_ps&expand=4324)
23092#[inline]
23093#[target_feature(enable = "avx512f,avx512vl")]
23094#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23095#[cfg_attr(test, assert_instr(vpermps))]
23096pub fn _mm256_maskz_permutexvar_ps(k: __mmask8, idx: __m256i, a: __m256) -> __m256 {
23097 unsafe {
23098 let permute: f32x8 = _mm256_permutexvar_ps(idx, a).as_f32x8();
        transmute(simd_select_bitmask(k, permute, f32x8::ZERO))
23100 }
23101}
23102
23103/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst.
23104///
23105/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_pd&expand=4322)
23106#[inline]
23107#[target_feature(enable = "avx512f")]
23108#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23109#[cfg_attr(test, assert_instr(vpermpd))]
23110pub fn _mm512_permutexvar_pd(idx: __m512i, a: __m512d) -> __m512d {
    unsafe { transmute(vpermpd(a.as_f64x8(), idx.as_i64x8())) }
23112}
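// Usage sketch (illustrative, not from the original source): rotating the
// eight doubles left by one lane. The values are assumptions.
//
//     let a = _mm512_setr_pd(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
//     let idx = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 0);
//     // `r` holds [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 0.0].
//     let r = _mm512_permutexvar_pd(idx, a);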
23113
23114/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23115///
23116/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_pd&expand=4320)
23117#[inline]
23118#[target_feature(enable = "avx512f")]
23119#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23120#[cfg_attr(test, assert_instr(vpermpd))]
23121pub fn _mm512_mask_permutexvar_pd(src: __m512d, k: __mmask8, idx: __m512i, a: __m512d) -> __m512d {
23122 unsafe {
23123 let permute: f64x8 = _mm512_permutexvar_pd(idx, a).as_f64x8();
        transmute(simd_select_bitmask(k, permute, src.as_f64x8()))
23125 }
23126}
23127
23128/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23129///
23130/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_pd&expand=4321)
23131#[inline]
23132#[target_feature(enable = "avx512f")]
23133#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23134#[cfg_attr(test, assert_instr(vpermpd))]
23135pub fn _mm512_maskz_permutexvar_pd(k: __mmask8, idx: __m512i, a: __m512d) -> __m512d {
23136 unsafe {
23137 let permute: f64x8 = _mm512_permutexvar_pd(idx, a).as_f64x8();
        transmute(simd_select_bitmask(k, permute, f64x8::ZERO))
23139 }
23140}
23141
23142/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst.
23143///
23144/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_pd&expand=4319)
23145#[inline]
23146#[target_feature(enable = "avx512f,avx512vl")]
23147#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23148#[cfg_attr(test, assert_instr(vpermpd))]
23149pub fn _mm256_permutexvar_pd(idx: __m256i, a: __m256d) -> __m256d {
    unsafe { transmute(vpermpd256(a.as_f64x4(), idx.as_i64x4())) }
23151}
23152
23153/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23154///
23155/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_pd&expand=4317)
23156#[inline]
23157#[target_feature(enable = "avx512f,avx512vl")]
23158#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23159#[cfg_attr(test, assert_instr(vpermpd))]
23160pub fn _mm256_mask_permutexvar_pd(src: __m256d, k: __mmask8, idx: __m256i, a: __m256d) -> __m256d {
23161 unsafe {
23162 let permute: f64x4 = _mm256_permutexvar_pd(idx, a).as_f64x4();
        transmute(simd_select_bitmask(k, permute, src.as_f64x4()))
23164 }
23165}
23166
23167/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23168///
23169/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_pd&expand=4318)
23170#[inline]
23171#[target_feature(enable = "avx512f,avx512vl")]
23172#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23173#[cfg_attr(test, assert_instr(vpermpd))]
23174pub fn _mm256_maskz_permutexvar_pd(k: __mmask8, idx: __m256i, a: __m256d) -> __m256d {
23175 unsafe {
23176 let permute: f64x4 = _mm256_permutexvar_pd(idx, a).as_f64x4();
        transmute(simd_select_bitmask(k, permute, f64x4::ZERO))
23178 }
23179}
23180
23181/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23182///
23183/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_epi32&expand=4238)
23184#[inline]
23185#[target_feature(enable = "avx512f")]
23186#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23187#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
23188pub fn _mm512_permutex2var_epi32(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vpermi2d(a.as_i32x16(), idx.as_i32x16(), b.as_i32x16())) }
23190}
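// Usage sketch (illustrative, not from the original source): each index
// selects from the 32-element table formed by `a` (indices 0..=15) and `b`
// (indices 16..=31); bit 4 is the a/b selector. The values are assumptions.
//
//     let a = _mm512_set1_epi32(1);
//     let b = _mm512_set1_epi32(2);
//     let idx = _mm512_setr_epi32(0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23);
//     // `r` interleaves the two sources: [1, 2, 1, 2, ..., 1, 2].
//     let r = _mm512_permutex2var_epi32(a, idx, b);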
23191
23192/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23193///
23194/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_epi32&expand=4235)
23195#[inline]
23196#[target_feature(enable = "avx512f")]
23197#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23198#[cfg_attr(test, assert_instr(vpermt2d))]
23199pub fn _mm512_mask_permutex2var_epi32(
23200 a: __m512i,
23201 k: __mmask16,
23202 idx: __m512i,
23203 b: __m512i,
23204) -> __m512i {
23205 unsafe {
23206 let permute: i32x16 = _mm512_permutex2var_epi32(a, idx, b).as_i32x16();
        transmute(simd_select_bitmask(k, permute, a.as_i32x16()))
23208 }
23209}
23210
23211/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23212///
23213/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_epi32&expand=4237)
23214#[inline]
23215#[target_feature(enable = "avx512f")]
23216#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23217#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
23218pub fn _mm512_maskz_permutex2var_epi32(
23219 k: __mmask16,
23220 a: __m512i,
23221 idx: __m512i,
23222 b: __m512i,
23223) -> __m512i {
23224 unsafe {
23225 let permute: i32x16 = _mm512_permutex2var_epi32(a, idx, b).as_i32x16();
        transmute(simd_select_bitmask(k, permute, i32x16::ZERO))
23227 }
23228}
23229
23230/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23231///
23232/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_epi32&expand=4236)
23233#[inline]
23234#[target_feature(enable = "avx512f")]
23235#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23236#[cfg_attr(test, assert_instr(vpermi2d))]
23237pub fn _mm512_mask2_permutex2var_epi32(
23238 a: __m512i,
23239 idx: __m512i,
23240 k: __mmask16,
23241 b: __m512i,
23242) -> __m512i {
23243 unsafe {
23244 let permute: i32x16 = _mm512_permutex2var_epi32(a, idx, b).as_i32x16();
        transmute(simd_select_bitmask(k, permute, idx.as_i32x16()))
23246 }
23247}
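// Sketch (illustrative, not from the original source): for a zero mask bit,
// the `mask` form keeps the lane from `a`, the `maskz` form zeroes it, and
// this `mask2` form keeps the lane from `idx` itself. With k = 0 the result is
// simply a copy of `idx`:
//
//     let r = _mm512_mask2_permutex2var_epi32(a, idx, 0, b);
//     // `r` equals `idx` because every mask bit is clear.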
23248
23249/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23250///
23251/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_epi32&expand=4234)
23252#[inline]
23253#[target_feature(enable = "avx512f,avx512vl")]
23254#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23255#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
23256pub fn _mm256_permutex2var_epi32(a: __m256i, idx: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vpermi2d256(a.as_i32x8(), idx.as_i32x8(), b.as_i32x8())) }
23258}
23259
23260/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23261///
23262/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_epi32&expand=4231)
23263#[inline]
23264#[target_feature(enable = "avx512f,avx512vl")]
23265#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23266#[cfg_attr(test, assert_instr(vpermt2d))]
23267pub fn _mm256_mask_permutex2var_epi32(
23268 a: __m256i,
23269 k: __mmask8,
23270 idx: __m256i,
23271 b: __m256i,
23272) -> __m256i {
23273 unsafe {
23274 let permute: i32x8 = _mm256_permutex2var_epi32(a, idx, b).as_i32x8();
        transmute(simd_select_bitmask(k, permute, a.as_i32x8()))
23276 }
23277}
23278
23279/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23280///
23281/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_epi32&expand=4233)
23282#[inline]
23283#[target_feature(enable = "avx512f,avx512vl")]
23284#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23285#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
23286pub fn _mm256_maskz_permutex2var_epi32(
23287 k: __mmask8,
23288 a: __m256i,
23289 idx: __m256i,
23290 b: __m256i,
23291) -> __m256i {
23292 unsafe {
23293 let permute: i32x8 = _mm256_permutex2var_epi32(a, idx, b).as_i32x8();
        transmute(simd_select_bitmask(k, permute, i32x8::ZERO))
23295 }
23296}
23297
23298/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23299///
23300/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_epi32&expand=4232)
23301#[inline]
23302#[target_feature(enable = "avx512f,avx512vl")]
23303#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23304#[cfg_attr(test, assert_instr(vpermi2d))]
23305pub fn _mm256_mask2_permutex2var_epi32(
23306 a: __m256i,
23307 idx: __m256i,
23308 k: __mmask8,
23309 b: __m256i,
23310) -> __m256i {
23311 unsafe {
23312 let permute: i32x8 = _mm256_permutex2var_epi32(a, idx, b).as_i32x8();
        transmute(simd_select_bitmask(k, permute, idx.as_i32x8()))
23314 }
23315}
23316
23317/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23318///
23319/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_epi32&expand=4230)
23320#[inline]
23321#[target_feature(enable = "avx512f,avx512vl")]
23322#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23323#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
23324pub fn _mm_permutex2var_epi32(a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vpermi2d128(a.as_i32x4(), idx.as_i32x4(), b.as_i32x4())) }
23326}
23327
23328/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23329///
23330/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_epi32&expand=4227)
23331#[inline]
23332#[target_feature(enable = "avx512f,avx512vl")]
23333#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23334#[cfg_attr(test, assert_instr(vpermt2d))]
23335pub fn _mm_mask_permutex2var_epi32(a: __m128i, k: __mmask8, idx: __m128i, b: __m128i) -> __m128i {
23336 unsafe {
23337 let permute: i32x4 = _mm_permutex2var_epi32(a, idx, b).as_i32x4();
        transmute(simd_select_bitmask(k, permute, a.as_i32x4()))
23339 }
23340}
23341
23342/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23343///
23344/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_epi32&expand=4229)
23345#[inline]
23346#[target_feature(enable = "avx512f,avx512vl")]
23347#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23348#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
23349pub fn _mm_maskz_permutex2var_epi32(k: __mmask8, a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
23350 unsafe {
23351 let permute: i32x4 = _mm_permutex2var_epi32(a, idx, b).as_i32x4();
        transmute(simd_select_bitmask(k, permute, i32x4::ZERO))
23353 }
23354}
23355
23356/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23357///
23358/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_epi32&expand=4228)
23359#[inline]
23360#[target_feature(enable = "avx512f,avx512vl")]
23361#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23362#[cfg_attr(test, assert_instr(vpermi2d))]
23363pub fn _mm_mask2_permutex2var_epi32(a: __m128i, idx: __m128i, k: __mmask8, b: __m128i) -> __m128i {
23364 unsafe {
23365 let permute: i32x4 = _mm_permutex2var_epi32(a, idx, b).as_i32x4();
        transmute(simd_select_bitmask(k, permute, idx.as_i32x4()))
23367 }
23368}
23369
23370/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23371///
23372/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_epi64&expand=4250)
23373#[inline]
23374#[target_feature(enable = "avx512f")]
23375#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23376#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
23377pub fn _mm512_permutex2var_epi64(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vpermi2q(a.as_i64x8(), idx.as_i64x8(), b.as_i64x8())) }
23379}
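// Usage sketch (illustrative, not from the original source): with 64-bit lanes
// the combined table has 16 entries, so bit 3 of each index selects `b`. The
// values are assumptions.
//
//     let a = _mm512_set1_epi64(1);
//     let b = _mm512_set1_epi64(2);
//     let idx = _mm512_setr_epi64(0, 8, 1, 9, 2, 10, 3, 11);
//     // `r` interleaves the two sources: [1, 2, 1, 2, 1, 2, 1, 2].
//     let r = _mm512_permutex2var_epi64(a, idx, b);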
23380
23381/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23382///
23383/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_epi64&expand=4247)
23384#[inline]
23385#[target_feature(enable = "avx512f")]
23386#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23387#[cfg_attr(test, assert_instr(vpermt2q))]
23388pub fn _mm512_mask_permutex2var_epi64(
23389 a: __m512i,
23390 k: __mmask8,
23391 idx: __m512i,
23392 b: __m512i,
23393) -> __m512i {
23394 unsafe {
23395 let permute: i64x8 = _mm512_permutex2var_epi64(a, idx, b).as_i64x8();
        transmute(simd_select_bitmask(k, permute, a.as_i64x8()))
23397 }
23398}
23399
23400/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23401///
23402/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_epi64&expand=4249)
23403#[inline]
23404#[target_feature(enable = "avx512f")]
23405#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23406#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
23407pub fn _mm512_maskz_permutex2var_epi64(
23408 k: __mmask8,
23409 a: __m512i,
23410 idx: __m512i,
23411 b: __m512i,
23412) -> __m512i {
23413 unsafe {
23414 let permute: i64x8 = _mm512_permutex2var_epi64(a, idx, b).as_i64x8();
        transmute(simd_select_bitmask(k, permute, i64x8::ZERO))
23416 }
23417}
23418
23419/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23420///
23421/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_epi64&expand=4248)
23422#[inline]
23423#[target_feature(enable = "avx512f")]
23424#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23425#[cfg_attr(test, assert_instr(vpermi2q))]
23426pub fn _mm512_mask2_permutex2var_epi64(
23427 a: __m512i,
23428 idx: __m512i,
23429 k: __mmask8,
23430 b: __m512i,
23431) -> __m512i {
23432 unsafe {
23433 let permute: i64x8 = _mm512_permutex2var_epi64(a, idx, b).as_i64x8();
        transmute(simd_select_bitmask(k, permute, idx.as_i64x8()))
23435 }
23436}
23437
23438/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23439///
23440/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_epi64&expand=4246)
23441#[inline]
23442#[target_feature(enable = "avx512f,avx512vl")]
23443#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23444#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
23445pub fn _mm256_permutex2var_epi64(a: __m256i, idx: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vpermi2q256(a.as_i64x4(), idx.as_i64x4(), b.as_i64x4())) }
23447}
23448
23449/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23450///
23451/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_epi64&expand=4243)
23452#[inline]
23453#[target_feature(enable = "avx512f,avx512vl")]
23454#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23455#[cfg_attr(test, assert_instr(vpermt2q))]
23456pub fn _mm256_mask_permutex2var_epi64(
23457 a: __m256i,
23458 k: __mmask8,
23459 idx: __m256i,
23460 b: __m256i,
23461) -> __m256i {
23462 unsafe {
23463 let permute: i64x4 = _mm256_permutex2var_epi64(a, idx, b).as_i64x4();
        transmute(simd_select_bitmask(k, permute, a.as_i64x4()))
23465 }
23466}
23467
23468/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23469///
23470/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_epi64&expand=4245)
23471#[inline]
23472#[target_feature(enable = "avx512f,avx512vl")]
23473#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23474#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
23475pub fn _mm256_maskz_permutex2var_epi64(
23476 k: __mmask8,
23477 a: __m256i,
23478 idx: __m256i,
23479 b: __m256i,
23480) -> __m256i {
23481 unsafe {
23482 let permute: i64x4 = _mm256_permutex2var_epi64(a, idx, b).as_i64x4();
        transmute(simd_select_bitmask(k, permute, i64x4::ZERO))
23484 }
23485}
23486
23487/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23488///
23489/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_epi64&expand=4244)
23490#[inline]
23491#[target_feature(enable = "avx512f,avx512vl")]
23492#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23493#[cfg_attr(test, assert_instr(vpermi2q))]
23494pub fn _mm256_mask2_permutex2var_epi64(
23495 a: __m256i,
23496 idx: __m256i,
23497 k: __mmask8,
23498 b: __m256i,
23499) -> __m256i {
23500 unsafe {
23501 let permute: i64x4 = _mm256_permutex2var_epi64(a, idx, b).as_i64x4();
        transmute(simd_select_bitmask(k, permute, idx.as_i64x4()))
23503 }
23504}
23505
23506/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23507///
23508/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_epi64&expand=4242)
23509#[inline]
23510#[target_feature(enable = "avx512f,avx512vl")]
23511#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23512#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
23513pub fn _mm_permutex2var_epi64(a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vpermi2q128(a.as_i64x2(), idx.as_i64x2(), b.as_i64x2())) }
23515}
23516
23517/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23518///
23519/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_epi64&expand=4239)
23520#[inline]
23521#[target_feature(enable = "avx512f,avx512vl")]
23522#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23523#[cfg_attr(test, assert_instr(vpermt2q))]
23524pub fn _mm_mask_permutex2var_epi64(a: __m128i, k: __mmask8, idx: __m128i, b: __m128i) -> __m128i {
23525 unsafe {
23526 let permute: i64x2 = _mm_permutex2var_epi64(a, idx, b).as_i64x2();
        transmute(simd_select_bitmask(k, permute, a.as_i64x2()))
23528 }
23529}
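
// Illustrative sketch (not part of the upstream sources): the writemask variant keeps the
// corresponding element of `a` wherever a bit of `k` is clear. Hypothetical example,
// assuming avx512f + avx512vl.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
fn sketch_mm_mask_permutex2var_epi64() -> __m128i {
    let a = _mm_set_epi64x(11, 10);
    let b = _mm_set_epi64x(31, 30);
    let idx = _mm_set_epi64x(0b10, 0b01); // the unmasked result would be [11, 30]
    // k = 0b01: lane 0 takes the shuffled value (11), lane 1 keeps a[1] = 11 -> dst = [11, 11]
    _mm_mask_permutex2var_epi64(a, 0b01, idx, b)
}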
23530
23531/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23532///
23533/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_epi64&expand=4241)
23534#[inline]
23535#[target_feature(enable = "avx512f,avx512vl")]
23536#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23537#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
23538pub fn _mm_maskz_permutex2var_epi64(k: __mmask8, a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
23539 unsafe {
23540 let permute: i64x2 = _mm_permutex2var_epi64(a, idx, b).as_i64x2();
        transmute(simd_select_bitmask(k, permute, i64x2::ZERO))
23542 }
23543}
23544
23545/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23546///
23547/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_epi64&expand=4240)
23548#[inline]
23549#[target_feature(enable = "avx512f,avx512vl")]
23550#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23551#[cfg_attr(test, assert_instr(vpermi2q))]
23552pub fn _mm_mask2_permutex2var_epi64(a: __m128i, idx: __m128i, k: __mmask8, b: __m128i) -> __m128i {
23553 unsafe {
23554 let permute: i64x2 = _mm_permutex2var_epi64(a, idx, b).as_i64x2();
        transmute(simd_select_bitmask(k, permute, idx.as_i64x2()))
23556 }
23557}
23558
23559/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23560///
23561/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_ps&expand=4286)
23562#[inline]
23563#[target_feature(enable = "avx512f")]
23564#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23565#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
23566pub fn _mm512_permutex2var_ps(a: __m512, idx: __m512i, b: __m512) -> __m512 {
    unsafe { transmute(vpermi2ps(a.as_f32x16(), idx.as_i32x16(), b.as_f32x16())) }
23568}
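
// Illustrative sketch (not part of the upstream sources): hypothetical usage of
// `_mm512_permutex2var_ps`. Each `idx` element uses bits 3:0 to pick a lane and bit 4 to
// pick the source table (0 = `a`, 1 = `b`); the indices below interleave the two inputs
// lane by lane. Assumes avx512f support.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn sketch_mm512_permutex2var_ps() -> __m512 {
    let a = _mm512_set1_ps(1.0);
    let b = _mm512_set1_ps(2.0);
    // Even output lanes read a[j], odd output lanes read b[j] (16 + j selects from b).
    let idx = _mm512_setr_epi32(0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23);
    // dst = [1.0, 2.0, 1.0, 2.0, ...]
    _mm512_permutex2var_ps(a, idx, b)
}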
23569
23570/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23571///
23572/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_ps&expand=4283)
23573#[inline]
23574#[target_feature(enable = "avx512f")]
23575#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23576#[cfg_attr(test, assert_instr(vpermt2ps))]
23577pub fn _mm512_mask_permutex2var_ps(a: __m512, k: __mmask16, idx: __m512i, b: __m512) -> __m512 {
23578 unsafe {
23579 let permute: f32x16 = _mm512_permutex2var_ps(a, idx, b).as_f32x16();
        transmute(simd_select_bitmask(k, permute, a.as_f32x16()))
23581 }
23582}
23583
23584/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23585///
23586/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_ps&expand=4285)
23587#[inline]
23588#[target_feature(enable = "avx512f")]
23589#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23590#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
23591pub fn _mm512_maskz_permutex2var_ps(k: __mmask16, a: __m512, idx: __m512i, b: __m512) -> __m512 {
23592 unsafe {
23593 let permute: f32x16 = _mm512_permutex2var_ps(a, idx, b).as_f32x16();
        transmute(simd_select_bitmask(k, permute, f32x16::ZERO))
23595 }
23596}
23597
23598/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23599///
23600/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_ps&expand=4284)
23601#[inline]
23602#[target_feature(enable = "avx512f")]
23603#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23604#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2ps, but it shows vpermt2ps
23605pub fn _mm512_mask2_permutex2var_ps(a: __m512, idx: __m512i, k: __mmask16, b: __m512) -> __m512 {
23606 unsafe {
23607 let permute: f32x16 = _mm512_permutex2var_ps(a, idx, b).as_f32x16();
23608 let idx: f32x16 = _mm512_castsi512_ps(idx).as_f32x16();
        transmute(simd_select_bitmask(k, permute, idx))
23610 }
23611}
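
// Illustrative sketch (not part of the upstream sources): what distinguishes the `mask2`
// form is that lanes whose bit in `k` is clear keep the raw bits of the corresponding `idx`
// element, reinterpreted as f32. Hypothetical example, assuming avx512f support.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn sketch_mm512_mask2_permutex2var_ps() -> __m512 {
    let a = _mm512_set1_ps(1.0);
    let b = _mm512_set1_ps(2.0);
    let idx = _mm512_set1_epi32(16); // every lane would select b[0] = 2.0
    // k = 0: nothing is written, so every lane holds f32::from_bits(16) (a tiny subnormal);
    // with k = !0 every lane would instead be 2.0.
    _mm512_mask2_permutex2var_ps(a, idx, 0, b)
}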
23612
23613/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23614///
23615/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_ps&expand=4282)
23616#[inline]
23617#[target_feature(enable = "avx512f,avx512vl")]
23618#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23619#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
23620pub fn _mm256_permutex2var_ps(a: __m256, idx: __m256i, b: __m256) -> __m256 {
    unsafe { transmute(vpermi2ps256(a.as_f32x8(), idx.as_i32x8(), b.as_f32x8())) }
23622}
23623
23624/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23625///
23626/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_ps&expand=4279)
23627#[inline]
23628#[target_feature(enable = "avx512f,avx512vl")]
23629#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23630#[cfg_attr(test, assert_instr(vpermt2ps))]
23631pub fn _mm256_mask_permutex2var_ps(a: __m256, k: __mmask8, idx: __m256i, b: __m256) -> __m256 {
23632 unsafe {
23633 let permute: f32x8 = _mm256_permutex2var_ps(a, idx, b).as_f32x8();
        transmute(simd_select_bitmask(k, permute, a.as_f32x8()))
23635 }
23636}
23637
23638/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23639///
23640/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_ps&expand=4281)
23641#[inline]
23642#[target_feature(enable = "avx512f,avx512vl")]
23643#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23644#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
23645pub fn _mm256_maskz_permutex2var_ps(k: __mmask8, a: __m256, idx: __m256i, b: __m256) -> __m256 {
23646 unsafe {
23647 let permute: f32x8 = _mm256_permutex2var_ps(a, idx, b).as_f32x8();
        transmute(simd_select_bitmask(k, permute, f32x8::ZERO))
23649 }
23650}
23651
23652/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23653///
23654/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_ps&expand=4280)
23655#[inline]
23656#[target_feature(enable = "avx512f,avx512vl")]
23657#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23658#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2ps, but it shows vpermt2ps
23659pub fn _mm256_mask2_permutex2var_ps(a: __m256, idx: __m256i, k: __mmask8, b: __m256) -> __m256 {
23660 unsafe {
23661 let permute: f32x8 = _mm256_permutex2var_ps(a, idx, b).as_f32x8();
23662 let idx: f32x8 = _mm256_castsi256_ps(idx).as_f32x8();
        transmute(simd_select_bitmask(k, permute, idx))
23664 }
23665}
23666
23667/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23668///
23669/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_ps&expand=4278)
23670#[inline]
23671#[target_feature(enable = "avx512f,avx512vl")]
23672#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23673#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
23674pub fn _mm_permutex2var_ps(a: __m128, idx: __m128i, b: __m128) -> __m128 {
    unsafe { transmute(vpermi2ps128(a.as_f32x4(), idx.as_i32x4(), b.as_f32x4())) }
23676}
23677
23678/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23679///
23680/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_ps&expand=4275)
23681#[inline]
23682#[target_feature(enable = "avx512f,avx512vl")]
23683#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23684#[cfg_attr(test, assert_instr(vpermt2ps))]
23685pub fn _mm_mask_permutex2var_ps(a: __m128, k: __mmask8, idx: __m128i, b: __m128) -> __m128 {
23686 unsafe {
23687 let permute: f32x4 = _mm_permutex2var_ps(a, idx, b).as_f32x4();
        transmute(simd_select_bitmask(k, permute, a.as_f32x4()))
23689 }
23690}
23691
23692/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23693///
23694/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_ps&expand=4277)
23695#[inline]
23696#[target_feature(enable = "avx512f,avx512vl")]
23697#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23698#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
23699pub fn _mm_maskz_permutex2var_ps(k: __mmask8, a: __m128, idx: __m128i, b: __m128) -> __m128 {
23700 unsafe {
23701 let permute: f32x4 = _mm_permutex2var_ps(a, idx, b).as_f32x4();
        transmute(simd_select_bitmask(k, permute, f32x4::ZERO))
23703 }
23704}
23705
23706/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23707///
23708/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_ps&expand=4276)
23709#[inline]
23710#[target_feature(enable = "avx512f,avx512vl")]
23711#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23712#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2ps, but it shows vpermt2ps
23713pub fn _mm_mask2_permutex2var_ps(a: __m128, idx: __m128i, k: __mmask8, b: __m128) -> __m128 {
23714 unsafe {
23715 let permute: f32x4 = _mm_permutex2var_ps(a, idx, b).as_f32x4();
23716 let idx: f32x4 = _mm_castsi128_ps(idx).as_f32x4();
        transmute(simd_select_bitmask(k, permute, idx))
23718 }
23719}
23720
23721/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23722///
23723/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_pd&expand=4274)
23724#[inline]
23725#[target_feature(enable = "avx512f")]
23726#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23727#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
23728pub fn _mm512_permutex2var_pd(a: __m512d, idx: __m512i, b: __m512d) -> __m512d {
    unsafe { transmute(vpermi2pd(a.as_f64x8(), idx.as_i64x8(), b.as_f64x8())) }
23730}
23731
23732/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23733///
23734/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_pd&expand=4271)
23735#[inline]
23736#[target_feature(enable = "avx512f")]
23737#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23738#[cfg_attr(test, assert_instr(vpermt2pd))]
23739pub fn _mm512_mask_permutex2var_pd(a: __m512d, k: __mmask8, idx: __m512i, b: __m512d) -> __m512d {
23740 unsafe {
23741 let permute: f64x8 = _mm512_permutex2var_pd(a, idx, b).as_f64x8();
        transmute(simd_select_bitmask(k, permute, a.as_f64x8()))
23743 }
23744}
23745
23746/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23747///
23748/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_pd&expand=4273)
23749#[inline]
23750#[target_feature(enable = "avx512f")]
23751#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23752#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
23753pub fn _mm512_maskz_permutex2var_pd(k: __mmask8, a: __m512d, idx: __m512i, b: __m512d) -> __m512d {
23754 unsafe {
23755 let permute: f64x8 = _mm512_permutex2var_pd(a, idx, b).as_f64x8();
        transmute(simd_select_bitmask(k, permute, f64x8::ZERO))
23757 }
23758}
23759
/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23761///
23762/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_pd&expand=4272)
23763#[inline]
23764#[target_feature(enable = "avx512f")]
23765#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23766#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2pd, but it shows vpermt2pd
23767pub fn _mm512_mask2_permutex2var_pd(a: __m512d, idx: __m512i, k: __mmask8, b: __m512d) -> __m512d {
23768 unsafe {
23769 let permute: f64x8 = _mm512_permutex2var_pd(a, idx, b).as_f64x8();
23770 let idx: f64x8 = _mm512_castsi512_pd(idx).as_f64x8();
        transmute(simd_select_bitmask(k, permute, idx))
23772 }
23773}
23774
23775/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23776///
23777/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_pd&expand=4270)
23778#[inline]
23779#[target_feature(enable = "avx512f,avx512vl")]
23780#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23781#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
23782pub fn _mm256_permutex2var_pd(a: __m256d, idx: __m256i, b: __m256d) -> __m256d {
    unsafe { transmute(vpermi2pd256(a.as_f64x4(), idx.as_i64x4(), b.as_f64x4())) }
23784}
23785
23786/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23787///
23788/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_pd&expand=4267)
23789#[inline]
23790#[target_feature(enable = "avx512f,avx512vl")]
23791#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23792#[cfg_attr(test, assert_instr(vpermt2pd))]
23793pub fn _mm256_mask_permutex2var_pd(a: __m256d, k: __mmask8, idx: __m256i, b: __m256d) -> __m256d {
23794 unsafe {
23795 let permute: f64x4 = _mm256_permutex2var_pd(a, idx, b).as_f64x4();
        transmute(simd_select_bitmask(k, permute, a.as_f64x4()))
23797 }
23798}
23799
23800/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23801///
23802/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_pd&expand=4269)
23803#[inline]
23804#[target_feature(enable = "avx512f,avx512vl")]
23805#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23806#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
23807pub fn _mm256_maskz_permutex2var_pd(k: __mmask8, a: __m256d, idx: __m256i, b: __m256d) -> __m256d {
23808 unsafe {
23809 let permute: f64x4 = _mm256_permutex2var_pd(a, idx, b).as_f64x4();
        transmute(simd_select_bitmask(k, permute, f64x4::ZERO))
23811 }
23812}
23813
/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23815///
23816/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_pd&expand=4268)
23817#[inline]
23818#[target_feature(enable = "avx512f,avx512vl")]
23819#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23820#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2pd, but it shows vpermt2pd
23821pub fn _mm256_mask2_permutex2var_pd(a: __m256d, idx: __m256i, k: __mmask8, b: __m256d) -> __m256d {
23822 unsafe {
23823 let permute: f64x4 = _mm256_permutex2var_pd(a, idx, b).as_f64x4();
23824 let idx: f64x4 = _mm256_castsi256_pd(idx).as_f64x4();
        transmute(simd_select_bitmask(k, permute, idx))
23826 }
23827}
23828
23829/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23830///
23831/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_pd&expand=4266)
23832#[inline]
23833#[target_feature(enable = "avx512f,avx512vl")]
23834#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23835#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
23836pub fn _mm_permutex2var_pd(a: __m128d, idx: __m128i, b: __m128d) -> __m128d {
    unsafe { transmute(vpermi2pd128(a.as_f64x2(), idx.as_i64x2(), b.as_f64x2())) }
23838}
23839
23840/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23841///
23842/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_pd&expand=4263)
23843#[inline]
23844#[target_feature(enable = "avx512f,avx512vl")]
23845#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23846#[cfg_attr(test, assert_instr(vpermt2pd))]
23847pub fn _mm_mask_permutex2var_pd(a: __m128d, k: __mmask8, idx: __m128i, b: __m128d) -> __m128d {
23848 unsafe {
23849 let permute: f64x2 = _mm_permutex2var_pd(a, idx, b).as_f64x2();
        transmute(simd_select_bitmask(k, permute, a.as_f64x2()))
23851 }
23852}
23853
23854/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23855///
23856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_pd&expand=4265)
23857#[inline]
23858#[target_feature(enable = "avx512f,avx512vl")]
23859#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23860#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
23861pub fn _mm_maskz_permutex2var_pd(k: __mmask8, a: __m128d, idx: __m128i, b: __m128d) -> __m128d {
23862 unsafe {
23863 let permute: f64x2 = _mm_permutex2var_pd(a, idx, b).as_f64x2();
        transmute(simd_select_bitmask(k, permute, f64x2::ZERO))
23865 }
23866}
23867
/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23869///
23870/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_pd&expand=4264)
23871#[inline]
23872#[target_feature(enable = "avx512f,avx512vl")]
23873#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23874#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2pd, but it shows vpermt2pd
23875pub fn _mm_mask2_permutex2var_pd(a: __m128d, idx: __m128i, k: __mmask8, b: __m128d) -> __m128d {
23876 unsafe {
23877 let permute: f64x2 = _mm_permutex2var_pd(a, idx, b).as_f64x2();
23878 let idx: f64x2 = _mm_castsi128_pd(idx).as_f64x2();
        transmute(simd_select_bitmask(k, permute, idx))
23880 }
23881}
23882
/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst.
23884///
23885/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_epi32&expand=5150)
23886#[inline]
23887#[target_feature(enable = "avx512f")]
23888#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23889#[cfg_attr(test, assert_instr(vshufps, MASK = 9))] //should be vpshufd
23890#[rustc_legacy_const_generics(1)]
23891pub fn _mm512_shuffle_epi32<const MASK: _MM_PERM_ENUM>(a: __m512i) -> __m512i {
23892 unsafe {
23893 static_assert_uimm_bits!(MASK, 8);
23894 let r: i32x16 = simd_shuffle!(
23895 a.as_i32x16(),
23896 a.as_i32x16(),
23897 [
23898 MASK as u32 & 0b11,
23899 (MASK as u32 >> 2) & 0b11,
23900 (MASK as u32 >> 4) & 0b11,
23901 (MASK as u32 >> 6) & 0b11,
23902 (MASK as u32 & 0b11) + 4,
23903 ((MASK as u32 >> 2) & 0b11) + 4,
23904 ((MASK as u32 >> 4) & 0b11) + 4,
23905 ((MASK as u32 >> 6) & 0b11) + 4,
23906 (MASK as u32 & 0b11) + 8,
23907 ((MASK as u32 >> 2) & 0b11) + 8,
23908 ((MASK as u32 >> 4) & 0b11) + 8,
23909 ((MASK as u32 >> 6) & 0b11) + 8,
23910 (MASK as u32 & 0b11) + 12,
23911 ((MASK as u32 >> 2) & 0b11) + 12,
23912 ((MASK as u32 >> 4) & 0b11) + 12,
23913 ((MASK as u32 >> 6) & 0b11) + 12,
23914 ],
23915 );
23916 transmute(r)
23917 }
23918}
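
// Illustrative sketch (not part of the upstream sources): hypothetical usage of
// `_mm512_shuffle_epi32`. The 8-bit control assigns, per 128-bit lane, two selector bits to
// each of the four output dwords; `0b00_01_10_11` reverses every lane. Assumes avx512f.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn sketch_mm512_shuffle_epi32() -> __m512i {
    let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    // dst = [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12]
    _mm512_shuffle_epi32::<0b00_01_10_11>(a)
}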
23919
23920/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23921///
23922/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_epi32&expand=5148)
23923#[inline]
23924#[target_feature(enable = "avx512f")]
23925#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23926#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
23927#[rustc_legacy_const_generics(3)]
23928pub fn _mm512_mask_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
23929 src: __m512i,
23930 k: __mmask16,
23931 a: __m512i,
23932) -> __m512i {
23933 unsafe {
23934 static_assert_uimm_bits!(MASK, 8);
23935 let r: __m512i = _mm512_shuffle_epi32::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16()))
23937 }
23938}
23939
23940/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23941///
23942/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_epi32&expand=5149)
23943#[inline]
23944#[target_feature(enable = "avx512f")]
23945#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23946#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
23947#[rustc_legacy_const_generics(2)]
23948pub fn _mm512_maskz_shuffle_epi32<const MASK: _MM_PERM_ENUM>(k: __mmask16, a: __m512i) -> __m512i {
23949 unsafe {
23950 static_assert_uimm_bits!(MASK, 8);
23951 let r: __m512i = _mm512_shuffle_epi32::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO))
23953 }
23954}
23955
23956/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23957///
23958/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_epi32&expand=5145)
23959#[inline]
23960#[target_feature(enable = "avx512f,avx512vl")]
23961#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23962#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
23963#[rustc_legacy_const_generics(3)]
23964pub fn _mm256_mask_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
23965 src: __m256i,
23966 k: __mmask8,
23967 a: __m256i,
23968) -> __m256i {
23969 unsafe {
23970 static_assert_uimm_bits!(MASK, 8);
23971 let r: __m256i = _mm256_shuffle_epi32::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8()))
23973 }
23974}
23975
23976/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23977///
23978/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_epi32&expand=5146)
23979#[inline]
23980#[target_feature(enable = "avx512f,avx512vl")]
23981#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23982#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
23983#[rustc_legacy_const_generics(2)]
23984pub fn _mm256_maskz_shuffle_epi32<const MASK: _MM_PERM_ENUM>(k: __mmask8, a: __m256i) -> __m256i {
23985 unsafe {
23986 static_assert_uimm_bits!(MASK, 8);
23987 let r: __m256i = _mm256_shuffle_epi32::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO))
23989 }
23990}
23991
23992/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23993///
23994/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shuffle_epi32&expand=5142)
23995#[inline]
23996#[target_feature(enable = "avx512f,avx512vl")]
23997#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
23998#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
23999#[rustc_legacy_const_generics(3)]
24000pub fn _mm_mask_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
24001 src: __m128i,
24002 k: __mmask8,
24003 a: __m128i,
24004) -> __m128i {
24005 unsafe {
24006 static_assert_uimm_bits!(MASK, 8);
24007 let r: __m128i = _mm_shuffle_epi32::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4()))
24009 }
24010}
24011
24012/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24013///
24014/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shuffle_epi32&expand=5143)
24015#[inline]
24016#[target_feature(enable = "avx512f,avx512vl")]
24017#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24018#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
24019#[rustc_legacy_const_generics(2)]
24020pub fn _mm_maskz_shuffle_epi32<const MASK: _MM_PERM_ENUM>(k: __mmask8, a: __m128i) -> __m128i {
24021 unsafe {
24022 static_assert_uimm_bits!(MASK, 8);
24023 let r: __m128i = _mm_shuffle_epi32::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO))
24025 }
24026}
24027
24028/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst.
24029///
24030/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_ps&expand=5203)
24031#[inline]
24032#[target_feature(enable = "avx512f")]
24033#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24034#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24035#[rustc_legacy_const_generics(2)]
24036pub fn _mm512_shuffle_ps<const MASK: i32>(a: __m512, b: __m512) -> __m512 {
24037 unsafe {
24038 static_assert_uimm_bits!(MASK, 8);
24039 simd_shuffle!(
24040 a,
24041 b,
24042 [
24043 MASK as u32 & 0b11,
24044 (MASK as u32 >> 2) & 0b11,
24045 ((MASK as u32 >> 4) & 0b11) + 16,
24046 ((MASK as u32 >> 6) & 0b11) + 16,
24047 (MASK as u32 & 0b11) + 4,
24048 ((MASK as u32 >> 2) & 0b11) + 4,
24049 ((MASK as u32 >> 4) & 0b11) + 20,
24050 ((MASK as u32 >> 6) & 0b11) + 20,
24051 (MASK as u32 & 0b11) + 8,
24052 ((MASK as u32 >> 2) & 0b11) + 8,
24053 ((MASK as u32 >> 4) & 0b11) + 24,
24054 ((MASK as u32 >> 6) & 0b11) + 24,
24055 (MASK as u32 & 0b11) + 12,
24056 ((MASK as u32 >> 2) & 0b11) + 12,
24057 ((MASK as u32 >> 4) & 0b11) + 28,
24058 ((MASK as u32 >> 6) & 0b11) + 28,
24059 ],
24060 )
24061 }
24062}
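
// Illustrative sketch (not part of the upstream sources): hypothetical usage of
// `_mm512_shuffle_ps`. Within each 128-bit lane the two low results are selected from `a`
// and the two high results from `b`, two control bits per result. Assumes avx512f.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn sketch_mm512_shuffle_ps() -> __m512 {
    let a = _mm512_setr_ps(
        0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
    );
    let b = _mm512_set1_ps(-1.0);
    // Control 0b01_00_11_10: each lane becomes [a[2], a[3], b[0], b[1]] (lane-relative),
    // so the first lane of dst is [2.0, 3.0, -1.0, -1.0].
    _mm512_shuffle_ps::<0b01_00_11_10>(a, b)
}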
24063
24064/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24065///
24066/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_ps&expand=5201)
24067#[inline]
24068#[target_feature(enable = "avx512f")]
24069#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24070#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24071#[rustc_legacy_const_generics(4)]
24072pub fn _mm512_mask_shuffle_ps<const MASK: i32>(
24073 src: __m512,
24074 k: __mmask16,
24075 a: __m512,
24076 b: __m512,
24077) -> __m512 {
24078 unsafe {
24079 static_assert_uimm_bits!(MASK, 8);
24080 let r: __m512 = _mm512_shuffle_ps::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16()))
24082 }
24083}
24084
24085/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24086///
24087/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_ps&expand=5202)
24088#[inline]
24089#[target_feature(enable = "avx512f")]
24090#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24091#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24092#[rustc_legacy_const_generics(3)]
24093pub fn _mm512_maskz_shuffle_ps<const MASK: i32>(k: __mmask16, a: __m512, b: __m512) -> __m512 {
24094 unsafe {
24095 static_assert_uimm_bits!(MASK, 8);
24096 let r: __m512 = _mm512_shuffle_ps::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO))
24098 }
24099}
24100
24101/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24102///
24103/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_ps&expand=5198)
24104#[inline]
24105#[target_feature(enable = "avx512f,avx512vl")]
24106#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24107#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24108#[rustc_legacy_const_generics(4)]
24109pub fn _mm256_mask_shuffle_ps<const MASK: i32>(
24110 src: __m256,
24111 k: __mmask8,
24112 a: __m256,
24113 b: __m256,
24114) -> __m256 {
24115 unsafe {
24116 static_assert_uimm_bits!(MASK, 8);
24117 let r: __m256 = _mm256_shuffle_ps::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8()))
24119 }
24120}
24121
24122/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24123///
24124/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_ps&expand=5199)
24125#[inline]
24126#[target_feature(enable = "avx512f,avx512vl")]
24127#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24128#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24129#[rustc_legacy_const_generics(3)]
24130pub fn _mm256_maskz_shuffle_ps<const MASK: i32>(k: __mmask8, a: __m256, b: __m256) -> __m256 {
24131 unsafe {
24132 static_assert_uimm_bits!(MASK, 8);
24133 let r: __m256 = _mm256_shuffle_ps::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO))
24135 }
24136}
24137
24138/// Shuffle single-precision (32-bit) floating-point elements in a using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24139///
24140/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shuffle_ps&expand=5195)
24141#[inline]
24142#[target_feature(enable = "avx512f,avx512vl")]
24143#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24144#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24145#[rustc_legacy_const_generics(4)]
24146pub fn _mm_mask_shuffle_ps<const MASK: i32>(
24147 src: __m128,
24148 k: __mmask8,
24149 a: __m128,
24150 b: __m128,
24151) -> __m128 {
24152 unsafe {
24153 static_assert_uimm_bits!(MASK, 8);
24154 let r: __m128 = _mm_shuffle_ps::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4()))
24156 }
24157}
24158
24159/// Shuffle single-precision (32-bit) floating-point elements in a using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24160///
24161/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shuffle_ps&expand=5196)
24162#[inline]
24163#[target_feature(enable = "avx512f,avx512vl")]
24164#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24165#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24166#[rustc_legacy_const_generics(3)]
24167pub fn _mm_maskz_shuffle_ps<const MASK: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
24168 unsafe {
24169 static_assert_uimm_bits!(MASK, 8);
24170 let r: __m128 = _mm_shuffle_ps::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO))
24172 }
24173}
24174
24175/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst.
24176///
24177/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_pd&expand=5192)
24178#[inline]
24179#[target_feature(enable = "avx512f")]
24180#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24181#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
24182#[rustc_legacy_const_generics(2)]
24183pub fn _mm512_shuffle_pd<const MASK: i32>(a: __m512d, b: __m512d) -> __m512d {
24184 unsafe {
24185 static_assert_uimm_bits!(MASK, 8);
24186 simd_shuffle!(
24187 a,
24188 b,
24189 [
24190 MASK as u32 & 0b1,
24191 ((MASK as u32 >> 1) & 0b1) + 8,
24192 ((MASK as u32 >> 2) & 0b1) + 2,
24193 ((MASK as u32 >> 3) & 0b1) + 10,
24194 ((MASK as u32 >> 4) & 0b1) + 4,
24195 ((MASK as u32 >> 5) & 0b1) + 12,
24196 ((MASK as u32 >> 6) & 0b1) + 6,
24197 ((MASK as u32 >> 7) & 0b1) + 14,
24198 ],
24199 )
24200 }
24201}
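
// Illustrative sketch (not part of the upstream sources): hypothetical usage of
// `_mm512_shuffle_pd`. In every 128-bit lane the low result comes from `a` and the high
// result from `b`, with one control bit per result choosing that source's low or high
// element; control 0 therefore pairs up the lanes' low elements. Assumes avx512f.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn sketch_mm512_shuffle_pd() -> __m512d {
    let a = _mm512_setr_pd(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
    let b = _mm512_setr_pd(10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0);
    // dst = [0.0, 10.0, 2.0, 12.0, 4.0, 14.0, 6.0, 16.0]
    _mm512_shuffle_pd::<0>(a, b)
}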
24202
24203/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24204///
24205/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_pd&expand=5190)
24206#[inline]
24207#[target_feature(enable = "avx512f")]
24208#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24209#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
24210#[rustc_legacy_const_generics(4)]
24211pub fn _mm512_mask_shuffle_pd<const MASK: i32>(
24212 src: __m512d,
24213 k: __mmask8,
24214 a: __m512d,
24215 b: __m512d,
24216) -> __m512d {
24217 unsafe {
24218 static_assert_uimm_bits!(MASK, 8);
24219 let r: __m512d = _mm512_shuffle_pd::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
24221 }
24222}
24223
24224/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24225///
24226/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_pd&expand=5191)
24227#[inline]
24228#[target_feature(enable = "avx512f")]
24229#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24230#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
24231#[rustc_legacy_const_generics(3)]
24232pub fn _mm512_maskz_shuffle_pd<const MASK: i32>(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
24233 unsafe {
24234 static_assert_uimm_bits!(MASK, 8);
24235 let r: __m512d = _mm512_shuffle_pd::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
24237 }
24238}
24239
24240/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24241///
24242/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_pd&expand=5187)
24243#[inline]
24244#[target_feature(enable = "avx512f,avx512vl")]
24245#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24246#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
24247#[rustc_legacy_const_generics(4)]
24248pub fn _mm256_mask_shuffle_pd<const MASK: i32>(
24249 src: __m256d,
24250 k: __mmask8,
24251 a: __m256d,
24252 b: __m256d,
24253) -> __m256d {
24254 unsafe {
24255 static_assert_uimm_bits!(MASK, 8);
24256 let r: __m256d = _mm256_shuffle_pd::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
24258 }
24259}
24260
24261/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24262///
24263/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_pd&expand=5188)
24264#[inline]
24265#[target_feature(enable = "avx512f,avx512vl")]
24266#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24267#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
24268#[rustc_legacy_const_generics(3)]
24269pub fn _mm256_maskz_shuffle_pd<const MASK: i32>(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
24270 unsafe {
24271 static_assert_uimm_bits!(MASK, 8);
24272 let r: __m256d = _mm256_shuffle_pd::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
24274 }
24275}
24276
24277/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24278///
24279/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shuffle_pd&expand=5184)
24280#[inline]
24281#[target_feature(enable = "avx512f,avx512vl")]
24282#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24283#[cfg_attr(test, assert_instr(vshufpd, MASK = 1))]
24284#[rustc_legacy_const_generics(4)]
24285pub fn _mm_mask_shuffle_pd<const MASK: i32>(
24286 src: __m128d,
24287 k: __mmask8,
24288 a: __m128d,
24289 b: __m128d,
24290) -> __m128d {
24291 unsafe {
24292 static_assert_uimm_bits!(MASK, 8);
24293 let r: __m128d = _mm_shuffle_pd::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x2(), src.as_f64x2()))
24295 }
24296}
24297
24298/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24299///
24300/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shuffle_pd&expand=5185)
24301#[inline]
24302#[target_feature(enable = "avx512f,avx512vl")]
24303#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24304#[cfg_attr(test, assert_instr(vshufpd, MASK = 1))]
24305#[rustc_legacy_const_generics(3)]
24306pub fn _mm_maskz_shuffle_pd<const MASK: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
24307 unsafe {
24308 static_assert_uimm_bits!(MASK, 8);
24309 let r: __m128d = _mm_shuffle_pd::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x2(), f64x2::ZERO))
24311 }
24312}
24313
24314/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst.
24315///
24316/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_i32x4&expand=5177)
24317#[inline]
24318#[target_feature(enable = "avx512f")]
24319#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24320#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_01_01_01))] //should be vshufi32x4
24321#[rustc_legacy_const_generics(2)]
24322pub fn _mm512_shuffle_i32x4<const MASK: i32>(a: __m512i, b: __m512i) -> __m512i {
24323 unsafe {
24324 static_assert_uimm_bits!(MASK, 8);
24325 let a = a.as_i32x16();
24326 let b = b.as_i32x16();
24327 let r: i32x16 = simd_shuffle!(
24328 a,
24329 b,
24330 [
24331 (MASK as u32 & 0b11) * 4 + 0,
24332 (MASK as u32 & 0b11) * 4 + 1,
24333 (MASK as u32 & 0b11) * 4 + 2,
24334 (MASK as u32 & 0b11) * 4 + 3,
24335 ((MASK as u32 >> 2) & 0b11) * 4 + 0,
24336 ((MASK as u32 >> 2) & 0b11) * 4 + 1,
24337 ((MASK as u32 >> 2) & 0b11) * 4 + 2,
24338 ((MASK as u32 >> 2) & 0b11) * 4 + 3,
24339 ((MASK as u32 >> 4) & 0b11) * 4 + 0 + 16,
24340 ((MASK as u32 >> 4) & 0b11) * 4 + 1 + 16,
24341 ((MASK as u32 >> 4) & 0b11) * 4 + 2 + 16,
24342 ((MASK as u32 >> 4) & 0b11) * 4 + 3 + 16,
24343 ((MASK as u32 >> 6) & 0b11) * 4 + 0 + 16,
24344 ((MASK as u32 >> 6) & 0b11) * 4 + 1 + 16,
24345 ((MASK as u32 >> 6) & 0b11) * 4 + 2 + 16,
24346 ((MASK as u32 >> 6) & 0b11) * 4 + 3 + 16,
24347 ],
24348 );
24349 transmute(r)
24350 }
24351}
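
// Illustrative sketch (not part of the upstream sources): hypothetical usage of
// `_mm512_shuffle_i32x4`. The control moves whole 128-bit chunks: the two low output chunks
// come from `a` and the two high output chunks from `b`, two control bits each. With
// `0b11_10_01_00` the result is a's low 256 bits followed by b's high 256 bits. Assumes avx512f.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn sketch_mm512_shuffle_i32x4() -> __m512i {
    let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    let b = _mm512_set1_epi32(-1);
    // dst = [0, 1, 2, 3, 4, 5, 6, 7, -1, -1, -1, -1, -1, -1, -1, -1]
    _mm512_shuffle_i32x4::<0b11_10_01_00>(a, b)
}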
24352
24353/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24354///
24355/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_i32x4&expand=5175)
24356#[inline]
24357#[target_feature(enable = "avx512f")]
24358#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24359#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b10_11_01_01))]
24360#[rustc_legacy_const_generics(4)]
24361pub fn _mm512_mask_shuffle_i32x4<const MASK: i32>(
24362 src: __m512i,
24363 k: __mmask16,
24364 a: __m512i,
24365 b: __m512i,
24366) -> __m512i {
24367 unsafe {
24368 static_assert_uimm_bits!(MASK, 8);
24369 let r: __m512i = _mm512_shuffle_i32x4::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16()))
24371 }
24372}
24373
24374/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24375///
24376/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_i32x4&expand=5176)
24377#[inline]
24378#[target_feature(enable = "avx512f")]
24379#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24380#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b10_11_01_01))]
24381#[rustc_legacy_const_generics(3)]
24382pub fn _mm512_maskz_shuffle_i32x4<const MASK: i32>(
24383 k: __mmask16,
24384 a: __m512i,
24385 b: __m512i,
24386) -> __m512i {
24387 unsafe {
24388 static_assert_uimm_bits!(MASK, 8);
24389 let r: __m512i = _mm512_shuffle_i32x4::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO))
24391 }
24392}
24393
24394/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst.
24395///
24396/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_i32x4&expand=5174)
24397#[inline]
24398#[target_feature(enable = "avx512f,avx512vl")]
24399#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24400#[cfg_attr(test, assert_instr(vperm, MASK = 0b11))] //should be vshufi32x4
24401#[rustc_legacy_const_generics(2)]
24402pub fn _mm256_shuffle_i32x4<const MASK: i32>(a: __m256i, b: __m256i) -> __m256i {
24403 unsafe {
24404 static_assert_uimm_bits!(MASK, 8);
24405 let a: i32x8 = a.as_i32x8();
24406 let b: i32x8 = b.as_i32x8();
24407 let r: i32x8 = simd_shuffle!(
24408 a,
24409 b,
24410 [
24411 (MASK as u32 & 0b1) * 4 + 0,
24412 (MASK as u32 & 0b1) * 4 + 1,
24413 (MASK as u32 & 0b1) * 4 + 2,
24414 (MASK as u32 & 0b1) * 4 + 3,
24415 ((MASK as u32 >> 1) & 0b1) * 4 + 0 + 8,
24416 ((MASK as u32 >> 1) & 0b1) * 4 + 1 + 8,
24417 ((MASK as u32 >> 1) & 0b1) * 4 + 2 + 8,
24418 ((MASK as u32 >> 1) & 0b1) * 4 + 3 + 8,
24419 ],
24420 );
        transmute(r)
24422 }
24423}
24424
24425/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24426///
24427/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_i32x4&expand=5172)
24428#[inline]
24429#[target_feature(enable = "avx512f,avx512vl")]
24430#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24431#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b11))]
24432#[rustc_legacy_const_generics(4)]
24433pub fn _mm256_mask_shuffle_i32x4<const MASK: i32>(
24434 src: __m256i,
24435 k: __mmask8,
24436 a: __m256i,
24437 b: __m256i,
24438) -> __m256i {
24439 unsafe {
24440 static_assert_uimm_bits!(MASK, 8);
24441 let r: __m256i = _mm256_shuffle_i32x4::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8()))
24443 }
24444}
24445
24446/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24447///
24448/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_i32x4&expand=5173)
24449#[inline]
24450#[target_feature(enable = "avx512f,avx512vl")]
24451#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24452#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b11))]
24453#[rustc_legacy_const_generics(3)]
24454pub fn _mm256_maskz_shuffle_i32x4<const MASK: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
24455 unsafe {
24456 static_assert_uimm_bits!(MASK, 8);
24457 let r: __m256i = _mm256_shuffle_i32x4::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO))
24459 }
24460}
24461
24462/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst.
24463///
24464/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_i64x2&expand=5183)
24465#[inline]
24466#[target_feature(enable = "avx512f")]
24467#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24468#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_11_11_11))]
24469#[rustc_legacy_const_generics(2)]
24470pub fn _mm512_shuffle_i64x2<const MASK: i32>(a: __m512i, b: __m512i) -> __m512i {
24471 unsafe {
24472 static_assert_uimm_bits!(MASK, 8);
24473 let a: i64x8 = a.as_i64x8();
24474 let b: i64x8 = b.as_i64x8();
24475 let r: i64x8 = simd_shuffle!(
24476 a,
24477 b,
24478 [
24479 (MASK as u32 & 0b11) * 2 + 0,
24480 (MASK as u32 & 0b11) * 2 + 1,
24481 ((MASK as u32 >> 2) & 0b11) * 2 + 0,
24482 ((MASK as u32 >> 2) & 0b11) * 2 + 1,
24483 ((MASK as u32 >> 4) & 0b11) * 2 + 0 + 8,
24484 ((MASK as u32 >> 4) & 0b11) * 2 + 1 + 8,
24485 ((MASK as u32 >> 6) & 0b11) * 2 + 0 + 8,
24486 ((MASK as u32 >> 6) & 0b11) * 2 + 1 + 8,
24487 ],
24488 );
        transmute(r)
24490 }
24491}
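
// Illustrative sketch (not part of the upstream sources): hypothetical usage of
// `_mm512_shuffle_i64x2`, the 64-bit counterpart of `_mm512_shuffle_i32x4` (each 128-bit
// chunk holds two quadwords). Control 0 broadcasts a's lowest chunk into the low half and
// b's lowest chunk into the high half. Assumes avx512f.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn sketch_mm512_shuffle_i64x2() -> __m512i {
    let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
    let b = _mm512_setr_epi64(10, 11, 12, 13, 14, 15, 16, 17);
    // dst = [0, 1, 0, 1, 10, 11, 10, 11]
    _mm512_shuffle_i64x2::<0>(a, b)
}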
24492
24493/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24494///
24495/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_i64x2&expand=5181)
24496#[inline]
24497#[target_feature(enable = "avx512f")]
24498#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24499#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_11_11_11))]
24500#[rustc_legacy_const_generics(4)]
24501pub fn _mm512_mask_shuffle_i64x2<const MASK: i32>(
24502 src: __m512i,
24503 k: __mmask8,
24504 a: __m512i,
24505 b: __m512i,
24506) -> __m512i {
24507 unsafe {
24508 static_assert_uimm_bits!(MASK, 8);
24509 let r: __m512i = _mm512_shuffle_i64x2::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8()))
24511 }
24512}
24513
24514/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24515///
24516/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_i64x2&expand=5182)
24517#[inline]
24518#[target_feature(enable = "avx512f")]
24519#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24520#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_11_11_11))]
24521#[rustc_legacy_const_generics(3)]
24522pub fn _mm512_maskz_shuffle_i64x2<const MASK: i32>(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
24523 unsafe {
24524 static_assert_uimm_bits!(MASK, 8);
24525 let r: __m512i = _mm512_shuffle_i64x2::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO))
24527 }
24528}
24529
24530/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst.
24531///
24532/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_i64x2&expand=5180)
24533#[inline]
24534#[target_feature(enable = "avx512f,avx512vl")]
24535#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24536#[cfg_attr(test, assert_instr(vperm, MASK = 0b01))] //should be vshufi64x2
24537#[rustc_legacy_const_generics(2)]
24538pub fn _mm256_shuffle_i64x2<const MASK: i32>(a: __m256i, b: __m256i) -> __m256i {
24539 unsafe {
24540 static_assert_uimm_bits!(MASK, 8);
24541 let a: i64x4 = a.as_i64x4();
24542 let b: i64x4 = b.as_i64x4();
24543 let r: i64x4 = simd_shuffle!(
24544 a,
24545 b,
24546 [
24547 (MASK as u32 & 0b1) * 2 + 0,
24548 (MASK as u32 & 0b1) * 2 + 1,
24549 ((MASK as u32 >> 1) & 0b1) * 2 + 0 + 4,
24550 ((MASK as u32 >> 1) & 0b1) * 2 + 1 + 4,
24551 ],
24552 );
        transmute(r)
24554 }
24555}
24556
24557/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24558///
24559/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_i64x2&expand=5178)
24560#[inline]
24561#[target_feature(enable = "avx512f,avx512vl")]
24562#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24563#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b11))]
24564#[rustc_legacy_const_generics(4)]
24565pub fn _mm256_mask_shuffle_i64x2<const MASK: i32>(
24566 src: __m256i,
24567 k: __mmask8,
24568 a: __m256i,
24569 b: __m256i,
24570) -> __m256i {
24571 unsafe {
24572 static_assert_uimm_bits!(MASK, 8);
24573 let r: __m256i = _mm256_shuffle_i64x2::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4()))
24575 }
24576}
24577
24578/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24579///
24580/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_i64x2&expand=5179)
24581#[inline]
24582#[target_feature(enable = "avx512f,avx512vl")]
24583#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24584#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b11))]
24585#[rustc_legacy_const_generics(3)]
24586pub fn _mm256_maskz_shuffle_i64x2<const MASK: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
24587 unsafe {
24588 static_assert_uimm_bits!(MASK, 8);
24589 let r: __m256i = _mm256_shuffle_i64x2::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO))
24591 }
24592}
24593
24594/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
24595///
24596/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_f32x4&expand=5165)
24597#[inline]
24598#[target_feature(enable = "avx512f")]
24599#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24600#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b1011))] //should be vshuff32x4, but generate vshuff64x2
24601#[rustc_legacy_const_generics(2)]
24602pub fn _mm512_shuffle_f32x4<const MASK: i32>(a: __m512, b: __m512) -> __m512 {
24603 unsafe {
24604 static_assert_uimm_bits!(MASK, 8);
24605 let a = a.as_f32x16();
24606 let b = b.as_f32x16();
24607 let r: f32x16 = simd_shuffle!(
24608 a,
24609 b,
24610 [
24611 (MASK as u32 & 0b11) * 4 + 0,
24612 (MASK as u32 & 0b11) * 4 + 1,
24613 (MASK as u32 & 0b11) * 4 + 2,
24614 (MASK as u32 & 0b11) * 4 + 3,
24615 ((MASK as u32 >> 2) & 0b11) * 4 + 0,
24616 ((MASK as u32 >> 2) & 0b11) * 4 + 1,
24617 ((MASK as u32 >> 2) & 0b11) * 4 + 2,
24618 ((MASK as u32 >> 2) & 0b11) * 4 + 3,
24619 ((MASK as u32 >> 4) & 0b11) * 4 + 0 + 16,
24620 ((MASK as u32 >> 4) & 0b11) * 4 + 1 + 16,
24621 ((MASK as u32 >> 4) & 0b11) * 4 + 2 + 16,
24622 ((MASK as u32 >> 4) & 0b11) * 4 + 3 + 16,
24623 ((MASK as u32 >> 6) & 0b11) * 4 + 0 + 16,
24624 ((MASK as u32 >> 6) & 0b11) * 4 + 1 + 16,
24625 ((MASK as u32 >> 6) & 0b11) * 4 + 2 + 16,
24626 ((MASK as u32 >> 6) & 0b11) * 4 + 3 + 16,
24627 ],
24628 );
24629 transmute(r)
24630 }
24631}
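
// Worked example (illustrative note, not part of the upstream source): MASK is
// read as four 2-bit fields. Fields 0 and 1 pick 128-bit lanes of `a` for result
// lanes 0 and 1; fields 2 and 3 pick lanes of `b` for result lanes 2 and 3.
// With MASK = 0b00_00_10_11 the result is [a.lane3, a.lane2, b.lane0, b.lane0],
// i.e. elements [a[12..16], a[8..12], b[0..4], b[0..4]].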
24632
24633/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24634///
24635/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_f32x4&expand=5163)
24636#[inline]
24637#[target_feature(enable = "avx512f")]
24638#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24639#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b1011))]
24640#[rustc_legacy_const_generics(4)]
24641pub fn _mm512_mask_shuffle_f32x4<const MASK: i32>(
24642 src: __m512,
24643 k: __mmask16,
24644 a: __m512,
24645 b: __m512,
24646) -> __m512 {
24647 unsafe {
24648 static_assert_uimm_bits!(MASK, 8);
24649 let r: __m512 = _mm512_shuffle_f32x4::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16()))
24651 }
24652}
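
// Masking note (illustrative, not part of the upstream source): mask bit i
// controls result element i, so with k = 0x00FF the low eight f32 elements come
// from the shuffle of `a`/`b` and the high eight are copied through from `src`.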
24653
24654/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24655///
24656/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_f32x4&expand=5164)
24657#[inline]
24658#[target_feature(enable = "avx512f")]
24659#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24660#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b1011))]
24661#[rustc_legacy_const_generics(3)]
24662pub fn _mm512_maskz_shuffle_f32x4<const MASK: i32>(k: __mmask16, a: __m512, b: __m512) -> __m512 {
24663 unsafe {
24664 static_assert_uimm_bits!(MASK, 8);
24665 let r: __m512 = _mm512_shuffle_f32x4::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO))
24667 }
24668}
24669
24670/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
24671///
24672/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_f32x4&expand=5162)
24673#[inline]
24674#[target_feature(enable = "avx512f,avx512vl")]
24675#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24676#[cfg_attr(test, assert_instr(vperm, MASK = 0b01))] //should be vshuff32x4
24677#[rustc_legacy_const_generics(2)]
24678pub fn _mm256_shuffle_f32x4<const MASK: i32>(a: __m256, b: __m256) -> __m256 {
24679 unsafe {
24680 static_assert_uimm_bits!(MASK, 8);
24681 let a: f32x8 = a.as_f32x8();
24682 let b: f32x8 = b.as_f32x8();
24683 let r: f32x8 = simd_shuffle!(
24684 a,
24685 b,
24686 [
24687 (MASK as u32 & 0b1) * 4 + 0,
24688 (MASK as u32 & 0b1) * 4 + 1,
24689 (MASK as u32 & 0b1) * 4 + 2,
24690 (MASK as u32 & 0b1) * 4 + 3,
24691 ((MASK as u32 >> 1) & 0b1) * 4 + 0 + 8,
24692 ((MASK as u32 >> 1) & 0b1) * 4 + 1 + 8,
24693 ((MASK as u32 >> 1) & 0b1) * 4 + 2 + 8,
24694 ((MASK as u32 >> 1) & 0b1) * 4 + 3 + 8,
24695 ],
24696 );
        transmute(r)
24698 }
24699}
24700
24701/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24702///
24703/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_f32x4&expand=5160)
24704#[inline]
24705#[target_feature(enable = "avx512f,avx512vl")]
24706#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24707#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b11))]
24708#[rustc_legacy_const_generics(4)]
24709pub fn _mm256_mask_shuffle_f32x4<const MASK: i32>(
24710 src: __m256,
24711 k: __mmask8,
24712 a: __m256,
24713 b: __m256,
24714) -> __m256 {
24715 unsafe {
24716 static_assert_uimm_bits!(MASK, 8);
24717 let r: __m256 = _mm256_shuffle_f32x4::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8()))
24719 }
24720}
24721
24722/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24723///
24724/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_f32x4&expand=5161)
24725#[inline]
24726#[target_feature(enable = "avx512f,avx512vl")]
24727#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24728#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b11))]
24729#[rustc_legacy_const_generics(3)]
24730pub fn _mm256_maskz_shuffle_f32x4<const MASK: i32>(k: __mmask8, a: __m256, b: __m256) -> __m256 {
24731 unsafe {
24732 static_assert_uimm_bits!(MASK, 8);
24733 let r: __m256 = _mm256_shuffle_f32x4::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO))
24735 }
24736}
24737
24738/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
24739///
24740/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_f64x2&expand=5171)
24741#[inline]
24742#[target_feature(enable = "avx512f")]
24743#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24744#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b10_11_11_11))]
24745#[rustc_legacy_const_generics(2)]
24746pub fn _mm512_shuffle_f64x2<const MASK: i32>(a: __m512d, b: __m512d) -> __m512d {
24747 unsafe {
24748 static_assert_uimm_bits!(MASK, 8);
24749 let a: f64x8 = a.as_f64x8();
24750 let b: f64x8 = b.as_f64x8();
24751 let r: f64x8 = simd_shuffle!(
24752 a,
24753 b,
24754 [
24755 (MASK as u32 & 0b11) * 2 + 0,
24756 (MASK as u32 & 0b11) * 2 + 1,
24757 ((MASK as u32 >> 2) & 0b11) * 2 + 0,
24758 ((MASK as u32 >> 2) & 0b11) * 2 + 1,
24759 ((MASK as u32 >> 4) & 0b11) * 2 + 0 + 8,
24760 ((MASK as u32 >> 4) & 0b11) * 2 + 1 + 8,
24761 ((MASK as u32 >> 6) & 0b11) * 2 + 0 + 8,
24762 ((MASK as u32 >> 6) & 0b11) * 2 + 1 + 8,
24763 ],
24764 );
        transmute(r)
24766 }
24767}
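
// Worked example (illustrative note, not part of the upstream source): each 2-bit
// field of MASK selects a 128-bit lane (a pair of f64 elements); fields 0-1 index
// into `a`, fields 2-3 into `b`. With MASK = 0b10_11_11_11 (as in the test above)
// the result is [a.lane3, a.lane3, b.lane3, b.lane2].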
24768
24769/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24770///
24771/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_f64x2&expand=5169)
24772#[inline]
24773#[target_feature(enable = "avx512f")]
24774#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24775#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b10_11_11_11))]
24776#[rustc_legacy_const_generics(4)]
24777pub fn _mm512_mask_shuffle_f64x2<const MASK: i32>(
24778 src: __m512d,
24779 k: __mmask8,
24780 a: __m512d,
24781 b: __m512d,
24782) -> __m512d {
24783 unsafe {
24784 static_assert_uimm_bits!(MASK, 8);
24785 let r: __m512d = _mm512_shuffle_f64x2::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
24787 }
24788}
24789
24790/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24791///
24792/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_f64x2&expand=5170)
24793#[inline]
24794#[target_feature(enable = "avx512f")]
24795#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24796#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b10_11_11_11))]
24797#[rustc_legacy_const_generics(3)]
24798pub fn _mm512_maskz_shuffle_f64x2<const MASK: i32>(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
24799 unsafe {
24800 static_assert_uimm_bits!(MASK, 8);
24801 let r: __m512d = _mm512_shuffle_f64x2::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
24803 }
24804}
24805
24806/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
24807///
24808/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_f64x2&expand=5168)
24809#[inline]
24810#[target_feature(enable = "avx512f,avx512vl")]
24811#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24812#[cfg_attr(test, assert_instr(vperm, MASK = 0b01))] //should be vshuff64x2
24813#[rustc_legacy_const_generics(2)]
24814pub fn _mm256_shuffle_f64x2<const MASK: i32>(a: __m256d, b: __m256d) -> __m256d {
24815 unsafe {
24816 static_assert_uimm_bits!(MASK, 8);
24817 let a: f64x4 = a.as_f64x4();
24818 let b: f64x4 = b.as_f64x4();
24819 let r: f64x4 = simd_shuffle!(
24820 a,
24821 b,
24822 [
24823 (MASK as u32 & 0b1) * 2 + 0,
24824 (MASK as u32 & 0b1) * 2 + 1,
24825 ((MASK as u32 >> 1) & 0b1) * 2 + 0 + 4,
24826 ((MASK as u32 >> 1) & 0b1) * 2 + 1 + 4,
24827 ],
24828 );
        transmute(r)
24830 }
24831}
24832
24833/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24834///
24835/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_f64x2&expand=5166)
24836#[inline]
24837#[target_feature(enable = "avx512f,avx512vl")]
24838#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24839#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b11))]
24840#[rustc_legacy_const_generics(4)]
24841pub fn _mm256_mask_shuffle_f64x2<const MASK: i32>(
24842 src: __m256d,
24843 k: __mmask8,
24844 a: __m256d,
24845 b: __m256d,
24846) -> __m256d {
24847 unsafe {
24848 static_assert_uimm_bits!(MASK, 8);
24849 let r: __m256d = _mm256_shuffle_f64x2::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
24851 }
24852}
24853
24854/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24855///
24856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_f64x2&expand=5167)
24857#[inline]
24858#[target_feature(enable = "avx512f,avx512vl")]
24859#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24860#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b11))]
24861#[rustc_legacy_const_generics(3)]
24862pub fn _mm256_maskz_shuffle_f64x2<const MASK: i32>(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
24863 unsafe {
24864 static_assert_uimm_bits!(MASK, 8);
24865 let r: __m256d = _mm256_shuffle_f64x2::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
24867 }
24868}
24869
24870/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the result in dst.
24871///
24872/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extractf32x4_ps&expand=2442)
24873#[inline]
24874#[target_feature(enable = "avx512f")]
24875#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24876#[cfg_attr(test, assert_instr(vextractf32x4, IMM8 = 3))]
24877#[rustc_legacy_const_generics(1)]
24878pub fn _mm512_extractf32x4_ps<const IMM8: i32>(a: __m512) -> __m128 {
24879 unsafe {
24880 static_assert_uimm_bits!(IMM8, 2);
24881 match IMM8 & 0x3 {
24882 0 => simd_shuffle!(a, _mm512_undefined_ps(), [0, 1, 2, 3]),
24883 1 => simd_shuffle!(a, _mm512_undefined_ps(), [4, 5, 6, 7]),
24884 2 => simd_shuffle!(a, _mm512_undefined_ps(), [8, 9, 10, 11]),
24885 _ => simd_shuffle!(a, _mm512_undefined_ps(), [12, 13, 14, 15]),
24886 }
24887 }
24888}
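
// Illustrative note (not part of the upstream source): IMM8 simply selects one of
// the four 128-bit lanes of `a`. If `a` holds 0.0..=15.0 in element order, then
// _mm512_extractf32x4_ps::<2>(a) returns the lane containing [8.0, 9.0, 10.0, 11.0].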
24889
24890/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24891///
24892/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extractf32x4_ps&expand=2443)
24893#[inline]
24894#[target_feature(enable = "avx512f")]
24895#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24896#[cfg_attr(test, assert_instr(vextractf32x4, IMM8 = 3))]
24897#[rustc_legacy_const_generics(3)]
24898pub fn _mm512_mask_extractf32x4_ps<const IMM8: i32>(src: __m128, k: __mmask8, a: __m512) -> __m128 {
24899 unsafe {
24900 static_assert_uimm_bits!(IMM8, 2);
24901 let r: __m128 = _mm512_extractf32x4_ps::<IMM8>(a);
        transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4()))
24903 }
24904}
24905
24906/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24907///
24908/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extractf32x4_ps&expand=2444)
24909#[inline]
24910#[target_feature(enable = "avx512f")]
24911#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24912#[cfg_attr(test, assert_instr(vextractf32x4, IMM8 = 3))]
24913#[rustc_legacy_const_generics(2)]
24914pub fn _mm512_maskz_extractf32x4_ps<const IMM8: i32>(k: __mmask8, a: __m512) -> __m128 {
24915 unsafe {
24916 static_assert_uimm_bits!(IMM8, 2);
24917 let r: __m128 = _mm512_extractf32x4_ps::<IMM8>(a);
        transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO))
24919 }
24920}
24921
24922/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the result in dst.
24923///
24924/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extractf32x4_ps&expand=2439)
24925#[inline]
24926#[target_feature(enable = "avx512f,avx512vl")]
24927#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24928#[cfg_attr(
24929 test,
24930 assert_instr(vextract, IMM8 = 1) //should be vextractf32x4
24931)]
24932#[rustc_legacy_const_generics(1)]
24933pub fn _mm256_extractf32x4_ps<const IMM8: i32>(a: __m256) -> __m128 {
24934 unsafe {
24935 static_assert_uimm_bits!(IMM8, 1);
24936 match IMM8 & 0x1 {
24937 0 => simd_shuffle!(a, _mm256_undefined_ps(), [0, 1, 2, 3]),
24938 _ => simd_shuffle!(a, _mm256_undefined_ps(), [4, 5, 6, 7]),
24939 }
24940 }
24941}
24942
24943/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24944///
24945/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_extractf32x4_ps&expand=2440)
24946#[inline]
24947#[target_feature(enable = "avx512f,avx512vl")]
24948#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24949#[cfg_attr(test, assert_instr(vextractf32x4, IMM8 = 1))]
24950#[rustc_legacy_const_generics(3)]
24951pub fn _mm256_mask_extractf32x4_ps<const IMM8: i32>(src: __m128, k: __mmask8, a: __m256) -> __m128 {
24952 unsafe {
24953 static_assert_uimm_bits!(IMM8, 1);
24954 let r: __m128 = _mm256_extractf32x4_ps::<IMM8>(a);
        transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4()))
24956 }
24957}
24958
24959/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24960///
24961/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_extractf32x4_ps&expand=2441)
24962#[inline]
24963#[target_feature(enable = "avx512f,avx512vl")]
24964#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24965#[cfg_attr(test, assert_instr(vextractf32x4, IMM8 = 1))]
24966#[rustc_legacy_const_generics(2)]
24967pub fn _mm256_maskz_extractf32x4_ps<const IMM8: i32>(k: __mmask8, a: __m256) -> __m128 {
24968 unsafe {
24969 static_assert_uimm_bits!(IMM8, 1);
24970 let r: __m128 = _mm256_extractf32x4_ps::<IMM8>(a);
        transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO))
24972 }
24973}
24974
24975/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with IMM1, and store the result in dst.
24976///
24977/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extracti64x4_epi64&expand=2473)
24978#[inline]
24979#[target_feature(enable = "avx512f")]
24980#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
24981#[cfg_attr(
24982 test,
24983 assert_instr(vextractf64x4, IMM1 = 1) //should be vextracti64x4
24984)]
24985#[rustc_legacy_const_generics(1)]
24986pub fn _mm512_extracti64x4_epi64<const IMM1: i32>(a: __m512i) -> __m256i {
24987 unsafe {
24988 static_assert_uimm_bits!(IMM1, 1);
24989 match IMM1 {
24990 0 => simd_shuffle!(a, _mm512_setzero_si512(), [0, 1, 2, 3]),
24991 _ => simd_shuffle!(a, _mm512_setzero_si512(), [4, 5, 6, 7]),
24992 }
24993 }
24994}
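
// Illustrative note (not part of the upstream source): IMM1 selects a 256-bit
// half of `a`; 0 returns the low four 64-bit elements and 1 returns the high four.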
24995
24996/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with IMM1, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24997///
24998/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extracti64x4_epi64&expand=2474)
24999#[inline]
25000#[target_feature(enable = "avx512f")]
25001#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25002#[cfg_attr(test, assert_instr(vextracti64x4, IMM1 = 1))]
25003#[rustc_legacy_const_generics(3)]
25004pub fn _mm512_mask_extracti64x4_epi64<const IMM1: i32>(
25005 src: __m256i,
25006 k: __mmask8,
25007 a: __m512i,
25008) -> __m256i {
25009 unsafe {
25010 static_assert_uimm_bits!(IMM1, 1);
25011 let r: __m256i = _mm512_extracti64x4_epi64::<IMM1>(a);
        transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4()))
25013 }
25014}
25015
25016/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with IMM1, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25017///
25018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extracti64x4_epi64&expand=2475)
25019#[inline]
25020#[target_feature(enable = "avx512f")]
25021#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25022#[cfg_attr(test, assert_instr(vextracti64x4, IMM1 = 1))]
25023#[rustc_legacy_const_generics(2)]
25024pub fn _mm512_maskz_extracti64x4_epi64<const IMM1: i32>(k: __mmask8, a: __m512i) -> __m256i {
25025 unsafe {
25026 static_assert_uimm_bits!(IMM1, 1);
25027 let r: __m256i = _mm512_extracti64x4_epi64::<IMM1>(a);
        transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO))
25029 }
25030}
25031
25032/// Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from a, selected with imm8, and store the result in dst.
25033///
25034/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extractf64x4_pd&expand=2454)
25035#[inline]
25036#[target_feature(enable = "avx512f")]
25037#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25038#[cfg_attr(test, assert_instr(vextractf64x4, IMM8 = 1))]
25039#[rustc_legacy_const_generics(1)]
25040pub fn _mm512_extractf64x4_pd<const IMM8: i32>(a: __m512d) -> __m256d {
25041 unsafe {
25042 static_assert_uimm_bits!(IMM8, 1);
25043 match IMM8 & 0x1 {
25044 0 => simd_shuffle!(a, _mm512_undefined_pd(), [0, 1, 2, 3]),
25045 _ => simd_shuffle!(a, _mm512_undefined_pd(), [4, 5, 6, 7]),
25046 }
25047 }
25048}
25049
25050/// Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25051///
25052/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extractf64x4_pd&expand=2455)
25053#[inline]
25054#[target_feature(enable = "avx512f")]
25055#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25056#[cfg_attr(test, assert_instr(vextractf64x4, IMM8 = 1))]
25057#[rustc_legacy_const_generics(3)]
25058pub fn _mm512_mask_extractf64x4_pd<const IMM8: i32>(
25059 src: __m256d,
25060 k: __mmask8,
25061 a: __m512d,
25062) -> __m256d {
25063 unsafe {
25064 static_assert_uimm_bits!(IMM8, 1);
25065 let r: __m256d = _mm512_extractf64x4_pd::<IMM8>(a);
        transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
25067 }
25068}
25069
25070/// Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25071///
25072/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extractf64x4_pd&expand=2456)
25073#[inline]
25074#[target_feature(enable = "avx512f")]
25075#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25076#[cfg_attr(test, assert_instr(vextractf64x4, IMM8 = 1))]
25077#[rustc_legacy_const_generics(2)]
25078pub fn _mm512_maskz_extractf64x4_pd<const IMM8: i32>(k: __mmask8, a: __m512d) -> __m256d {
25079 unsafe {
25080 static_assert_uimm_bits!(IMM8, 1);
25081 let r: __m256d = _mm512_extractf64x4_pd::<IMM8>(a);
        transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
25083 }
25084}
25085
25086/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM2, and store the result in dst.
25087///
25088/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extracti32x4_epi32&expand=2461)
25089#[inline]
25090#[target_feature(enable = "avx512f")]
25091#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25092#[cfg_attr(
25093 test,
25094 assert_instr(vextractf32x4, IMM2 = 3) //should be vextracti32x4
25095)]
25096#[rustc_legacy_const_generics(1)]
25097pub fn _mm512_extracti32x4_epi32<const IMM2: i32>(a: __m512i) -> __m128i {
25098 unsafe {
25099 static_assert_uimm_bits!(IMM2, 2);
25100 let a: i32x16 = a.as_i32x16();
25101 let zero: i32x16 = i32x16::ZERO;
25102 let extract: i32x4 = match IMM2 {
25103 0 => simd_shuffle!(a, zero, [0, 1, 2, 3]),
25104 1 => simd_shuffle!(a, zero, [4, 5, 6, 7]),
25105 2 => simd_shuffle!(a, zero, [8, 9, 10, 11]),
25106 _ => simd_shuffle!(a, zero, [12, 13, 14, 15]),
25107 };
        transmute(extract)
25109 }
25110}
25111
25112/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM2, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25113///
25114/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extracti32x4_epi32&expand=2462)
25115#[inline]
25116#[target_feature(enable = "avx512f")]
25117#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25118#[cfg_attr(test, assert_instr(vextracti32x4, IMM2 = 3))]
25119#[rustc_legacy_const_generics(3)]
25120pub fn _mm512_mask_extracti32x4_epi32<const IMM2: i32>(
25121 src: __m128i,
25122 k: __mmask8,
25123 a: __m512i,
25124) -> __m128i {
25125 unsafe {
25126 static_assert_uimm_bits!(IMM2, 2);
25127 let r: __m128i = _mm512_extracti32x4_epi32::<IMM2>(a);
        transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4()))
25129 }
25130}
25131
25132/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM2, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25133///
25134/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extracti32x4_epi32&expand=2463)
25135#[inline]
25136#[target_feature(enable = "avx512f")]
25137#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25138#[cfg_attr(test, assert_instr(vextracti32x4, IMM2 = 3))]
25139#[rustc_legacy_const_generics(2)]
25140pub fn _mm512_maskz_extracti32x4_epi32<const IMM2: i32>(k: __mmask8, a: __m512i) -> __m128i {
25141 unsafe {
25142 static_assert_uimm_bits!(IMM2, 2);
25143 let r: __m128i = _mm512_extracti32x4_epi32::<IMM2>(a);
        transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO))
25145 }
25146}
25147
25148/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM1, and store the result in dst.
25149///
25150/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extracti32x4_epi32&expand=2458)
25151#[inline]
25152#[target_feature(enable = "avx512f,avx512vl")]
25153#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25154#[cfg_attr(
25155 test,
25156 assert_instr(vextract, IMM1 = 1) //should be vextracti32x4
25157)]
25158#[rustc_legacy_const_generics(1)]
25159pub fn _mm256_extracti32x4_epi32<const IMM1: i32>(a: __m256i) -> __m128i {
25160 unsafe {
25161 static_assert_uimm_bits!(IMM1, 1);
25162 let a: i32x8 = a.as_i32x8();
25163 let zero: i32x8 = i32x8::ZERO;
25164 let extract: i32x4 = match IMM1 {
25165 0 => simd_shuffle!(a, zero, [0, 1, 2, 3]),
25166 _ => simd_shuffle!(a, zero, [4, 5, 6, 7]),
25167 };
        transmute(extract)
25169 }
25170}
25171
25172/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM1, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25173///
25174/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_extracti32x4_epi32&expand=2459)
25175#[inline]
25176#[target_feature(enable = "avx512f,avx512vl")]
25177#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25178#[cfg_attr(test, assert_instr(vextracti32x4, IMM1 = 1))]
25179#[rustc_legacy_const_generics(3)]
25180pub fn _mm256_mask_extracti32x4_epi32<const IMM1: i32>(
25181 src: __m128i,
25182 k: __mmask8,
25183 a: __m256i,
25184) -> __m128i {
25185 unsafe {
25186 static_assert_uimm_bits!(IMM1, 1);
25187 let r: __m128i = _mm256_extracti32x4_epi32::<IMM1>(a);
        transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4()))
25189 }
25190}
25191
25192/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM1, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25193///
25194/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_extracti32x4_epi32&expand=2460)
25195#[inline]
25196#[target_feature(enable = "avx512f,avx512vl")]
25197#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25198#[cfg_attr(test, assert_instr(vextracti32x4, IMM1 = 1))]
25199#[rustc_legacy_const_generics(2)]
25200pub fn _mm256_maskz_extracti32x4_epi32<const IMM1: i32>(k: __mmask8, a: __m256i) -> __m128i {
25201 unsafe {
25202 static_assert_uimm_bits!(IMM1, 1);
25203 let r: __m128i = _mm256_extracti32x4_epi32::<IMM1>(a);
        transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO))
25205 }
25206}
25207
25208/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst.
25209///
25210/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_moveldup_ps&expand=3862)
25211#[inline]
25212#[target_feature(enable = "avx512f")]
25213#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25214#[cfg_attr(test, assert_instr(vmovsldup))]
25215pub fn _mm512_moveldup_ps(a: __m512) -> __m512 {
25216 unsafe {
25217 let r: f32x16 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
        transmute(r)
25219 }
25220}
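
// Illustrative note (not part of the upstream source): each even-indexed element
// is duplicated into the odd slot above it, so [a0, a1, a2, a3, ...] becomes
// [a0, a0, a2, a2, ...].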
25221
25222/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25223///
25224/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_moveldup_ps&expand=3860)
25225#[inline]
25226#[target_feature(enable = "avx512f")]
25227#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25228#[cfg_attr(test, assert_instr(vmovsldup))]
25229pub fn _mm512_mask_moveldup_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
25230 unsafe {
25231 let mov: f32x16 =
25232 simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
        transmute(simd_select_bitmask(k, mov, src.as_f32x16()))
25234 }
25235}
25236
25237/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25238///
25239/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_moveldup_ps&expand=3861)
25240#[inline]
25241#[target_feature(enable = "avx512f")]
25242#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25243#[cfg_attr(test, assert_instr(vmovsldup))]
25244pub fn _mm512_maskz_moveldup_ps(k: __mmask16, a: __m512) -> __m512 {
25245 unsafe {
25246 let mov: f32x16 =
25247 simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
        transmute(simd_select_bitmask(k, mov, f32x16::ZERO))
25249 }
25250}
25251
25252/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25253///
25254/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_moveldup_ps&expand=3857)
25255#[inline]
25256#[target_feature(enable = "avx512f,avx512vl")]
25257#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25258#[cfg_attr(test, assert_instr(vmovsldup))]
25259pub fn _mm256_mask_moveldup_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
25260 unsafe {
25261 let mov: __m256 = _mm256_moveldup_ps(a);
        transmute(simd_select_bitmask(k, mov.as_f32x8(), src.as_f32x8()))
25263 }
25264}
25265
25266/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25267///
25268/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_moveldup_ps&expand=3858)
25269#[inline]
25270#[target_feature(enable = "avx512f,avx512vl")]
25271#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25272#[cfg_attr(test, assert_instr(vmovsldup))]
25273pub fn _mm256_maskz_moveldup_ps(k: __mmask8, a: __m256) -> __m256 {
25274 unsafe {
25275 let mov: __m256 = _mm256_moveldup_ps(a);
        transmute(simd_select_bitmask(k, mov.as_f32x8(), f32x8::ZERO))
25277 }
25278}
25279
25280/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25281///
25282/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_moveldup_ps&expand=3854)
25283#[inline]
25284#[target_feature(enable = "avx512f,avx512vl")]
25285#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25286#[cfg_attr(test, assert_instr(vmovsldup))]
25287pub fn _mm_mask_moveldup_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
25288 unsafe {
25289 let mov: __m128 = _mm_moveldup_ps(a);
        transmute(simd_select_bitmask(k, mov.as_f32x4(), src.as_f32x4()))
25291 }
25292}
25293
25294/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25295///
25296/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_moveldup_ps&expand=3855)
25297#[inline]
25298#[target_feature(enable = "avx512f,avx512vl")]
25299#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25300#[cfg_attr(test, assert_instr(vmovsldup))]
25301pub fn _mm_maskz_moveldup_ps(k: __mmask8, a: __m128) -> __m128 {
25302 unsafe {
25303 let mov: __m128 = _mm_moveldup_ps(a);
        transmute(simd_select_bitmask(k, mov.as_f32x4(), f32x4::ZERO))
25305 }
25306}
25307
25308/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst.
25309///
25310/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movehdup_ps&expand=3852)
25311#[inline]
25312#[target_feature(enable = "avx512f")]
25313#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25314#[cfg_attr(test, assert_instr(vmovshdup))]
25315pub fn _mm512_movehdup_ps(a: __m512) -> __m512 {
25316 unsafe {
25317 let r: f32x16 = simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
        transmute(r)
25319 }
25320}
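
// Illustrative note (not part of the upstream source): the odd-indexed twin of
// _mm512_moveldup_ps; [a0, a1, a2, a3, ...] becomes [a1, a1, a3, a3, ...].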
25321
25322/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25323///
25324/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_movehdup_ps&expand=3850)
25325#[inline]
25326#[target_feature(enable = "avx512f")]
25327#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25328#[cfg_attr(test, assert_instr(vmovshdup))]
25329pub fn _mm512_mask_movehdup_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
25330 unsafe {
25331 let mov: f32x16 =
25332 simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
        transmute(simd_select_bitmask(k, mov, src.as_f32x16()))
25334 }
25335}
25336
25337/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25338///
25339/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_movehdup_ps&expand=3851)
25340#[inline]
25341#[target_feature(enable = "avx512f")]
25342#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25343#[cfg_attr(test, assert_instr(vmovshdup))]
25344pub fn _mm512_maskz_movehdup_ps(k: __mmask16, a: __m512) -> __m512 {
25345 unsafe {
25346 let mov: f32x16 =
25347 simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
        transmute(simd_select_bitmask(k, mov, f32x16::ZERO))
25349 }
25350}
25351
25352/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25353///
25354/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_movehdup_ps&expand=3847)
25355#[inline]
25356#[target_feature(enable = "avx512f,avx512vl")]
25357#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25358#[cfg_attr(test, assert_instr(vmovshdup))]
25359pub fn _mm256_mask_movehdup_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
25360 unsafe {
25361 let mov: __m256 = _mm256_movehdup_ps(a);
        transmute(simd_select_bitmask(k, mov.as_f32x8(), src.as_f32x8()))
25363 }
25364}
25365
25366/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25367///
25368/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_movehdup_ps&expand=3848)
25369#[inline]
25370#[target_feature(enable = "avx512f,avx512vl")]
25371#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25372#[cfg_attr(test, assert_instr(vmovshdup))]
25373pub fn _mm256_maskz_movehdup_ps(k: __mmask8, a: __m256) -> __m256 {
25374 unsafe {
25375 let mov: __m256 = _mm256_movehdup_ps(a);
        transmute(simd_select_bitmask(k, mov.as_f32x8(), f32x8::ZERO))
25377 }
25378}
25379
25380/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25381///
25382/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_movehdup_ps&expand=3844)
25383#[inline]
25384#[target_feature(enable = "avx512f,avx512vl")]
25385#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25386#[cfg_attr(test, assert_instr(vmovshdup))]
25387pub fn _mm_mask_movehdup_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
25388 unsafe {
25389 let mov: __m128 = _mm_movehdup_ps(a);
        transmute(simd_select_bitmask(k, mov.as_f32x4(), src.as_f32x4()))
25391 }
25392}
25393
25394/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25395///
25396/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_movehdup_ps&expand=3845)
25397#[inline]
25398#[target_feature(enable = "avx512f,avx512vl")]
25399#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25400#[cfg_attr(test, assert_instr(vmovshdup))]
25401pub fn _mm_maskz_movehdup_ps(k: __mmask8, a: __m128) -> __m128 {
25402 unsafe {
25403 let mov: __m128 = _mm_movehdup_ps(a);
        transmute(simd_select_bitmask(k, mov.as_f32x4(), f32x4::ZERO))
25405 }
25406}
25407
25408/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst.
25409///
25410/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movedup_pd&expand=3843)
25411#[inline]
25412#[target_feature(enable = "avx512f")]
25413#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25414#[cfg_attr(test, assert_instr(vmovddup))]
25415pub fn _mm512_movedup_pd(a: __m512d) -> __m512d {
25416 unsafe {
25417 let r: f64x8 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
        transmute(r)
25419 }
25420}
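
// Illustrative note (not part of the upstream source): duplicates the lower f64 of
// each 128-bit pair, so [a0, a1, a2, a3, a4, a5, a6, a7] becomes
// [a0, a0, a2, a2, a4, a4, a6, a6].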
25421
25422/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25423///
25424/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_movedup_pd&expand=3841)
25425#[inline]
25426#[target_feature(enable = "avx512f")]
25427#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25428#[cfg_attr(test, assert_instr(vmovddup))]
25429pub fn _mm512_mask_movedup_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
25430 unsafe {
25431 let mov: f64x8 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
        transmute(simd_select_bitmask(k, mov, src.as_f64x8()))
25433 }
25434}
25435
25436/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25437///
25438/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_movedup_pd&expand=3842)
25439#[inline]
25440#[target_feature(enable = "avx512f")]
25441#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25442#[cfg_attr(test, assert_instr(vmovddup))]
25443pub fn _mm512_maskz_movedup_pd(k: __mmask8, a: __m512d) -> __m512d {
25444 unsafe {
25445 let mov: f64x8 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
        transmute(simd_select_bitmask(k, mov, f64x8::ZERO))
25447 }
25448}
25449
25450/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25451///
25452/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_movedup_pd&expand=3838)
25453#[inline]
25454#[target_feature(enable = "avx512f,avx512vl")]
25455#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25456#[cfg_attr(test, assert_instr(vmovddup))]
25457pub fn _mm256_mask_movedup_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
25458 unsafe {
25459 let mov: __m256d = _mm256_movedup_pd(a);
        transmute(simd_select_bitmask(k, mov.as_f64x4(), src.as_f64x4()))
25461 }
25462}
25463
25464/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25465///
25466/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_movedup_pd&expand=3839)
25467#[inline]
25468#[target_feature(enable = "avx512f,avx512vl")]
25469#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25470#[cfg_attr(test, assert_instr(vmovddup))]
25471pub fn _mm256_maskz_movedup_pd(k: __mmask8, a: __m256d) -> __m256d {
25472 unsafe {
25473 let mov: __m256d = _mm256_movedup_pd(a);
        transmute(simd_select_bitmask(k, mov.as_f64x4(), f64x4::ZERO))
25475 }
25476}
25477
25478/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25479///
25480/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_movedup_pd&expand=3835)
25481#[inline]
25482#[target_feature(enable = "avx512f,avx512vl")]
25483#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25484#[cfg_attr(test, assert_instr(vmovddup))]
25485pub fn _mm_mask_movedup_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
25486 unsafe {
25487 let mov: __m128d = _mm_movedup_pd(a);
        transmute(simd_select_bitmask(k, mov.as_f64x2(), src.as_f64x2()))
25489 }
25490}
25491
25492/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25493///
25494/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_movedup_pd&expand=3836)
25495#[inline]
25496#[target_feature(enable = "avx512f,avx512vl")]
25497#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25498#[cfg_attr(test, assert_instr(vmovddup))]
25499pub fn _mm_maskz_movedup_pd(k: __mmask8, a: __m128d) -> __m128d {
25500 unsafe {
25501 let mov: __m128d = _mm_movedup_pd(a);
        transmute(simd_select_bitmask(k, mov.as_f64x2(), f64x2::ZERO))
25503 }
25504}
25505
25506/// Copy a to dst, then insert 128 bits (composed of 4 packed 32-bit integers) from b into dst at the location specified by imm8.
25507///
25508/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_inserti32x4&expand=3174)
25509#[inline]
25510#[target_feature(enable = "avx512f")]
25511#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25512#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))] //should be vinserti32x4
25513#[rustc_legacy_const_generics(2)]
25514pub fn _mm512_inserti32x4<const IMM8: i32>(a: __m512i, b: __m128i) -> __m512i {
25515 unsafe {
25516 static_assert_uimm_bits!(IMM8, 2);
25517 let a = a.as_i32x16();
25518 let b = _mm512_castsi128_si512(b).as_i32x16();
25519 let ret: i32x16 = match IMM8 & 0b11 {
25520 0 => {
25521 simd_shuffle!(
25522 a,
25523 b,
25524 [16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
25525 )
25526 }
25527 1 => {
25528 simd_shuffle!(
25529 a,
25530 b,
25531 [0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15],
25532 )
25533 }
25534 2 => {
25535 simd_shuffle!(
25536 a,
25537 b,
25538 [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 12, 13, 14, 15],
25539 )
25540 }
25541 _ => {
25542 simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19])
25543 }
25544 };
25545 transmute(ret)
25546 }
25547}
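
// Worked example (illustrative note, not part of the upstream source): IMM8 picks
// which 128-bit lane of `a` is replaced by `b`. With IMM8 = 2 the result keeps
// elements 0..=7 and 12..=15 of `a` and holds the four elements of `b` at
// positions 8..=11, matching the shuffle indices 16..=19 above.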
25548
25549/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25550///
25551/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_inserti32x4&expand=3175)
25552#[inline]
25553#[target_feature(enable = "avx512f")]
25554#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25555#[cfg_attr(test, assert_instr(vinserti32x4, IMM8 = 2))]
25556#[rustc_legacy_const_generics(4)]
25557pub fn _mm512_mask_inserti32x4<const IMM8: i32>(
25558 src: __m512i,
25559 k: __mmask16,
25560 a: __m512i,
25561 b: __m128i,
25562) -> __m512i {
25563 unsafe {
25564 static_assert_uimm_bits!(IMM8, 2);
25565 let r: __m512i = _mm512_inserti32x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16()))
25567 }
25568}
25569
25570/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25571///
25572/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_inserti32x4&expand=3176)
25573#[inline]
25574#[target_feature(enable = "avx512f")]
25575#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25576#[cfg_attr(test, assert_instr(vinserti32x4, IMM8 = 2))]
25577#[rustc_legacy_const_generics(3)]
25578pub fn _mm512_maskz_inserti32x4<const IMM8: i32>(k: __mmask16, a: __m512i, b: __m128i) -> __m512i {
25579 unsafe {
25580 static_assert_uimm_bits!(IMM8, 2);
25581 let r: __m512i = _mm512_inserti32x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO))
25583 }
25584}
25585
25586/// Copy a to dst, then insert 128 bits (composed of 4 packed 32-bit integers) from b into dst at the location specified by imm8.
25587///
25588/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_inserti32x4&expand=3171)
25589#[inline]
25590#[target_feature(enable = "avx512f,avx512vl")]
25591#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25592#[cfg_attr(
25593 test,
25594 assert_instr(vinsert, IMM8 = 1) //should be vinserti32x4
25595)]
25596#[rustc_legacy_const_generics(2)]
25597pub fn _mm256_inserti32x4<const IMM8: i32>(a: __m256i, b: __m128i) -> __m256i {
25598 unsafe {
25599 static_assert_uimm_bits!(IMM8, 1);
25600 let a: i32x8 = a.as_i32x8();
25601 let b: i32x8 = _mm256_castsi128_si256(b).as_i32x8();
25602 let ret: i32x8 = match IMM8 & 0b1 {
25603 0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
25604 _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
25605 };
        transmute(ret)
25607 }
25608}
25609
25610/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25611///
25612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_inserti32x4&expand=3172)
25613#[inline]
25614#[target_feature(enable = "avx512f,avx512vl")]
25615#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25616#[cfg_attr(test, assert_instr(vinserti32x4, IMM8 = 1))]
25617#[rustc_legacy_const_generics(4)]
25618pub fn _mm256_mask_inserti32x4<const IMM8: i32>(
25619 src: __m256i,
25620 k: __mmask8,
25621 a: __m256i,
25622 b: __m128i,
25623) -> __m256i {
25624 unsafe {
25625 static_assert_uimm_bits!(IMM8, 1);
25626 let r: __m256i = _mm256_inserti32x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8()))
25628 }
25629}
25630
25631/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25632///
25633/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_inserti32x4&expand=3173)
25634#[inline]
25635#[target_feature(enable = "avx512f,avx512vl")]
25636#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25637#[cfg_attr(test, assert_instr(vinserti32x4, IMM8 = 1))]
25638#[rustc_legacy_const_generics(3)]
25639pub fn _mm256_maskz_inserti32x4<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m128i) -> __m256i {
25640 unsafe {
25641 static_assert_uimm_bits!(IMM8, 1);
25642 let r: __m256i = _mm256_inserti32x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO))
25644 }
25645}
25646
25647/// Copy a to dst, then insert 256 bits (composed of 4 packed 64-bit integers) from b into dst at the location specified by imm8.
25648///
25649/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_inserti64x4&expand=3186)
25650#[inline]
25651#[target_feature(enable = "avx512f")]
25652#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25653#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))] //should be vinserti64x4
25654#[rustc_legacy_const_generics(2)]
25655pub fn _mm512_inserti64x4<const IMM8: i32>(a: __m512i, b: __m256i) -> __m512i {
25656 unsafe {
25657 static_assert_uimm_bits!(IMM8, 1);
25658 let b: __m512i = _mm512_castsi256_si512(b);
25659 match IMM8 & 0b1 {
25660 0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
25661 _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
25662 }
25663 }
25664}
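
// Illustrative note (not part of the upstream source): IMM8 selects the 256-bit
// half of `a` that is overwritten with `b`; 0 replaces the low four 64-bit
// elements and 1 replaces the high four.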
25665
25666/// Copy a to tmp, then insert 256 bits (composed of 4 packed 64-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25667///
25668/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_inserti64x4&expand=3187)
25669#[inline]
25670#[target_feature(enable = "avx512f")]
25671#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25672#[cfg_attr(test, assert_instr(vinserti64x4, IMM8 = 1))]
25673#[rustc_legacy_const_generics(4)]
25674pub fn _mm512_mask_inserti64x4<const IMM8: i32>(
25675 src: __m512i,
25676 k: __mmask8,
25677 a: __m512i,
25678 b: __m256i,
25679) -> __m512i {
25680 unsafe {
25681 static_assert_uimm_bits!(IMM8, 1);
25682 let r: __m512i = _mm512_inserti64x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8()))
25684 }
25685}
25686
25687/// Copy a to tmp, then insert 256 bits (composed of 4 packed 64-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25688///
25689/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_inserti64x4&expand=3188)
25690#[inline]
25691#[target_feature(enable = "avx512f")]
25692#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25693#[cfg_attr(test, assert_instr(vinserti64x4, IMM8 = 1))]
25694#[rustc_legacy_const_generics(3)]
25695pub fn _mm512_maskz_inserti64x4<const IMM8: i32>(k: __mmask8, a: __m512i, b: __m256i) -> __m512i {
25696 unsafe {
25697 static_assert_uimm_bits!(IMM8, 1);
25698 let r: __m512i = _mm512_inserti64x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO))
25700 }
25701}
25702
25703/// Copy a to dst, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into dst at the location specified by imm8.
25704///
25705/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_insertf32x4&expand=3155)
25706#[inline]
25707#[target_feature(enable = "avx512f")]
25708#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25709#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))]
25710#[rustc_legacy_const_generics(2)]
25711pub fn _mm512_insertf32x4<const IMM8: i32>(a: __m512, b: __m128) -> __m512 {
25712 unsafe {
25713 static_assert_uimm_bits!(IMM8, 2);
25714 let b = _mm512_castps128_ps512(b);
25715 match IMM8 & 0b11 {
25716 0 => {
25717 simd_shuffle!(
25718 a,
25719 b,
25720 [16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
25721 )
25722 }
25723 1 => {
25724 simd_shuffle!(
25725 a,
25726 b,
25727 [0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15],
25728 )
25729 }
25730 2 => {
25731 simd_shuffle!(
25732 a,
25733 b,
25734 [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 12, 13, 14, 15],
25735 )
25736 }
25737 _ => {
25738 simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19])
25739 }
25740 }
25741 }
25742}
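
// Illustrative note (not part of the upstream source): the floating-point
// counterpart of _mm512_inserti32x4; IMM8 in 0..=3 selects which 128-bit lane of
// `a` is replaced by the four f32 elements of `b`.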
25743
25744/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25745///
25746/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_insertf32x4&expand=3156)
25747#[inline]
25748#[target_feature(enable = "avx512f")]
25749#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25750#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))]
25751#[rustc_legacy_const_generics(4)]
25752pub fn _mm512_mask_insertf32x4<const IMM8: i32>(
25753 src: __m512,
25754 k: __mmask16,
25755 a: __m512,
25756 b: __m128,
25757) -> __m512 {
25758 unsafe {
25759 static_assert_uimm_bits!(IMM8, 2);
25760 let r: __m512 = _mm512_insertf32x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16()))
25762 }
25763}
25764
25765/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25766///
25767/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_insertf32x4&expand=3157)
25768#[inline]
25769#[target_feature(enable = "avx512f")]
25770#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25771#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))]
25772#[rustc_legacy_const_generics(3)]
25773pub fn _mm512_maskz_insertf32x4<const IMM8: i32>(k: __mmask16, a: __m512, b: __m128) -> __m512 {
25774 unsafe {
25775 static_assert_uimm_bits!(IMM8, 2);
25776 let r: __m512 = _mm512_insertf32x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO))
25778 }
25779}
25780
25781/// Copy a to dst, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into dst at the location specified by imm8.
25782///
25783/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_insertf32x4&expand=3152)
25784#[inline]
25785#[target_feature(enable = "avx512f,avx512vl")]
25786#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25787#[cfg_attr(
25788 test,
25789 assert_instr(vinsert, IMM8 = 1) //should be vinsertf32x4
25790)]
25791#[rustc_legacy_const_generics(2)]
25792pub fn _mm256_insertf32x4<const IMM8: i32>(a: __m256, b: __m128) -> __m256 {
25793 unsafe {
25794 static_assert_uimm_bits!(IMM8, 1);
25795 let b: __m256 = _mm256_castps128_ps256(b);
25796 match IMM8 & 0b1 {
25797 0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
25798 _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
25799 }
25800 }
25801}
25802
25803/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25804///
25805/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_insertf32x4&expand=3153)
25806#[inline]
25807#[target_feature(enable = "avx512f,avx512vl")]
25808#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25809#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 1))]
25810#[rustc_legacy_const_generics(4)]
25811pub fn _mm256_mask_insertf32x4<const IMM8: i32>(
25812 src: __m256,
25813 k: __mmask8,
25814 a: __m256,
25815 b: __m128,
25816) -> __m256 {
25817 unsafe {
25818 static_assert_uimm_bits!(IMM8, 1);
25819 let r: __m256 = _mm256_insertf32x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8()))
25821 }
25822}
25823
25824/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25825///
25826/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_insertf32x4&expand=3154)
25827#[inline]
25828#[target_feature(enable = "avx512f,avx512vl")]
25829#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25830#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 1))]
25831#[rustc_legacy_const_generics(3)]
25832pub fn _mm256_maskz_insertf32x4<const IMM8: i32>(k: __mmask8, a: __m256, b: __m128) -> __m256 {
25833 unsafe {
25834 static_assert_uimm_bits!(IMM8, 1);
25835 let r: __m256 = _mm256_insertf32x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO))
25837 }
25838}
25839
25840/// Copy a to dst, then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from b into dst at the location specified by imm8.
25841///
25842/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_insertf64x4&expand=3167)
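///
/// A minimal illustrative sketch (added commentary, not from Intel's documentation;
/// assumes the `avx512f` target feature is available at the call site):
///
/// ```ignore
/// let a = _mm512_set1_pd(1.0);
/// let b = _mm256_set1_pd(2.0);
/// // IMM8 = 0 replaces the lower 256-bit half of `a`, IMM8 = 1 the upper half.
/// let r = unsafe { _mm512_insertf64x4::<0>(a, b) };
/// ```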
25843#[inline]
25844#[target_feature(enable = "avx512f")]
25845#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25846#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))]
25847#[rustc_legacy_const_generics(2)]
25848pub fn _mm512_insertf64x4<const IMM8: i32>(a: __m512d, b: __m256d) -> __m512d {
25849 unsafe {
25850 static_assert_uimm_bits!(IMM8, 1);
25851 let b: __m512d = _mm512_castpd256_pd512(b);
25852 match IMM8 & 0b1 {
25853 0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
25854 _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
25855 }
25856 }
25857}
25858
25859/// Copy a to tmp, then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25860///
25861/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_insertf64x4&expand=3168)
25862#[inline]
25863#[target_feature(enable = "avx512f")]
25864#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25865#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))]
25866#[rustc_legacy_const_generics(4)]
25867pub fn _mm512_mask_insertf64x4<const IMM8: i32>(
25868 src: __m512d,
25869 k: __mmask8,
25870 a: __m512d,
25871 b: __m256d,
25872) -> __m512d {
25873 unsafe {
25874 static_assert_uimm_bits!(IMM8, 1);
25875 let r: __m512d = _mm512_insertf64x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
25877 }
25878}
25879
25880/// Copy a to tmp, then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25881///
25882/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_insertf64x4&expand=3169)
25883#[inline]
25884#[target_feature(enable = "avx512f")]
25885#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25886#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))]
25887#[rustc_legacy_const_generics(3)]
25888pub fn _mm512_maskz_insertf64x4<const IMM8: i32>(k: __mmask8, a: __m512d, b: __m256d) -> __m512d {
25889 unsafe {
25890 static_assert_uimm_bits!(IMM8, 1);
25891 let r: __m512d = _mm512_insertf64x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
25893 }
25894}
25895
25896/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst.
25897///
25898/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_epi32&expand=6021)
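///
/// A minimal illustrative sketch (added commentary, not from Intel's documentation;
/// assumes the `avx512f` target feature is available at the call site):
///
/// ```ignore
/// let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
/// let b = _mm512_set1_epi32(-1);
/// // Within each 128-bit lane the two high elements of `a` and `b` are
/// // interleaved, so the first lane of the result is [2, -1, 3, -1].
/// let r = unsafe { _mm512_unpackhi_epi32(a, b) };
/// ```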
25899#[inline]
25900#[target_feature(enable = "avx512f")]
25901#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25902#[cfg_attr(test, assert_instr(vunpckhps))] //should be vpunpckhdq
25903pub fn _mm512_unpackhi_epi32(a: __m512i, b: __m512i) -> __m512i {
25904 unsafe {
25905 let a: i32x16 = a.as_i32x16();
25906 let b: i32x16 = b.as_i32x16();
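        // Indices 0..=15 select from `a` and 16..=31 from `b`; the `+ 4`, `+ 8` and
        // `+ 12` offsets repeat the high-element interleave in each 128-bit lane.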
25907 #[rustfmt::skip]
25908 let r: i32x16 = simd_shuffle!(
25909 a, b,
25910 [ 2, 18, 3, 19,
25911 2 + 4, 18 + 4, 3 + 4, 19 + 4,
25912 2 + 8, 18 + 8, 3 + 8, 19 + 8,
25913 2 + 12, 18 + 12, 3 + 12, 19 + 12],
25914 );
        transmute(r)
25916 }
25917}
25918
25919/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25920///
25921/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_epi32&expand=6019)
25922#[inline]
25923#[target_feature(enable = "avx512f")]
25924#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25925#[cfg_attr(test, assert_instr(vpunpckhdq))]
25926pub fn _mm512_mask_unpackhi_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
25927 unsafe {
25928 let unpackhi: i32x16 = _mm512_unpackhi_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, unpackhi, src.as_i32x16()))
25930 }
25931}
25932
25933/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25934///
25935/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_epi32&expand=6020)
25936#[inline]
25937#[target_feature(enable = "avx512f")]
25938#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25939#[cfg_attr(test, assert_instr(vpunpckhdq))]
25940pub fn _mm512_maskz_unpackhi_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
25941 unsafe {
25942 let unpackhi: i32x16 = _mm512_unpackhi_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, unpackhi, i32x16::ZERO))
25944 }
25945}
25946
25947/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25948///
25949/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_epi32&expand=6016)
25950#[inline]
25951#[target_feature(enable = "avx512f,avx512vl")]
25952#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25953#[cfg_attr(test, assert_instr(vpunpckhdq))]
25954pub fn _mm256_mask_unpackhi_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
25955 unsafe {
25956 let unpackhi: i32x8 = _mm256_unpackhi_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, unpackhi, src.as_i32x8()))
25958 }
25959}
25960
25961/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25962///
25963/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_epi32&expand=6017)
25964#[inline]
25965#[target_feature(enable = "avx512f,avx512vl")]
25966#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25967#[cfg_attr(test, assert_instr(vpunpckhdq))]
25968pub fn _mm256_maskz_unpackhi_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
25969 unsafe {
25970 let unpackhi: i32x8 = _mm256_unpackhi_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, unpackhi, i32x8::ZERO))
25972 }
25973}
25974
25975/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25976///
25977/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_epi32&expand=6013)
25978#[inline]
25979#[target_feature(enable = "avx512f,avx512vl")]
25980#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25981#[cfg_attr(test, assert_instr(vpunpckhdq))]
25982pub fn _mm_mask_unpackhi_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
25983 unsafe {
25984 let unpackhi: i32x4 = _mm_unpackhi_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, unpackhi, src.as_i32x4()))
25986 }
25987}
25988
25989/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25990///
25991/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_epi32&expand=6014)
25992#[inline]
25993#[target_feature(enable = "avx512f,avx512vl")]
25994#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
25995#[cfg_attr(test, assert_instr(vpunpckhdq))]
25996pub fn _mm_maskz_unpackhi_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
25997 unsafe {
25998 let unpackhi: i32x4 = _mm_unpackhi_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, unpackhi, i32x4::ZERO))
26000 }
26001}
26002
26003/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst.
26004///
26005/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_epi64&expand=6030)
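///
/// A minimal illustrative sketch (added commentary, not from Intel's documentation;
/// assumes the `avx512f` target feature is available at the call site):
///
/// ```ignore
/// let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
/// let b = _mm512_set1_epi64(-1);
/// // Each 128-bit lane contributes its high 64-bit element from `a` and then `b`,
/// // so the result is [1, -1, 3, -1, 5, -1, 7, -1].
/// let r = unsafe { _mm512_unpackhi_epi64(a, b) };
/// ```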
26006#[inline]
26007#[target_feature(enable = "avx512f")]
26008#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26009#[cfg_attr(test, assert_instr(vunpckhpd))] //should be vpunpckhqdq
26010pub fn _mm512_unpackhi_epi64(a: __m512i, b: __m512i) -> __m512i {
26011 unsafe { simd_shuffle!(a, b, [1, 9, 1 + 2, 9 + 2, 1 + 4, 9 + 4, 1 + 6, 9 + 6]) }
26012}
26013
26014/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26015///
26016/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_epi64&expand=6028)
26017#[inline]
26018#[target_feature(enable = "avx512f")]
26019#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26020#[cfg_attr(test, assert_instr(vpunpckhqdq))]
26021pub fn _mm512_mask_unpackhi_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
26022 unsafe {
26023 let unpackhi: i64x8 = _mm512_unpackhi_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, unpackhi, src.as_i64x8()))
26025 }
26026}
26027
26028/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26029///
26030/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_epi64&expand=6029)
26031#[inline]
26032#[target_feature(enable = "avx512f")]
26033#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26034#[cfg_attr(test, assert_instr(vpunpckhqdq))]
26035pub fn _mm512_maskz_unpackhi_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
26036 unsafe {
26037 let unpackhi: i64x8 = _mm512_unpackhi_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, unpackhi, i64x8::ZERO))
26039 }
26040}
26041
26042/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26043///
26044/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_epi64&expand=6025)
26045#[inline]
26046#[target_feature(enable = "avx512f,avx512vl")]
26047#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26048#[cfg_attr(test, assert_instr(vpunpckhqdq))]
26049pub fn _mm256_mask_unpackhi_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
26050 unsafe {
26051 let unpackhi: i64x4 = _mm256_unpackhi_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, unpackhi, src.as_i64x4()))
26053 }
26054}
26055
26056/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26057///
26058/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_epi64&expand=6026)
26059#[inline]
26060#[target_feature(enable = "avx512f,avx512vl")]
26061#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26062#[cfg_attr(test, assert_instr(vpunpckhqdq))]
26063pub fn _mm256_maskz_unpackhi_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
26064 unsafe {
26065 let unpackhi: i64x4 = _mm256_unpackhi_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, unpackhi, i64x4::ZERO))
26067 }
26068}
26069
26070/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26071///
26072/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_epi64&expand=6022)
26073#[inline]
26074#[target_feature(enable = "avx512f,avx512vl")]
26075#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26076#[cfg_attr(test, assert_instr(vpunpckhqdq))]
26077pub fn _mm_mask_unpackhi_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26078 unsafe {
26079 let unpackhi: i64x2 = _mm_unpackhi_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, unpackhi, src.as_i64x2()))
26081 }
26082}
26083
26084/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26085///
26086/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_epi64&expand=6023)
26087#[inline]
26088#[target_feature(enable = "avx512f,avx512vl")]
26089#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26090#[cfg_attr(test, assert_instr(vpunpckhqdq))]
26091pub fn _mm_maskz_unpackhi_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26092 unsafe {
26093 let unpackhi: i64x2 = _mm_unpackhi_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, unpackhi, i64x2::ZERO))
26095 }
26096}
26097
26098/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst.
26099///
26100/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_ps&expand=6060)
26101#[inline]
26102#[target_feature(enable = "avx512f")]
26103#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26104#[cfg_attr(test, assert_instr(vunpckhps))]
26105pub fn _mm512_unpackhi_ps(a: __m512, b: __m512) -> __m512 {
26106 unsafe {
26107 #[rustfmt::skip]
26108 simd_shuffle!(
26109 a, b,
26110 [ 2, 18, 3, 19,
26111 2 + 4, 18 + 4, 3 + 4, 19 + 4,
26112 2 + 8, 18 + 8, 3 + 8, 19 + 8,
26113 2 + 12, 18 + 12, 3 + 12, 19 + 12],
26114 )
26115 }
26116}
26117
26118/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26119///
26120/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_ps&expand=6058)
26121#[inline]
26122#[target_feature(enable = "avx512f")]
26123#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26124#[cfg_attr(test, assert_instr(vunpckhps))]
26125pub fn _mm512_mask_unpackhi_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
26126 unsafe {
26127 let unpackhi: f32x16 = _mm512_unpackhi_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, unpackhi, src.as_f32x16()))
26129 }
26130}
26131
26132/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26133///
26134/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_ps&expand=6059)
26135#[inline]
26136#[target_feature(enable = "avx512f")]
26137#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26138#[cfg_attr(test, assert_instr(vunpckhps))]
26139pub fn _mm512_maskz_unpackhi_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
26140 unsafe {
26141 let unpackhi: f32x16 = _mm512_unpackhi_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, unpackhi, f32x16::ZERO))
26143 }
26144}
26145
26146/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26147///
26148/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_ps&expand=6055)
26149#[inline]
26150#[target_feature(enable = "avx512f,avx512vl")]
26151#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26152#[cfg_attr(test, assert_instr(vunpckhps))]
26153pub fn _mm256_mask_unpackhi_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
26154 unsafe {
26155 let unpackhi: f32x8 = _mm256_unpackhi_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, unpackhi, src.as_f32x8()))
26157 }
26158}
26159
26160/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26161///
26162/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_ps&expand=6056)
26163#[inline]
26164#[target_feature(enable = "avx512f,avx512vl")]
26165#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26166#[cfg_attr(test, assert_instr(vunpckhps))]
26167pub fn _mm256_maskz_unpackhi_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
26168 unsafe {
26169 let unpackhi: f32x8 = _mm256_unpackhi_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, unpackhi, f32x8::ZERO))
26171 }
26172}
26173
26174/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26175///
26176/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_ps&expand=6052)
26177#[inline]
26178#[target_feature(enable = "avx512f,avx512vl")]
26179#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26180#[cfg_attr(test, assert_instr(vunpckhps))]
26181pub fn _mm_mask_unpackhi_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
26182 unsafe {
26183 let unpackhi: f32x4 = _mm_unpackhi_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, unpackhi, src.as_f32x4()))
26185 }
26186}
26187
26188/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26189///
26190/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_ps&expand=6053)
26191#[inline]
26192#[target_feature(enable = "avx512f,avx512vl")]
26193#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26194#[cfg_attr(test, assert_instr(vunpckhps))]
26195pub fn _mm_maskz_unpackhi_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
26196 unsafe {
26197 let unpackhi: f32x4 = _mm_unpackhi_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, unpackhi, f32x4::ZERO))
26199 }
26200}
26201
26202/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst.
26203///
26204/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_pd&expand=6048)
26205#[inline]
26206#[target_feature(enable = "avx512f")]
26207#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26208#[cfg_attr(test, assert_instr(vunpckhpd))]
26209pub fn _mm512_unpackhi_pd(a: __m512d, b: __m512d) -> __m512d {
26210 unsafe { simd_shuffle!(a, b, [1, 9, 1 + 2, 9 + 2, 1 + 4, 9 + 4, 1 + 6, 9 + 6]) }
26211}
26212
26213/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26214///
26215/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_pd&expand=6046)
26216#[inline]
26217#[target_feature(enable = "avx512f")]
26218#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26219#[cfg_attr(test, assert_instr(vunpckhpd))]
26220pub fn _mm512_mask_unpackhi_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
26221 unsafe {
26222 let unpackhi: f64x8 = _mm512_unpackhi_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, unpackhi, src.as_f64x8()))
26224 }
26225}
26226
26227/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26228///
26229/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_pd&expand=6047)
26230#[inline]
26231#[target_feature(enable = "avx512f")]
26232#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26233#[cfg_attr(test, assert_instr(vunpckhpd))]
26234pub fn _mm512_maskz_unpackhi_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
26235 unsafe {
26236 let unpackhi: f64x8 = _mm512_unpackhi_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, unpackhi, f64x8::ZERO))
26238 }
26239}
26240
26241/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26242///
26243/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_pd&expand=6043)
26244#[inline]
26245#[target_feature(enable = "avx512f,avx512vl")]
26246#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26247#[cfg_attr(test, assert_instr(vunpckhpd))]
26248pub fn _mm256_mask_unpackhi_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
26249 unsafe {
26250 let unpackhi: f64x4 = _mm256_unpackhi_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, unpackhi, src.as_f64x4()))
26252 }
26253}
26254
26255/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26256///
26257/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_pd&expand=6044)
26258#[inline]
26259#[target_feature(enable = "avx512f,avx512vl")]
26260#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26261#[cfg_attr(test, assert_instr(vunpckhpd))]
26262pub fn _mm256_maskz_unpackhi_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
26263 unsafe {
26264 let unpackhi: f64x4 = _mm256_unpackhi_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, unpackhi, f64x4::ZERO))
26266 }
26267}
26268
26269/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26270///
26271/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_pd&expand=6040)
26272#[inline]
26273#[target_feature(enable = "avx512f,avx512vl")]
26274#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26275#[cfg_attr(test, assert_instr(vunpckhpd))]
26276pub fn _mm_mask_unpackhi_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
26277 unsafe {
26278 let unpackhi: f64x2 = _mm_unpackhi_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, unpackhi, src.as_f64x2()))
26280 }
26281}
26282
26283/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26284///
26285/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_pd&expand=6041)
26286#[inline]
26287#[target_feature(enable = "avx512f,avx512vl")]
26288#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26289#[cfg_attr(test, assert_instr(vunpckhpd))]
26290pub fn _mm_maskz_unpackhi_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
26291 unsafe {
26292 let unpackhi: f64x2 = _mm_unpackhi_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, unpackhi, f64x2::ZERO))
26294 }
26295}
26296
26297/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst.
26298///
26299/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_epi32&expand=6078)
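///
/// A minimal illustrative sketch (added commentary, not from Intel's documentation;
/// assumes the `avx512f` target feature is available at the call site):
///
/// ```ignore
/// let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
/// let b = _mm512_set1_epi32(-1);
/// // Within each 128-bit lane the two low elements of `a` and `b` are
/// // interleaved, so the first lane of the result is [0, -1, 1, -1].
/// let r = unsafe { _mm512_unpacklo_epi32(a, b) };
/// ```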
26300#[inline]
26301#[target_feature(enable = "avx512f")]
26302#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26303#[cfg_attr(test, assert_instr(vunpcklps))] //should be vpunpckldq
26304pub fn _mm512_unpacklo_epi32(a: __m512i, b: __m512i) -> __m512i {
26305 unsafe {
26306 let a: i32x16 = a.as_i32x16();
26307 let b: i32x16 = b.as_i32x16();
26308 #[rustfmt::skip]
26309 let r: i32x16 = simd_shuffle!(
26310 a, b,
26311 [ 0, 16, 1, 17,
26312 0 + 4, 16 + 4, 1 + 4, 17 + 4,
26313 0 + 8, 16 + 8, 1 + 8, 17 + 8,
26314 0 + 12, 16 + 12, 1 + 12, 17 + 12],
26315 );
        transmute(r)
26317 }
26318}
26319
26320/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26321///
26322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_epi32&expand=6076)
26323#[inline]
26324#[target_feature(enable = "avx512f")]
26325#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26326#[cfg_attr(test, assert_instr(vpunpckldq))]
26327pub fn _mm512_mask_unpacklo_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
26328 unsafe {
26329 let unpacklo: i32x16 = _mm512_unpacklo_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, unpacklo, src.as_i32x16()))
26331 }
26332}
26333
26334/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26335///
26336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_epi32&expand=6077)
26337#[inline]
26338#[target_feature(enable = "avx512f")]
26339#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26340#[cfg_attr(test, assert_instr(vpunpckldq))]
26341pub fn _mm512_maskz_unpacklo_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
26342 unsafe {
26343 let unpacklo: i32x16 = _mm512_unpacklo_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, unpacklo, i32x16::ZERO))
26345 }
26346}
26347
26348/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26349///
26350/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_epi32&expand=6073)
26351#[inline]
26352#[target_feature(enable = "avx512f,avx512vl")]
26353#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26354#[cfg_attr(test, assert_instr(vpunpckldq))]
26355pub fn _mm256_mask_unpacklo_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
26356 unsafe {
26357 let unpacklo: i32x8 = _mm256_unpacklo_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, unpacklo, src.as_i32x8()))
26359 }
26360}
26361
26362/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26363///
26364/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_epi32&expand=6074)
26365#[inline]
26366#[target_feature(enable = "avx512f,avx512vl")]
26367#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26368#[cfg_attr(test, assert_instr(vpunpckldq))]
26369pub fn _mm256_maskz_unpacklo_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
26370 unsafe {
26371 let unpacklo: i32x8 = _mm256_unpacklo_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, unpacklo, i32x8::ZERO))
26373 }
26374}
26375
26376/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26377///
26378/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_epi32&expand=6070)
26379#[inline]
26380#[target_feature(enable = "avx512f,avx512vl")]
26381#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26382#[cfg_attr(test, assert_instr(vpunpckldq))]
26383pub fn _mm_mask_unpacklo_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26384 unsafe {
26385 let unpacklo: i32x4 = _mm_unpacklo_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, unpacklo, src.as_i32x4()))
26387 }
26388}
26389
26390/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26391///
26392/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_epi32&expand=6071)
26393#[inline]
26394#[target_feature(enable = "avx512f,avx512vl")]
26395#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26396#[cfg_attr(test, assert_instr(vpunpckldq))]
26397pub fn _mm_maskz_unpacklo_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26398 unsafe {
26399 let unpacklo: i32x4 = _mm_unpacklo_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, unpacklo, i32x4::ZERO))
26401 }
26402}
26403
26404/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst.
26405///
26406/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_epi64&expand=6087)
26407#[inline]
26408#[target_feature(enable = "avx512f")]
26409#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26410#[cfg_attr(test, assert_instr(vunpcklpd))] //should be vpunpcklqdq
26411pub fn _mm512_unpacklo_epi64(a: __m512i, b: __m512i) -> __m512i {
26412 unsafe { simd_shuffle!(a, b, [0, 8, 0 + 2, 8 + 2, 0 + 4, 8 + 4, 0 + 6, 8 + 6]) }
26413}
26414
26415/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26416///
26417/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_epi64&expand=6085)
26418#[inline]
26419#[target_feature(enable = "avx512f")]
26420#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26421#[cfg_attr(test, assert_instr(vpunpcklqdq))]
26422pub fn _mm512_mask_unpacklo_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
26423 unsafe {
26424 let unpacklo: i64x8 = _mm512_unpacklo_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, unpacklo, src.as_i64x8()))
26426 }
26427}
26428
26429/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26430///
26431/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_epi64&expand=6086)
26432#[inline]
26433#[target_feature(enable = "avx512f")]
26434#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26435#[cfg_attr(test, assert_instr(vpunpcklqdq))]
26436pub fn _mm512_maskz_unpacklo_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
26437 unsafe {
26438 let unpacklo: i64x8 = _mm512_unpacklo_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, unpacklo, i64x8::ZERO))
26440 }
26441}
26442
26443/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26444///
26445/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_epi64&expand=6082)
26446#[inline]
26447#[target_feature(enable = "avx512f,avx512vl")]
26448#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26449#[cfg_attr(test, assert_instr(vpunpcklqdq))]
26450pub fn _mm256_mask_unpacklo_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
26451 unsafe {
26452 let unpacklo: i64x4 = _mm256_unpacklo_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, unpacklo, src.as_i64x4()))
26454 }
26455}
26456
26457/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26458///
26459/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_epi64&expand=6083)
26460#[inline]
26461#[target_feature(enable = "avx512f,avx512vl")]
26462#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26463#[cfg_attr(test, assert_instr(vpunpcklqdq))]
26464pub fn _mm256_maskz_unpacklo_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
26465 unsafe {
26466 let unpacklo: i64x4 = _mm256_unpacklo_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, unpacklo, i64x4::ZERO))
26468 }
26469}
26470
26471/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26472///
26473/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_epi64&expand=6079)
26474#[inline]
26475#[target_feature(enable = "avx512f,avx512vl")]
26476#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26477#[cfg_attr(test, assert_instr(vpunpcklqdq))]
26478pub fn _mm_mask_unpacklo_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26479 unsafe {
26480 let unpacklo: i64x2 = _mm_unpacklo_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, unpacklo, src.as_i64x2()))
26482 }
26483}
26484
26485/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26486///
26487/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_epi64&expand=6080)
26488#[inline]
26489#[target_feature(enable = "avx512f,avx512vl")]
26490#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26491#[cfg_attr(test, assert_instr(vpunpcklqdq))]
26492pub fn _mm_maskz_unpacklo_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26493 unsafe {
26494 let unpacklo: i64x2 = _mm_unpacklo_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, unpacklo, i64x2::ZERO))
26496 }
26497}
26498
26499/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst.
26500///
26501/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_ps&expand=6117)
26502#[inline]
26503#[target_feature(enable = "avx512f")]
26504#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26505#[cfg_attr(test, assert_instr(vunpcklps))]
26506pub fn _mm512_unpacklo_ps(a: __m512, b: __m512) -> __m512 {
26507 unsafe {
26508 #[rustfmt::skip]
26509 simd_shuffle!(a, b,
26510 [ 0, 16, 1, 17,
26511 0 + 4, 16 + 4, 1 + 4, 17 + 4,
26512 0 + 8, 16 + 8, 1 + 8, 17 + 8,
26513 0 + 12, 16 + 12, 1 + 12, 17 + 12],
26514 )
26515 }
26516}
26517
26518/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26519///
26520/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_ps&expand=6115)
26521#[inline]
26522#[target_feature(enable = "avx512f")]
26523#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26524#[cfg_attr(test, assert_instr(vunpcklps))]
26525pub fn _mm512_mask_unpacklo_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
26526 unsafe {
26527 let unpacklo: f32x16 = _mm512_unpacklo_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, unpacklo, src.as_f32x16()))
26529 }
26530}
26531
26532/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26533///
26534/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_ps&expand=6116)
26535#[inline]
26536#[target_feature(enable = "avx512f")]
26537#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26538#[cfg_attr(test, assert_instr(vunpcklps))]
26539pub fn _mm512_maskz_unpacklo_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
26540 unsafe {
26541 let unpacklo: f32x16 = _mm512_unpacklo_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, unpacklo, f32x16::ZERO))
26543 }
26544}
26545
26546/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26547///
26548/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_ps&expand=6112)
26549#[inline]
26550#[target_feature(enable = "avx512f,avx512vl")]
26551#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26552#[cfg_attr(test, assert_instr(vunpcklps))]
26553pub fn _mm256_mask_unpacklo_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
26554 unsafe {
26555 let unpacklo: f32x8 = _mm256_unpacklo_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, unpacklo, src.as_f32x8()))
26557 }
26558}
26559
26560/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26561///
26562/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_ps&expand=6113)
26563#[inline]
26564#[target_feature(enable = "avx512f,avx512vl")]
26565#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26566#[cfg_attr(test, assert_instr(vunpcklps))]
26567pub fn _mm256_maskz_unpacklo_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
26568 unsafe {
26569 let unpacklo: f32x8 = _mm256_unpacklo_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, unpacklo, f32x8::ZERO))
26571 }
26572}
26573
26574/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26575///
26576/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_ps&expand=6109)
26577#[inline]
26578#[target_feature(enable = "avx512f,avx512vl")]
26579#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26580#[cfg_attr(test, assert_instr(vunpcklps))]
26581pub fn _mm_mask_unpacklo_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
26582 unsafe {
26583 let unpacklo: f32x4 = _mm_unpacklo_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, unpacklo, src.as_f32x4()))
26585 }
26586}
26587
26588/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26589///
26590/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_ps&expand=6110)
26591#[inline]
26592#[target_feature(enable = "avx512f,avx512vl")]
26593#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26594#[cfg_attr(test, assert_instr(vunpcklps))]
26595pub fn _mm_maskz_unpacklo_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
26596 unsafe {
26597 let unpacklo: f32x4 = _mm_unpacklo_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, unpacklo, f32x4::ZERO))
26599 }
26600}
26601
26602/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst.
26603///
26604/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_pd&expand=6105)
26605#[inline]
26606#[target_feature(enable = "avx512f")]
26607#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26608#[cfg_attr(test, assert_instr(vunpcklpd))]
26609pub fn _mm512_unpacklo_pd(a: __m512d, b: __m512d) -> __m512d {
26610 unsafe { simd_shuffle!(a, b, [0, 8, 0 + 2, 8 + 2, 0 + 4, 8 + 4, 0 + 6, 8 + 6]) }
26611}
26612
26613/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26614///
26615/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_pd&expand=6103)
26616#[inline]
26617#[target_feature(enable = "avx512f")]
26618#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26619#[cfg_attr(test, assert_instr(vunpcklpd))]
26620pub fn _mm512_mask_unpacklo_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
26621 unsafe {
26622 let unpacklo: f64x8 = _mm512_unpacklo_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, unpacklo, src.as_f64x8()))
26624 }
26625}
26626
26627/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26628///
26629/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_pd&expand=6104)
26630#[inline]
26631#[target_feature(enable = "avx512f")]
26632#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26633#[cfg_attr(test, assert_instr(vunpcklpd))]
26634pub fn _mm512_maskz_unpacklo_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
26635 unsafe {
26636 let unpacklo: f64x8 = _mm512_unpacklo_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, unpacklo, f64x8::ZERO))
26638 }
26639}
26640
26641/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26642///
26643/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_pd&expand=6100)
26644#[inline]
26645#[target_feature(enable = "avx512f,avx512vl")]
26646#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26647#[cfg_attr(test, assert_instr(vunpcklpd))]
26648pub fn _mm256_mask_unpacklo_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
26649 unsafe {
26650 let unpacklo: f64x4 = _mm256_unpacklo_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, unpacklo, src.as_f64x4()))
26652 }
26653}
26654
26655/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26656///
26657/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_pd&expand=6101)
26658#[inline]
26659#[target_feature(enable = "avx512f,avx512vl")]
26660#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26661#[cfg_attr(test, assert_instr(vunpcklpd))]
26662pub fn _mm256_maskz_unpacklo_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
26663 unsafe {
26664 let unpacklo: f64x4 = _mm256_unpacklo_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, unpacklo, f64x4::ZERO))
26666 }
26667}
26668
26669/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26670///
26671/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_pd&expand=6097)
26672#[inline]
26673#[target_feature(enable = "avx512f,avx512vl")]
26674#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26675#[cfg_attr(test, assert_instr(vunpcklpd))]
26676pub fn _mm_mask_unpacklo_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
26677 unsafe {
26678 let unpacklo: f64x2 = _mm_unpacklo_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, unpacklo, src.as_f64x2()))
26680 }
26681}
26682
26683/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26684///
26685/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_pd&expand=6098)
26686#[inline]
26687#[target_feature(enable = "avx512f,avx512vl")]
26688#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26689#[cfg_attr(test, assert_instr(vunpcklpd))]
26690pub fn _mm_maskz_unpacklo_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
26691 unsafe {
26692 let unpacklo: f64x2 = _mm_unpacklo_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, unpacklo, f64x2::ZERO))
26694 }
26695}
26696
26697/// Cast vector of type __m128 to type __m512; the upper 384 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26698///
26699/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps128_ps512&expand=621)
26700#[inline]
26701#[target_feature(enable = "avx512f")]
26702#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26703pub fn _mm512_castps128_ps512(a: __m128) -> __m512 {
26704 unsafe {
26705 simd_shuffle!(
26706 a,
26707 _mm_undefined_ps(),
26708 [0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4],
26709 )
26710 }
26711}
26712
26713/// Cast vector of type __m256 to type __m512; the upper 256 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26714///
26715/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps256_ps512&expand=623)
26716#[inline]
26717#[target_feature(enable = "avx512f")]
26718#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26719pub fn _mm512_castps256_ps512(a: __m256) -> __m512 {
26720 unsafe {
26721 simd_shuffle!(
26722 a,
26723 _mm256_undefined_ps(),
26724 [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
26725 )
26726 }
26727}
26728
26729/// Cast vector of type __m128 to type __m512; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26730///
26731/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextps128_ps512&expand=6196)
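///
/// A minimal illustrative sketch (added commentary, not from Intel's documentation;
/// assumes the `avx512f` target feature is available at the call site). Unlike
/// `_mm512_castps128_ps512`, the upper 384 bits of the result are guaranteed zero:
///
/// ```ignore
/// let a = _mm_set1_ps(3.0);
/// let r = unsafe { _mm512_zextps128_ps512(a) };
/// // elements 0..4 of `r` are 3.0, elements 4..16 are 0.0
/// ```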
26732#[inline]
26733#[target_feature(enable = "avx512f")]
26734#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26735pub fn _mm512_zextps128_ps512(a: __m128) -> __m512 {
26736 unsafe {
26737 simd_shuffle!(
26738 a,
26739 _mm_set1_ps(0.),
26740 [0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4],
26741 )
26742 }
26743}
26744
26745/// Cast vector of type __m256 to type __m512; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26746///
26747/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextps256_ps512&expand=6197)
26748#[inline]
26749#[target_feature(enable = "avx512f")]
26750#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26751pub fn _mm512_zextps256_ps512(a: __m256) -> __m512 {
26752 unsafe {
26753 simd_shuffle!(
26754 a,
26755 _mm256_set1_ps(0.),
26756 [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
26757 )
26758 }
26759}
26760
26761/// Cast vector of type __m512 to type __m128. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26762///
26763/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps512_ps128&expand=624)
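///
/// A minimal illustrative sketch (added commentary, not from Intel's documentation;
/// assumes the `avx512f` target feature is available at the call site):
///
/// ```ignore
/// let a = _mm512_set1_ps(7.0);
/// // only the lower 128 bits of `a` are kept
/// let lo: __m128 = unsafe { _mm512_castps512_ps128(a) };
/// ```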
26764#[inline]
26765#[target_feature(enable = "avx512f")]
26766#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26767pub fn _mm512_castps512_ps128(a: __m512) -> __m128 {
26768 unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) }
26769}
26770
26771/// Cast vector of type __m512 to type __m256. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26772///
26773/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps512_ps256&expand=625)
26774#[inline]
26775#[target_feature(enable = "avx512f")]
26776#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26777pub fn _mm512_castps512_ps256(a: __m512) -> __m256 {
26778 unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) }
26779}
26780
26781/// Cast vector of type __m512 to type __m512d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26782///
26783/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps_pd&expand=616)
26784#[inline]
26785#[target_feature(enable = "avx512f")]
26786#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26787pub fn _mm512_castps_pd(a: __m512) -> __m512d {
    unsafe { transmute(a) }
26789}
26790
26791/// Cast vector of type __m512 to type __m512i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26792///
26793/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps_si512&expand=619)
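///
/// An illustrative sketch of the bit-level reinterpretation (assuming `std`,
/// runtime feature detection via `is_x86_feature_detected!`, and the other
/// `std::arch::x86_64` helpers used below):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     if is_x86_feature_detected!("avx512f") {
///         use std::arch::x86_64::*;
///         unsafe {
///             let ones = _mm512_set1_ps(1.0);
///             let bits = _mm512_castps_si512(ones);
///             // 1.0f32 has the IEEE-754 bit pattern 0x3f80_0000.
///             assert_eq!(_mm512_cvtsi512_si32(bits), 0x3f80_0000);
///         }
///     }
/// }
/// ```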
26794#[inline]
26795#[target_feature(enable = "avx512f")]
26796#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26797pub fn _mm512_castps_si512(a: __m512) -> __m512i {
    unsafe { transmute(a) }
26799}
26800
26801/// Cast vector of type __m128d to type __m512d; the upper 384 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26802///
26803/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd128_pd512&expand=609)
26804#[inline]
26805#[target_feature(enable = "avx512f")]
26806#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26807pub fn _mm512_castpd128_pd512(a: __m128d) -> __m512d {
26808 unsafe { simd_shuffle!(a, _mm_undefined_pd(), [0, 1, 2, 2, 2, 2, 2, 2]) }
26809}
26810
26811/// Cast vector of type __m256d to type __m512d; the upper 256 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26812///
26813/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd256_pd512&expand=611)
26814#[inline]
26815#[target_feature(enable = "avx512f")]
26816#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26817pub fn _mm512_castpd256_pd512(a: __m256d) -> __m512d {
26818 unsafe { simd_shuffle!(a, _mm256_undefined_pd(), [0, 1, 2, 3, 4, 4, 4, 4]) }
26819}
26820
26821/// Cast vector of type __m128d to type __m512d; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26822///
26823/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextpd128_pd512&expand=6193)
26824#[inline]
26825#[target_feature(enable = "avx512f")]
26826#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26827pub fn _mm512_zextpd128_pd512(a: __m128d) -> __m512d {
26828 unsafe { simd_shuffle!(a, _mm_set1_pd(0.), [0, 1, 2, 2, 2, 2, 2, 2]) }
26829}
26830
26831/// Cast vector of type __m256d to type __m512d; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26832///
26833/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextpd256_pd512&expand=6194)
26834#[inline]
26835#[target_feature(enable = "avx512f")]
26836#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26837pub fn _mm512_zextpd256_pd512(a: __m256d) -> __m512d {
26838 unsafe { simd_shuffle!(a, _mm256_set1_pd(0.), [0, 1, 2, 3, 4, 4, 4, 4]) }
26839}
26840
26841/// Cast vector of type __m512d to type __m128d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26842///
26843/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd512_pd128&expand=612)
26844#[inline]
26845#[target_feature(enable = "avx512f")]
26846#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26847pub fn _mm512_castpd512_pd128(a: __m512d) -> __m128d {
26848 unsafe { simd_shuffle!(a, a, [0, 1]) }
26849}
26850
26851/// Cast vector of type __m512d to type __m256d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26852///
26853/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd512_pd256&expand=613)
26854#[inline]
26855#[target_feature(enable = "avx512f")]
26856#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26857pub fn _mm512_castpd512_pd256(a: __m512d) -> __m256d {
26858 unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) }
26859}
26860
26861/// Cast vector of type __m512d to type __m512. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26862///
26863/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd_ps&expand=604)
26864#[inline]
26865#[target_feature(enable = "avx512f")]
26866#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26867pub fn _mm512_castpd_ps(a: __m512d) -> __m512 {
    unsafe { transmute(a) }
26869}
26870
26871/// Cast vector of type __m512d to type __m512i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26872///
26873/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd_si512&expand=607)
26874#[inline]
26875#[target_feature(enable = "avx512f")]
26876#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26877pub fn _mm512_castpd_si512(a: __m512d) -> __m512i {
    unsafe { transmute(a) }
26879}
26880
26881/// Cast vector of type __m128i to type __m512i; the upper 384 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26882///
26883/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi128_si512&expand=629)
26884#[inline]
26885#[target_feature(enable = "avx512f")]
26886#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26887pub fn _mm512_castsi128_si512(a: __m128i) -> __m512i {
26888 unsafe { simd_shuffle!(a, _mm_undefined_si128(), [0, 1, 2, 2, 2, 2, 2, 2]) }
26889}
26890
26891/// Cast vector of type __m256i to type __m512i; the upper 256 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26892///
26893/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi256_si512&expand=633)
26894#[inline]
26895#[target_feature(enable = "avx512f")]
26896#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26897pub fn _mm512_castsi256_si512(a: __m256i) -> __m512i {
26898 unsafe { simd_shuffle!(a, _mm256_undefined_si256(), [0, 1, 2, 3, 4, 4, 4, 4]) }
26899}
26900
26901/// Cast vector of type __m128i to type __m512i; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26902///
26903/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextsi128_si512&expand=6199)
26904#[inline]
26905#[target_feature(enable = "avx512f")]
26906#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26907pub fn _mm512_zextsi128_si512(a: __m128i) -> __m512i {
26908 unsafe { simd_shuffle!(a, _mm_setzero_si128(), [0, 1, 2, 2, 2, 2, 2, 2]) }
26909}
26910
26911/// Cast vector of type __m256i to type __m512i; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26912///
26913/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextsi256_si512&expand=6200)
26914#[inline]
26915#[target_feature(enable = "avx512f")]
26916#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26917pub fn _mm512_zextsi256_si512(a: __m256i) -> __m512i {
26918 unsafe { simd_shuffle!(a, _mm256_setzero_si256(), [0, 1, 2, 3, 4, 4, 4, 4]) }
26919}
26920
26921/// Cast vector of type __m512i to type __m128i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26922///
26923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_si128&expand=636)
26924#[inline]
26925#[target_feature(enable = "avx512f")]
26926#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26927pub fn _mm512_castsi512_si128(a: __m512i) -> __m128i {
26928 unsafe { simd_shuffle!(a, a, [0, 1]) }
26929}
26930
26931/// Cast vector of type __m512i to type __m256i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26932///
26933/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_si256&expand=637)
26934#[inline]
26935#[target_feature(enable = "avx512f")]
26936#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26937pub fn _mm512_castsi512_si256(a: __m512i) -> __m256i {
26938 unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) }
26939}
26940
26941/// Cast vector of type __m512i to type __m512. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26942///
26943/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_ps&expand=635)
26944#[inline]
26945#[target_feature(enable = "avx512f")]
26946#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26947pub fn _mm512_castsi512_ps(a: __m512i) -> __m512 {
    unsafe { transmute(a) }
26949}
26950
26951/// Cast vector of type __m512i to type __m512d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26952///
26953/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_pd&expand=634)
26954#[inline]
26955#[target_feature(enable = "avx512f")]
26956#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26957pub fn _mm512_castsi512_pd(a: __m512i) -> __m512d {
    unsafe { transmute(a) }
26959}
26960
26961/// Copy the lower 32-bit integer in a to dst.
26962///
26963/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsi512_si32&expand=1882)
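///
/// A quick usage sketch (assuming `std`, runtime feature detection via
/// `is_x86_feature_detected!`, and the other `std::arch::x86_64` helpers used
/// below):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     if is_x86_feature_detected!("avx512f") {
///         use std::arch::x86_64::*;
///         unsafe {
///             // `_mm512_setr_epi32` lists elements starting from lane 0.
///             let v = _mm512_setr_epi32(42, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
///             assert_eq!(_mm512_cvtsi512_si32(v), 42);
///         }
///     }
/// }
/// ```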
26964#[inline]
26965#[target_feature(enable = "avx512f")]
26966#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26967#[cfg_attr(test, assert_instr(vmovd))]
26968pub fn _mm512_cvtsi512_si32(a: __m512i) -> i32 {
26969 unsafe { simd_extract!(a.as_i32x16(), 0) }
26970}
26971
26972/// Copy the lower single-precision (32-bit) floating-point element of a to dst.
26973///
26974/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtss_f32)
26975#[inline]
26976#[target_feature(enable = "avx512f")]
26977#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26978pub fn _mm512_cvtss_f32(a: __m512) -> f32 {
26979 unsafe { simd_extract!(a, 0) }
26980}
26981
26982/// Copy the lower double-precision (64-bit) floating-point element of a to dst.
26983///
26984/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsd_f64)
26985#[inline]
26986#[target_feature(enable = "avx512f")]
26987#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26988pub fn _mm512_cvtsd_f64(a: __m512d) -> f64 {
26989 unsafe { simd_extract!(a, 0) }
26990}
26991
26992/// Broadcast the low packed 32-bit integer from a to all elements of dst.
26993///
26994/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastd_epi32&expand=545)
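///
/// A quick usage sketch (assuming `std`, runtime feature detection via
/// `is_x86_feature_detected!`, and the other `std::arch::x86_64` helpers used
/// below):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     if is_x86_feature_detected!("avx512f") {
///         use std::arch::x86_64::*;
///         unsafe {
///             let v = _mm512_broadcastd_epi32(_mm_setr_epi32(7, 1, 2, 3));
///             // Every one of the 16 lanes now holds 7, so all comparison mask bits are set.
///             let k = _mm512_cmpeq_epi32_mask(v, _mm512_set1_epi32(7));
///             assert_eq!(k, 0xffff);
///         }
///     }
/// }
/// ```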
26995#[inline]
26996#[target_feature(enable = "avx512f")]
26997#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
26998#[cfg_attr(test, assert_instr(vbroadcast))] //should be vpbroadcastd
26999pub fn _mm512_broadcastd_epi32(a: __m128i) -> __m512i {
27000 unsafe {
27001 let a: i32x16 = _mm512_castsi128_si512(a).as_i32x16();
27002 let ret: i32x16 = simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
        transmute(ret)
27004 }
27005}
27006
27007/// Broadcast the low packed 32-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27008///
27009/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastd_epi32&expand=546)
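///
/// A sketch of the writemask behaviour (assuming `std`, runtime feature
/// detection via `is_x86_feature_detected!`, and the other `std::arch::x86_64`
/// helpers used below):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     if is_x86_feature_detected!("avx512f") {
///         use std::arch::x86_64::*;
///         unsafe {
///             let src = _mm512_set1_epi32(-1);
///             let a = _mm_setr_epi32(7, 0, 0, 0);
///             // The low 8 mask bits are set, so lanes 0..8 take the broadcast
///             // value 7 and lanes 8..16 keep their value from `src`.
///             let r = _mm512_mask_broadcastd_epi32(src, 0b0000_0000_1111_1111, a);
///             let k = _mm512_cmpeq_epi32_mask(r, _mm512_set1_epi32(7));
///             assert_eq!(k, 0b0000_0000_1111_1111);
///         }
///     }
/// }
/// ```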
27010#[inline]
27011#[target_feature(enable = "avx512f")]
27012#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27013#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
27014pub fn _mm512_mask_broadcastd_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
27015 unsafe {
27016 let broadcast: i32x16 = _mm512_broadcastd_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, broadcast, src.as_i32x16()))
27018 }
27019}
27020
27021/// Broadcast the low packed 32-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27022///
27023/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastd_epi32&expand=547)
27024#[inline]
27025#[target_feature(enable = "avx512f")]
27026#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27027#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
27028pub fn _mm512_maskz_broadcastd_epi32(k: __mmask16, a: __m128i) -> __m512i {
27029 unsafe {
27030 let broadcast: i32x16 = _mm512_broadcastd_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, broadcast, i32x16::ZERO))
27032 }
27033}
27034
27035/// Broadcast the low packed 32-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27036///
27037/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastd_epi32&expand=543)
27038#[inline]
27039#[target_feature(enable = "avx512f,avx512vl")]
27040#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27041#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
27042pub fn _mm256_mask_broadcastd_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
27043 unsafe {
27044 let broadcast: i32x8 = _mm256_broadcastd_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, broadcast, src.as_i32x8()))
27046 }
27047}
27048
27049/// Broadcast the low packed 32-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27050///
27051/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastd_epi32&expand=544)
27052#[inline]
27053#[target_feature(enable = "avx512f,avx512vl")]
27054#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27055#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
27056pub fn _mm256_maskz_broadcastd_epi32(k: __mmask8, a: __m128i) -> __m256i {
27057 unsafe {
27058 let broadcast: i32x8 = _mm256_broadcastd_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, broadcast, i32x8::ZERO))
27060 }
27061}
27062
27063/// Broadcast the low packed 32-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27064///
27065/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastd_epi32&expand=540)
27066#[inline]
27067#[target_feature(enable = "avx512f,avx512vl")]
27068#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27069#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
27070pub fn _mm_mask_broadcastd_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
27071 unsafe {
27072 let broadcast: i32x4 = _mm_broadcastd_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, broadcast, src.as_i32x4()))
27074 }
27075}
27076
27077/// Broadcast the low packed 32-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27078///
27079/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastd_epi32&expand=541)
27080#[inline]
27081#[target_feature(enable = "avx512f,avx512vl")]
27082#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27083#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
27084pub fn _mm_maskz_broadcastd_epi32(k: __mmask8, a: __m128i) -> __m128i {
27085 unsafe {
27086 let broadcast: i32x4 = _mm_broadcastd_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, broadcast, i32x4::ZERO))
27088 }
27089}
27090
27091/// Broadcast the low packed 64-bit integer from a to all elements of dst.
27092///
27093/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastq_epi64&expand=560)
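///
/// A quick usage sketch (assuming `std`, runtime feature detection via
/// `is_x86_feature_detected!`, and the other `std::arch::x86_64` helpers used
/// below):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     if is_x86_feature_detected!("avx512f") {
///         use std::arch::x86_64::*;
///         unsafe {
///             let v = _mm512_broadcastq_epi64(_mm_set1_epi64x(9));
///             // All 8 lanes hold 9, so the full 8-bit comparison mask is set.
///             assert_eq!(_mm512_cmpeq_epi64_mask(v, _mm512_set1_epi64(9)), 0xff);
///         }
///     }
/// }
/// ```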
27094#[inline]
27095#[target_feature(enable = "avx512f")]
27096#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27097#[cfg_attr(test, assert_instr(vbroadcast))] //should be vpbroadcastq
27098pub fn _mm512_broadcastq_epi64(a: __m128i) -> __m512i {
27099 unsafe { simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0]) }
27100}
27101
27102/// Broadcast the low packed 64-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27103///
27104/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastq_epi64&expand=561)
27105#[inline]
27106#[target_feature(enable = "avx512f")]
27107#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27108#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
27109pub fn _mm512_mask_broadcastq_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
27110 unsafe {
27111 let broadcast: i64x8 = _mm512_broadcastq_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, broadcast, src.as_i64x8()))
27113 }
27114}
27115
27116/// Broadcast the low packed 64-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27117///
27118/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastq_epi64&expand=562)
27119#[inline]
27120#[target_feature(enable = "avx512f")]
27121#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27122#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
27123pub fn _mm512_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m512i {
27124 unsafe {
27125 let broadcast: i64x8 = _mm512_broadcastq_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, broadcast, i64x8::ZERO))
27127 }
27128}
27129
27130/// Broadcast the low packed 64-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27131///
27132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastq_epi64&expand=558)
27133#[inline]
27134#[target_feature(enable = "avx512f,avx512vl")]
27135#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27136#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
27137pub fn _mm256_mask_broadcastq_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
27138 unsafe {
27139 let broadcast: i64x4 = _mm256_broadcastq_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, broadcast, src.as_i64x4()))
27141 }
27142}
27143
27144/// Broadcast the low packed 64-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27145///
27146/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastq_epi64&expand=559)
27147#[inline]
27148#[target_feature(enable = "avx512f,avx512vl")]
27149#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27150#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
27151pub fn _mm256_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m256i {
27152 unsafe {
27153 let broadcast: i64x4 = _mm256_broadcastq_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, broadcast, i64x4::ZERO))
27155 }
27156}
27157
27158/// Broadcast the low packed 64-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27159///
27160/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastq_epi64&expand=555)
27161#[inline]
27162#[target_feature(enable = "avx512f,avx512vl")]
27163#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27164#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
27165pub fn _mm_mask_broadcastq_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
27166 unsafe {
27167 let broadcast: i64x2 = _mm_broadcastq_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, broadcast, src.as_i64x2()))
27169 }
27170}
27171
27172/// Broadcast the low packed 64-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27173///
27174/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastq_epi64&expand=556)
27175#[inline]
27176#[target_feature(enable = "avx512f,avx512vl")]
27177#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27178#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
27179pub fn _mm_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m128i {
27180 unsafe {
27181 let broadcast: i64x2 = _mm_broadcastq_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, broadcast, i64x2::ZERO))
27183 }
27184}
27185
27186/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst.
27187///
27188/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastss_ps&expand=578)
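///
/// A quick usage sketch (assuming `std`, runtime feature detection via
/// `is_x86_feature_detected!`, and the other `std::arch::x86_64` helpers used
/// below):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     if is_x86_feature_detected!("avx512f") {
///         use std::arch::x86_64::*;
///         unsafe {
///             let v = _mm512_broadcastss_ps(_mm_set_ss(2.5));
///             // All 16 lanes hold 2.5, so the horizontal sum is 40.0.
///             assert_eq!(_mm512_reduce_add_ps(v), 40.0);
///         }
///     }
/// }
/// ```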
27189#[inline]
27190#[target_feature(enable = "avx512f")]
27191#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27192#[cfg_attr(test, assert_instr(vbroadcastss))]
27193pub fn _mm512_broadcastss_ps(a: __m128) -> __m512 {
27194 unsafe { simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) }
27195}
27196
27197/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27198///
27199/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastss_ps&expand=579)
27200#[inline]
27201#[target_feature(enable = "avx512f")]
27202#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27203#[cfg_attr(test, assert_instr(vbroadcastss))]
27204pub fn _mm512_mask_broadcastss_ps(src: __m512, k: __mmask16, a: __m128) -> __m512 {
27205 unsafe {
27206 let broadcast: f32x16 = _mm512_broadcastss_ps(a).as_f32x16();
        transmute(simd_select_bitmask(k, broadcast, src.as_f32x16()))
27208 }
27209}
27210
27211/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27212///
27213/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastss_ps&expand=580)
27214#[inline]
27215#[target_feature(enable = "avx512f")]
27216#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27217#[cfg_attr(test, assert_instr(vbroadcastss))]
27218pub fn _mm512_maskz_broadcastss_ps(k: __mmask16, a: __m128) -> __m512 {
27219 unsafe {
27220 let broadcast: f32x16 = _mm512_broadcastss_ps(a).as_f32x16();
        transmute(simd_select_bitmask(k, broadcast, f32x16::ZERO))
27222 }
27223}
27224
27225/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27226///
27227/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastss_ps&expand=576)
27228#[inline]
27229#[target_feature(enable = "avx512f,avx512vl")]
27230#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27231#[cfg_attr(test, assert_instr(vbroadcastss))]
27232pub fn _mm256_mask_broadcastss_ps(src: __m256, k: __mmask8, a: __m128) -> __m256 {
27233 unsafe {
27234 let broadcast: f32x8 = _mm256_broadcastss_ps(a).as_f32x8();
        transmute(simd_select_bitmask(k, broadcast, src.as_f32x8()))
27236 }
27237}
27238
27239/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27240///
27241/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastss_ps&expand=577)
27242#[inline]
27243#[target_feature(enable = "avx512f,avx512vl")]
27244#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27245#[cfg_attr(test, assert_instr(vbroadcastss))]
27246pub fn _mm256_maskz_broadcastss_ps(k: __mmask8, a: __m128) -> __m256 {
27247 unsafe {
27248 let broadcast: f32x8 = _mm256_broadcastss_ps(a).as_f32x8();
        transmute(simd_select_bitmask(k, broadcast, f32x8::ZERO))
27250 }
27251}
27252
27253/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27254///
27255/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastss_ps&expand=573)
27256#[inline]
27257#[target_feature(enable = "avx512f,avx512vl")]
27258#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27259#[cfg_attr(test, assert_instr(vbroadcastss))]
27260pub fn _mm_mask_broadcastss_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
27261 unsafe {
27262 let broadcast: f32x4 = _mm_broadcastss_ps(a).as_f32x4();
        transmute(simd_select_bitmask(k, broadcast, src.as_f32x4()))
27264 }
27265}
27266
27267/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27268///
27269/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastss_ps&expand=574)
27270#[inline]
27271#[target_feature(enable = "avx512f,avx512vl")]
27272#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27273#[cfg_attr(test, assert_instr(vbroadcastss))]
27274pub fn _mm_maskz_broadcastss_ps(k: __mmask8, a: __m128) -> __m128 {
27275 unsafe {
27276 let broadcast: f32x4 = _mm_broadcastss_ps(a).as_f32x4();
        transmute(simd_select_bitmask(k, broadcast, f32x4::ZERO))
27278 }
27279}
27280
27281/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst.
27282///
27283/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastsd_pd&expand=567)
27284#[inline]
27285#[target_feature(enable = "avx512f")]
27286#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27287#[cfg_attr(test, assert_instr(vbroadcastsd))]
27288pub fn _mm512_broadcastsd_pd(a: __m128d) -> __m512d {
27289 unsafe { simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0]) }
27290}
27291
27292/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27293///
27294/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastsd_pd&expand=568)
27295#[inline]
27296#[target_feature(enable = "avx512f")]
27297#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27298#[cfg_attr(test, assert_instr(vbroadcastsd))]
27299pub fn _mm512_mask_broadcastsd_pd(src: __m512d, k: __mmask8, a: __m128d) -> __m512d {
27300 unsafe {
27301 let broadcast: f64x8 = _mm512_broadcastsd_pd(a).as_f64x8();
        transmute(simd_select_bitmask(k, broadcast, src.as_f64x8()))
27303 }
27304}
27305
27306/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27307///
27308/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastsd_pd&expand=569)
27309#[inline]
27310#[target_feature(enable = "avx512f")]
27311#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27312#[cfg_attr(test, assert_instr(vbroadcastsd))]
27313pub fn _mm512_maskz_broadcastsd_pd(k: __mmask8, a: __m128d) -> __m512d {
27314 unsafe {
27315 let broadcast: f64x8 = _mm512_broadcastsd_pd(a).as_f64x8();
        transmute(simd_select_bitmask(k, broadcast, f64x8::ZERO))
27317 }
27318}
27319
27320/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27321///
27322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastsd_pd&expand=565)
27323#[inline]
27324#[target_feature(enable = "avx512f,avx512vl")]
27325#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27326#[cfg_attr(test, assert_instr(vbroadcastsd))]
27327pub fn _mm256_mask_broadcastsd_pd(src: __m256d, k: __mmask8, a: __m128d) -> __m256d {
27328 unsafe {
27329 let broadcast: f64x4 = _mm256_broadcastsd_pd(a).as_f64x4();
        transmute(simd_select_bitmask(k, broadcast, src.as_f64x4()))
27331 }
27332}
27333
27334/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27335///
27336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastsd_pd&expand=566)
27337#[inline]
27338#[target_feature(enable = "avx512f,avx512vl")]
27339#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27340#[cfg_attr(test, assert_instr(vbroadcastsd))]
27341pub fn _mm256_maskz_broadcastsd_pd(k: __mmask8, a: __m128d) -> __m256d {
27342 unsafe {
27343 let broadcast: f64x4 = _mm256_broadcastsd_pd(a).as_f64x4();
        transmute(simd_select_bitmask(k, broadcast, f64x4::ZERO))
27345 }
27346}
27347
27348/// Broadcast the 4 packed 32-bit integers from a to all elements of dst.
27349///
27350/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i32x4&expand=510)
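///
/// A quick usage sketch (assuming `std`, runtime feature detection via
/// `is_x86_feature_detected!`, and the other `std::arch::x86_64` helpers used
/// below):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     if is_x86_feature_detected!("avx512f") {
///         use std::arch::x86_64::*;
///         unsafe {
///             let v = _mm512_broadcast_i32x4(_mm_setr_epi32(0, 1, 2, 3));
///             // The 0, 1, 2, 3 pattern repeats in each of the four 128-bit lanes.
///             let expect = _mm512_setr_epi32(0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3);
///             assert_eq!(_mm512_cmpeq_epi32_mask(v, expect), 0xffff);
///         }
///     }
/// }
/// ```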
27351#[inline]
27352#[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf
27353#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27354pub fn _mm512_broadcast_i32x4(a: __m128i) -> __m512i {
27355 unsafe {
27356 let a: i32x4 = a.as_i32x4();
27357 let ret: i32x16 = simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]);
        transmute(ret)
27359 }
27360}
27361
27362/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27363///
27364/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i32x4&expand=511)
27365#[inline]
27366#[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf
27367#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27368pub fn _mm512_mask_broadcast_i32x4(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
27369 unsafe {
27370 let broadcast: i32x16 = _mm512_broadcast_i32x4(a).as_i32x16();
        transmute(simd_select_bitmask(k, broadcast, src.as_i32x16()))
27372 }
27373}
27374
27375/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27376///
27377/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i32x4&expand=512)
27378#[inline]
27379#[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf
27380#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27381pub fn _mm512_maskz_broadcast_i32x4(k: __mmask16, a: __m128i) -> __m512i {
27382 unsafe {
27383 let broadcast: i32x16 = _mm512_broadcast_i32x4(a).as_i32x16();
        transmute(simd_select_bitmask(k, broadcast, i32x16::ZERO))
27385 }
27386}
27387
27388/// Broadcast the 4 packed 32-bit integers from a to all elements of dst.
27389///
27390/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_i32x4&expand=507)
27391#[inline]
27392#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcasti32x4, linux: vshuf
27393#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27394pub fn _mm256_broadcast_i32x4(a: __m128i) -> __m256i {
27395 unsafe {
27396 let a: i32x4 = a.as_i32x4();
27397 let ret: i32x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]);
        transmute(ret)
27399 }
27400}
27401
27402/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27403///
27404/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_i32x4&expand=508)
27405#[inline]
27406#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcasti32x4, linux: vshuf
27407#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27408pub fn _mm256_mask_broadcast_i32x4(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
27409 unsafe {
27410 let broadcast: i32x8 = _mm256_broadcast_i32x4(a).as_i32x8();
        transmute(simd_select_bitmask(k, broadcast, src.as_i32x8()))
27412 }
27413}
27414
27415/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27416///
27417/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_i32x4&expand=509)
27418#[inline]
27419#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcasti32x4, linux: vshuf
27420#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27421pub fn _mm256_maskz_broadcast_i32x4(k: __mmask8, a: __m128i) -> __m256i {
27422 unsafe {
27423 let broadcast: i32x8 = _mm256_broadcast_i32x4(a).as_i32x8();
        transmute(simd_select_bitmask(k, broadcast, i32x8::ZERO))
27425 }
27426}
27427
27428/// Broadcast the 4 packed 64-bit integers from a to all elements of dst.
27429///
27430/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i64x4&expand=522)
27431#[inline]
27432#[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm
27433#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27434pub fn _mm512_broadcast_i64x4(a: __m256i) -> __m512i {
27435 unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]) }
27436}
27437
27438/// Broadcast the 4 packed 64-bit integers from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27439///
27440/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i64x4&expand=523)
27441#[inline]
27442#[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm
27443#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27444pub fn _mm512_mask_broadcast_i64x4(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
27445 unsafe {
27446 let broadcast: i64x8 = _mm512_broadcast_i64x4(a).as_i64x8();
        transmute(simd_select_bitmask(k, broadcast, src.as_i64x8()))
27448 }
27449}
27450
27451/// Broadcast the 4 packed 64-bit integers from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27452///
27453/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i64x4&expand=524)
27454#[inline]
27455#[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm
27456#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27457pub fn _mm512_maskz_broadcast_i64x4(k: __mmask8, a: __m256i) -> __m512i {
27458 unsafe {
27459 let broadcast: i64x8 = _mm512_broadcast_i64x4(a).as_i64x8();
        transmute(simd_select_bitmask(k, broadcast, i64x8::ZERO))
27461 }
27462}
27463
27464/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst.
27465///
27466/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f32x4&expand=483)
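///
/// A quick usage sketch (assuming `std`, runtime feature detection via
/// `is_x86_feature_detected!`, and the other `std::arch::x86_64` helpers used
/// below):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     if is_x86_feature_detected!("avx512f") {
///         use std::arch::x86_64::*;
///         unsafe {
///             let v = _mm512_broadcast_f32x4(_mm_setr_ps(1.0, 2.0, 3.0, 4.0));
///             // Each 128-bit lane sums to 10.0 and there are four lanes.
///             assert_eq!(_mm512_reduce_add_ps(v), 40.0);
///         }
///     }
/// }
/// ```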
27467#[inline]
27468#[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshuf
27469#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27470pub fn _mm512_broadcast_f32x4(a: __m128) -> __m512 {
27471 unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]) }
27472}
27473
27474/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27475///
27476/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f32x4&expand=484)
27477#[inline]
#[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshuf
27479#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27480pub fn _mm512_mask_broadcast_f32x4(src: __m512, k: __mmask16, a: __m128) -> __m512 {
27481 unsafe {
27482 let broadcast: f32x16 = _mm512_broadcast_f32x4(a).as_f32x16();
        transmute(simd_select_bitmask(k, broadcast, src.as_f32x16()))
27484 }
27485}
27486
27487/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27488///
27489/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f32x4&expand=485)
27490#[inline]
#[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshuf
27492#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27493pub fn _mm512_maskz_broadcast_f32x4(k: __mmask16, a: __m128) -> __m512 {
27494 unsafe {
27495 let broadcast: f32x16 = _mm512_broadcast_f32x4(a).as_f32x16();
        transmute(simd_select_bitmask(k, broadcast, f32x16::ZERO))
27497 }
27498}
27499
27500/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst.
27501///
27502/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_f32x4&expand=480)
27503#[inline]
27504#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcastf32x4, linux: vshuf
27505#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27506pub fn _mm256_broadcast_f32x4(a: __m128) -> __m256 {
27507 unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]) }
27508}
27509
27510/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27511///
27512/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_f32x4&expand=481)
27513#[inline]
#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcastf32x4, linux: vshuf
27515#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27516pub fn _mm256_mask_broadcast_f32x4(src: __m256, k: __mmask8, a: __m128) -> __m256 {
27517 unsafe {
27518 let broadcast: f32x8 = _mm256_broadcast_f32x4(a).as_f32x8();
        transmute(simd_select_bitmask(k, broadcast, src.as_f32x8()))
27520 }
27521}
27522
27523/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27524///
27525/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_f32x4&expand=482)
27526#[inline]
#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcastf32x4, linux: vshuf
27528#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27529pub fn _mm256_maskz_broadcast_f32x4(k: __mmask8, a: __m128) -> __m256 {
27530 unsafe {
27531 let broadcast: f32x8 = _mm256_broadcast_f32x4(a).as_f32x8();
        transmute(simd_select_bitmask(k, broadcast, f32x8::ZERO))
27533 }
27534}
27535
27536/// Broadcast the 4 packed double-precision (64-bit) floating-point elements from a to all elements of dst.
27537///
27538/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f64x4&expand=495)
27539#[inline]
27540#[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vperm
27541#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27542pub fn _mm512_broadcast_f64x4(a: __m256d) -> __m512d {
27543 unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]) }
27544}
27545
27546/// Broadcast the 4 packed double-precision (64-bit) floating-point elements from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27547///
27548/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f64x4&expand=496)
27549#[inline]
#[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vperm
27551#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27552pub fn _mm512_mask_broadcast_f64x4(src: __m512d, k: __mmask8, a: __m256d) -> __m512d {
27553 unsafe {
27554 let broadcast: f64x8 = _mm512_broadcast_f64x4(a).as_f64x8();
        transmute(simd_select_bitmask(k, broadcast, src.as_f64x8()))
27556 }
27557}
27558
27559/// Broadcast the 4 packed double-precision (64-bit) floating-point elements from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27560///
27561/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f64x4&expand=497)
27562#[inline]
#[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vperm
27564#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27565pub fn _mm512_maskz_broadcast_f64x4(k: __mmask8, a: __m256d) -> __m512d {
27566 unsafe {
27567 let broadcast: f64x8 = _mm512_broadcast_f64x4(a).as_f64x8();
        transmute(simd_select_bitmask(k, broadcast, f64x8::ZERO))
27569 }
27570}
27571
27572/// Blend packed 32-bit integers from a and b using control mask k, and store the results in dst.
27573///
27574/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_epi32&expand=435)
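///
/// A sketch of the blend semantics (assuming `std`, runtime feature detection
/// via `is_x86_feature_detected!`, and the other `std::arch::x86_64` helpers
/// used below):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     if is_x86_feature_detected!("avx512f") {
///         use std::arch::x86_64::*;
///         unsafe {
///             let a = _mm512_set1_epi32(1);
///             let b = _mm512_set1_epi32(2);
///             // Lanes whose mask bit is set come from `b`; the rest come from `a`.
///             let r = _mm512_mask_blend_epi32(0b1010_1010_1010_1010, a, b);
///             assert_eq!(_mm512_cmpeq_epi32_mask(r, b), 0b1010_1010_1010_1010);
///         }
///     }
/// }
/// ```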
27575#[inline]
27576#[target_feature(enable = "avx512f")]
27577#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27578#[cfg_attr(test, assert_instr(vmovdqa32))] //should be vpblendmd
27579pub fn _mm512_mask_blend_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_select_bitmask(k, b.as_i32x16(), a.as_i32x16())) }
27581}
27582
27583/// Blend packed 32-bit integers from a and b using control mask k, and store the results in dst.
27584///
27585/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_epi32&expand=434)
27586#[inline]
27587#[target_feature(enable = "avx512f,avx512vl")]
27588#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27589#[cfg_attr(test, assert_instr(vmovdqa32))] //should be vpblendmd
27590pub fn _mm256_mask_blend_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_select_bitmask(k, b.as_i32x8(), a.as_i32x8())) }
27592}
27593
27594/// Blend packed 32-bit integers from a and b using control mask k, and store the results in dst.
27595///
27596/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_epi32&expand=432)
27597#[inline]
27598#[target_feature(enable = "avx512f,avx512vl")]
27599#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27600#[cfg_attr(test, assert_instr(vmovdqa32))] //should be vpblendmd
27601pub fn _mm_mask_blend_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_select_bitmask(k, b.as_i32x4(), a.as_i32x4())) }
27603}
27604
27605/// Blend packed 64-bit integers from a and b using control mask k, and store the results in dst.
27606///
27607/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_epi64&expand=438)
27608#[inline]
27609#[target_feature(enable = "avx512f")]
27610#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27611#[cfg_attr(test, assert_instr(vmovdqa64))] //should be vpblendmq
27612pub fn _mm512_mask_blend_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_select_bitmask(k, b.as_i64x8(), a.as_i64x8())) }
27614}
27615
27616/// Blend packed 64-bit integers from a and b using control mask k, and store the results in dst.
27617///
27618/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_epi64&expand=437)
27619#[inline]
27620#[target_feature(enable = "avx512f,avx512vl")]
27621#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27622#[cfg_attr(test, assert_instr(vmovdqa64))] //should be vpblendmq
27623pub fn _mm256_mask_blend_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_select_bitmask(k, b.as_i64x4(), a.as_i64x4())) }
27625}
27626
27627/// Blend packed 64-bit integers from a and b using control mask k, and store the results in dst.
27628///
27629/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_epi64&expand=436)
27630#[inline]
27631#[target_feature(enable = "avx512f,avx512vl")]
27632#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27633#[cfg_attr(test, assert_instr(vmovdqa64))] //should be vpblendmq
27634pub fn _mm_mask_blend_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_select_bitmask(k, b.as_i64x2(), a.as_i64x2())) }
27636}
27637
27638/// Blend packed single-precision (32-bit) floating-point elements from a and b using control mask k, and store the results in dst.
27639///
27640/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_ps&expand=451)
27641#[inline]
27642#[target_feature(enable = "avx512f")]
27643#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27644#[cfg_attr(test, assert_instr(vmovaps))] //should be vpblendmps
27645pub fn _mm512_mask_blend_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe { transmute(simd_select_bitmask(k, b.as_f32x16(), a.as_f32x16())) }
27647}
27648
27649/// Blend packed single-precision (32-bit) floating-point elements from a and b using control mask k, and store the results in dst.
27650///
27651/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_ps&expand=450)
27652#[inline]
27653#[target_feature(enable = "avx512f,avx512vl")]
27654#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27655#[cfg_attr(test, assert_instr(vmovaps))] //should be vpblendmps
27656pub fn _mm256_mask_blend_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe { transmute(simd_select_bitmask(k, b.as_f32x8(), a.as_f32x8())) }
27658}
27659
27660/// Blend packed single-precision (32-bit) floating-point elements from a and b using control mask k, and store the results in dst.
27661///
27662/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_ps&expand=448)
27663#[inline]
27664#[target_feature(enable = "avx512f,avx512vl")]
27665#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27666#[cfg_attr(test, assert_instr(vmovaps))] //should be vpblendmps
27667pub fn _mm_mask_blend_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe { transmute(simd_select_bitmask(k, b.as_f32x4(), a.as_f32x4())) }
27669}
27670
27671/// Blend packed double-precision (64-bit) floating-point elements from a and b using control mask k, and store the results in dst.
27672///
27673/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_pd&expand=446)
27674#[inline]
27675#[target_feature(enable = "avx512f")]
27676#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27677#[cfg_attr(test, assert_instr(vmovapd))] //should be vpblendmpd
27678pub fn _mm512_mask_blend_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe { transmute(simd_select_bitmask(k, b.as_f64x8(), a.as_f64x8())) }
27680}
27681
27682/// Blend packed double-precision (64-bit) floating-point elements from a and b using control mask k, and store the results in dst.
27683///
27684/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_pd&expand=445)
27685#[inline]
27686#[target_feature(enable = "avx512f,avx512vl")]
27687#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27688#[cfg_attr(test, assert_instr(vmovapd))] //should be vpblendmpd
27689pub fn _mm256_mask_blend_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe { transmute(simd_select_bitmask(k, b.as_f64x4(), a.as_f64x4())) }
27691}
27692
27693/// Blend packed double-precision (64-bit) floating-point elements from a and b using control mask k, and store the results in dst.
27694///
27695/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_pd&expand=443)
27696#[inline]
27697#[target_feature(enable = "avx512f,avx512vl")]
27698#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27699#[cfg_attr(test, assert_instr(vmovapd))] //should be vpblendmpd
27700pub fn _mm_mask_blend_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe { transmute(simd_select_bitmask(k, b.as_f64x2(), a.as_f64x2())) }
27702}
27703
27704/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 64 bytes (16 elements) in dst.
27705///
/// <div class="warning">Only the lowest <strong>4 bits</strong> of <code>IMM8</code> are used (shift by at most 60 bytes)!</div>
27707///
27708/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_alignr_epi32&expand=245)
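///
/// A worked sketch of the shift (assuming `std`, runtime feature detection via
/// `is_x86_feature_detected!`, and the other `std::arch::x86_64` helpers used
/// below):
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     if is_x86_feature_detected!("avx512f") {
///         use std::arch::x86_64::*;
///         unsafe {
///             let a = _mm512_set1_epi32(100);
///             let b = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
///             // Shifting the concatenation `a:b` right by one element drops b[0],
///             // giving b[1], b[2], ..., b[15] followed by a[0].
///             let r = _mm512_alignr_epi32::<1>(a, b);
///             let expect =
///                 _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 100);
///             assert_eq!(_mm512_cmpeq_epi32_mask(r, expect), 0xffff);
///         }
///     }
/// }
/// ```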
27709#[inline]
27710#[target_feature(enable = "avx512f")]
27711#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27712#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27713#[rustc_legacy_const_generics(2)]
27714pub fn _mm512_alignr_epi32<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
27715 unsafe {
27716 static_assert_uimm_bits!(IMM8, 8);
27717 let a = a.as_i32x16();
27718 let b = b.as_i32x16();
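        // Only the low 4 bits of `IMM8` select the shift; the match below covers 0..=15.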
27719 let imm8: i32 = IMM8 % 16;
27720 let r: i32x16 = match imm8 {
27721 0 => simd_shuffle!(
27722 a,
27723 b,
27724 [
27725 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
27726 ],
27727 ),
27728 1 => simd_shuffle!(
27729 a,
27730 b,
27731 [
27732 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0,
27733 ],
27734 ),
27735 2 => simd_shuffle!(
27736 a,
27737 b,
27738 [18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1],
27739 ),
27740 3 => simd_shuffle!(
27741 a,
27742 b,
27743 [19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2],
27744 ),
27745 4 => simd_shuffle!(
27746 a,
27747 b,
27748 [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3],
27749 ),
27750 5 => simd_shuffle!(
27751 a,
27752 b,
27753 [21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4],
27754 ),
27755 6 => simd_shuffle!(
27756 a,
27757 b,
27758 [22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5],
27759 ),
27760 7 => simd_shuffle!(
27761 a,
27762 b,
27763 [23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6],
27764 ),
27765 8 => simd_shuffle!(
27766 a,
27767 b,
27768 [24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7],
27769 ),
27770 9 => simd_shuffle!(
27771 a,
27772 b,
27773 [25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8],
27774 ),
27775 10 => simd_shuffle!(a, b, [26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
27776 11 => simd_shuffle!(a, b, [27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]),
27777 12 => simd_shuffle!(a, b, [28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]),
27778 13 => simd_shuffle!(a, b, [29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]),
27779 14 => simd_shuffle!(a, b, [30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]),
27780 15 => simd_shuffle!(a, b, [31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]),
27781 _ => unreachable_unchecked(),
27782 };
27783 transmute(r)
27784 }
27785}
27786
27787/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 64 bytes (16 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27788///
27789/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_alignr_epi32&expand=246)
27790#[inline]
27791#[target_feature(enable = "avx512f")]
27792#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27793#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27794#[rustc_legacy_const_generics(4)]
27795pub fn _mm512_mask_alignr_epi32<const IMM8: i32>(
27796 src: __m512i,
27797 k: __mmask16,
27798 a: __m512i,
27799 b: __m512i,
27800) -> __m512i {
27801 unsafe {
27802 static_assert_uimm_bits!(IMM8, 8);
27803 let r: __m512i = _mm512_alignr_epi32::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16()))
27805 }
27806}
27807
/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 64 bytes (16 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27809///
27810/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_alignr_epi32&expand=247)
27811#[inline]
27812#[target_feature(enable = "avx512f")]
27813#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27814#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27815#[rustc_legacy_const_generics(3)]
27816pub fn _mm512_maskz_alignr_epi32<const IMM8: i32>(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
27817 unsafe {
27818 static_assert_uimm_bits!(IMM8, 8);
27819 let r: __m512i = _mm512_alignr_epi32::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO))
27821 }
27822}
27823
27824/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 32 bytes (8 elements) in dst.
27825///
/// <div class="warning">Only the lowest <strong>3 bits</strong> of the immediate are used (shift at maximum by 28 bytes)!</div>
27827///
27828/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_alignr_epi32&expand=242)
27829#[inline]
27830#[target_feature(enable = "avx512f,avx512vl")]
27831#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27832#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27833#[rustc_legacy_const_generics(2)]
27834pub fn _mm256_alignr_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
27835 unsafe {
27836 static_assert_uimm_bits!(IMM8, 8);
27837 let a: i32x8 = a.as_i32x8();
27838 let b: i32x8 = b.as_i32x8();
27839 let imm8: i32 = IMM8 % 8;
27840 let r: i32x8 = match imm8 {
27841 0 => simd_shuffle!(a, b, [8, 9, 10, 11, 12, 13, 14, 15]),
27842 1 => simd_shuffle!(a, b, [9, 10, 11, 12, 13, 14, 15, 0]),
27843 2 => simd_shuffle!(a, b, [10, 11, 12, 13, 14, 15, 0, 1]),
27844 3 => simd_shuffle!(a, b, [11, 12, 13, 14, 15, 0, 1, 2]),
27845 4 => simd_shuffle!(a, b, [12, 13, 14, 15, 0, 1, 2, 3]),
27846 5 => simd_shuffle!(a, b, [13, 14, 15, 0, 1, 2, 3, 4]),
27847 6 => simd_shuffle!(a, b, [14, 15, 0, 1, 2, 3, 4, 5]),
27848 7 => simd_shuffle!(a, b, [15, 0, 1, 2, 3, 4, 5, 6]),
27849 _ => unreachable_unchecked(),
27850 };
        transmute(r)
27852 }
27853}
27854
27855/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 32 bytes (8 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27856///
27857/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_alignr_epi32&expand=243)
27858#[inline]
27859#[target_feature(enable = "avx512f,avx512vl")]
27860#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27861#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27862#[rustc_legacy_const_generics(4)]
27863pub fn _mm256_mask_alignr_epi32<const IMM8: i32>(
27864 src: __m256i,
27865 k: __mmask8,
27866 a: __m256i,
27867 b: __m256i,
27868) -> __m256i {
27869 unsafe {
27870 static_assert_uimm_bits!(IMM8, 8);
27871 let r: __m256i = _mm256_alignr_epi32::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8()))
27873 }
27874}
27875
27876/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 32 bytes (8 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27877///
27878/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_alignr_epi32&expand=244)
27879#[inline]
27880#[target_feature(enable = "avx512f,avx512vl")]
27881#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27882#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27883#[rustc_legacy_const_generics(3)]
27884pub fn _mm256_maskz_alignr_epi32<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
27885 unsafe {
27886 static_assert_uimm_bits!(IMM8, 8);
27887 let r: __m256i = _mm256_alignr_epi32::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO))
27889 }
27890}
27891
27892/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 16 bytes (4 elements) in dst.
27893///
/// <div class="warning">Only the lowest <strong>2 bits</strong> of the immediate are used (shift at maximum by 12 bytes)!</div>
27895///
27896/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_alignr_epi32&expand=239)
27897#[inline]
27898#[target_feature(enable = "avx512f,avx512vl")]
27899#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27900#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))] //should be valignd
27901#[rustc_legacy_const_generics(2)]
27902pub fn _mm_alignr_epi32<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
27903 unsafe {
27904 static_assert_uimm_bits!(IMM8, 8);
27905 let a: i32x4 = a.as_i32x4();
27906 let b: i32x4 = b.as_i32x4();
27907 let imm8: i32 = IMM8 % 4;
27908 let r: i32x4 = match imm8 {
27909 0 => simd_shuffle!(a, b, [4, 5, 6, 7]),
27910 1 => simd_shuffle!(a, b, [5, 6, 7, 0]),
27911 2 => simd_shuffle!(a, b, [6, 7, 0, 1]),
27912 3 => simd_shuffle!(a, b, [7, 0, 1, 2]),
27913 _ => unreachable_unchecked(),
27914 };
        transmute(r)
27916 }
27917}
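
// Illustrative sketch (added for exposition; not part of upstream stdarch): only
// `IMM8 % 4` is meaningful for the 128-bit variant, so a shift count of 5 selects
// the same lanes as 1. The helper name is hypothetical and compiled only under
// `cfg(test)`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
fn _mm_alignr_epi32_wraparound_sketch() {
    let a = _mm_setr_epi32(0, 1, 2, 3);
    let b = _mm_setr_epi32(4, 5, 6, 7);
    // IMM8 = 1 keeps [b1, b2, b3, a0]; IMM8 = 5 wraps around to the same result.
    let r1 = _mm_alignr_epi32::<1>(a, b);
    let r5 = _mm_alignr_epi32::<5>(a, b);
    assert_eq!(_mm_cmpeq_epi32_mask(r1, r5), 0x0f);
}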
27918
27919/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 16 bytes (4 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27920///
27921/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_alignr_epi32&expand=240)
27922#[inline]
27923#[target_feature(enable = "avx512f,avx512vl")]
27924#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27925#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27926#[rustc_legacy_const_generics(4)]
27927pub fn _mm_mask_alignr_epi32<const IMM8: i32>(
27928 src: __m128i,
27929 k: __mmask8,
27930 a: __m128i,
27931 b: __m128i,
27932) -> __m128i {
27933 unsafe {
27934 static_assert_uimm_bits!(IMM8, 8);
27935 let r: __m128i = _mm_alignr_epi32::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4()))
27937 }
27938}
27939
27940/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 16 bytes (4 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27941///
27942/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_alignr_epi32&expand=241)
27943#[inline]
27944#[target_feature(enable = "avx512f,avx512vl")]
27945#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27946#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27947#[rustc_legacy_const_generics(3)]
27948pub fn _mm_maskz_alignr_epi32<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
27949 unsafe {
27950 static_assert_uimm_bits!(IMM8, 8);
27951 let r: __m128i = _mm_alignr_epi32::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO))
27953 }
27954}
27955
27956/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 64 bytes (8 elements) in dst.
27957///
/// <div class="warning">Only the lowest <strong>3 bits</strong> of the immediate are used (shift at maximum by 56 bytes)!</div>
27959///
27960/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_alignr_epi64&expand=254)
27961#[inline]
27962#[target_feature(enable = "avx512f")]
27963#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27964#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
27965#[rustc_legacy_const_generics(2)]
27966pub fn _mm512_alignr_epi64<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
27967 unsafe {
27968 static_assert_uimm_bits!(IMM8, 8);
27969 let imm8: i32 = IMM8 % 8;
27970 let r: i64x8 = match imm8 {
27971 0 => simd_shuffle!(a, b, [8, 9, 10, 11, 12, 13, 14, 15]),
27972 1 => simd_shuffle!(a, b, [9, 10, 11, 12, 13, 14, 15, 0]),
27973 2 => simd_shuffle!(a, b, [10, 11, 12, 13, 14, 15, 0, 1]),
27974 3 => simd_shuffle!(a, b, [11, 12, 13, 14, 15, 0, 1, 2]),
27975 4 => simd_shuffle!(a, b, [12, 13, 14, 15, 0, 1, 2, 3]),
27976 5 => simd_shuffle!(a, b, [13, 14, 15, 0, 1, 2, 3, 4]),
27977 6 => simd_shuffle!(a, b, [14, 15, 0, 1, 2, 3, 4, 5]),
27978 7 => simd_shuffle!(a, b, [15, 0, 1, 2, 3, 4, 5, 6]),
27979 _ => unreachable_unchecked(),
27980 };
        transmute(r)
27982 }
27983}
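
// Illustrative sketch (added for exposition; not part of upstream stdarch): the
// 64-bit variant rotates eight 64-bit lanes instead of sixteen 32-bit lanes.
// Hypothetical helper, compiled only under `cfg(test)`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _mm512_alignr_epi64_usage_sketch() {
    let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
    let b = _mm512_setr_epi64(8, 9, 10, 11, 12, 13, 14, 15);
    // Shifting the concatenation a:b right by one 64-bit lane keeps [b1, ..., b7, a0].
    let r = _mm512_alignr_epi64::<1>(a, b);
    let e = _mm512_setr_epi64(9, 10, 11, 12, 13, 14, 15, 0);
    assert_eq!(_mm512_cmpeq_epi64_mask(r, e), 0xff);
}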
27984
27985/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 64 bytes (8 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27986///
27987/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_alignr_epi64&expand=255)
27988#[inline]
27989#[target_feature(enable = "avx512f")]
27990#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
27991#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
27992#[rustc_legacy_const_generics(4)]
27993pub fn _mm512_mask_alignr_epi64<const IMM8: i32>(
27994 src: __m512i,
27995 k: __mmask8,
27996 a: __m512i,
27997 b: __m512i,
27998) -> __m512i {
27999 unsafe {
28000 static_assert_uimm_bits!(IMM8, 8);
28001 let r: __m512i = _mm512_alignr_epi64::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8()))
28003 }
28004}
28005
/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 64 bytes (8 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28007///
28008/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_alignr_epi64&expand=256)
28009#[inline]
28010#[target_feature(enable = "avx512f")]
28011#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28012#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
28013#[rustc_legacy_const_generics(3)]
28014pub fn _mm512_maskz_alignr_epi64<const IMM8: i32>(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28015 unsafe {
28016 static_assert_uimm_bits!(IMM8, 8);
28017 let r: __m512i = _mm512_alignr_epi64::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO))
28019 }
28020}
28021
28022/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 32 bytes (4 elements) in dst.
28023///
/// <div class="warning">Only the lowest <strong>2 bits</strong> of the immediate are used (shift at maximum by 24 bytes)!</div>
28025///
28026/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_alignr_epi64&expand=251)
28027#[inline]
28028#[target_feature(enable = "avx512f,avx512vl")]
28029#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28030#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
28031#[rustc_legacy_const_generics(2)]
28032pub fn _mm256_alignr_epi64<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
28033 unsafe {
28034 static_assert_uimm_bits!(IMM8, 8);
28035 let imm8: i32 = IMM8 % 4;
28036 let r: i64x4 = match imm8 {
28037 0 => simd_shuffle!(a, b, [4, 5, 6, 7]),
28038 1 => simd_shuffle!(a, b, [5, 6, 7, 0]),
28039 2 => simd_shuffle!(a, b, [6, 7, 0, 1]),
28040 3 => simd_shuffle!(a, b, [7, 0, 1, 2]),
28041 _ => unreachable_unchecked(),
28042 };
        transmute(r)
28044 }
28045}
28046
28047/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 32 bytes (4 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28048///
28049/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_alignr_epi64&expand=252)
28050#[inline]
28051#[target_feature(enable = "avx512f,avx512vl")]
28052#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28053#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
28054#[rustc_legacy_const_generics(4)]
28055pub fn _mm256_mask_alignr_epi64<const IMM8: i32>(
28056 src: __m256i,
28057 k: __mmask8,
28058 a: __m256i,
28059 b: __m256i,
28060) -> __m256i {
28061 unsafe {
28062 static_assert_uimm_bits!(IMM8, 8);
28063 let r: __m256i = _mm256_alignr_epi64::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4()))
28065 }
28066}
28067
28068/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 32 bytes (4 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28069///
28070/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_alignr_epi64&expand=253)
28071#[inline]
28072#[target_feature(enable = "avx512f,avx512vl")]
28073#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28074#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
28075#[rustc_legacy_const_generics(3)]
28076pub fn _mm256_maskz_alignr_epi64<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28077 unsafe {
28078 static_assert_uimm_bits!(IMM8, 8);
28079 let r: __m256i = _mm256_alignr_epi64::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO))
28081 }
28082}
28083
28084/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 16 bytes (2 elements) in dst.
28085///
/// <div class="warning">Only the lowest <strong>bit</strong> of the immediate is used (shift at maximum by 8 bytes)!</div>
28087///
28088/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_alignr_epi64&expand=248)
28089#[inline]
28090#[target_feature(enable = "avx512f,avx512vl")]
28091#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28092#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))] //should be valignq
28093#[rustc_legacy_const_generics(2)]
28094pub fn _mm_alignr_epi64<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
28095 unsafe {
28096 static_assert_uimm_bits!(IMM8, 8);
28097 let imm8: i32 = IMM8 % 2;
28098 let r: i64x2 = match imm8 {
28099 0 => simd_shuffle!(a, b, [2, 3]),
28100 1 => simd_shuffle!(a, b, [3, 0]),
28101 _ => unreachable_unchecked(),
28102 };
        transmute(r)
28104 }
28105}
28106
28107/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 16 bytes (2 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28108///
28109/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_alignr_epi64&expand=249)
28110#[inline]
28111#[target_feature(enable = "avx512f,avx512vl")]
28112#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28113#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
28114#[rustc_legacy_const_generics(4)]
28115pub fn _mm_mask_alignr_epi64<const IMM8: i32>(
28116 src: __m128i,
28117 k: __mmask8,
28118 a: __m128i,
28119 b: __m128i,
28120) -> __m128i {
28121 unsafe {
28122 static_assert_uimm_bits!(IMM8, 8);
28123 let r: __m128i = _mm_alignr_epi64::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x2(), src.as_i64x2()))
28125 }
28126}
28127
28128/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 16 bytes (2 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28129///
28130/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_alignr_epi64&expand=250)
28131#[inline]
28132#[target_feature(enable = "avx512f,avx512vl")]
28133#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28134#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
28135#[rustc_legacy_const_generics(3)]
28136pub fn _mm_maskz_alignr_epi64<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28137 unsafe {
28138 static_assert_uimm_bits!(IMM8, 8);
28139 let r: __m128i = _mm_alignr_epi64::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x2(), i64x2::ZERO))
28141 }
28142}
28143
28144/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst.
28145///
28146/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_epi32&expand=272)
28147#[inline]
28148#[target_feature(enable = "avx512f")]
28149#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28150#[cfg_attr(test, assert_instr(vpandq))] //should be vpandd, but generate vpandq
28151pub fn _mm512_and_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_and(a.as_i32x16(), b.as_i32x16())) }
28153}
28154
28155/// Performs element-by-element bitwise AND between packed 32-bit integer elements of a and b, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28156///
28157/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_and_epi32&expand=273)
28158#[inline]
28159#[target_feature(enable = "avx512f")]
28160#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28161#[cfg_attr(test, assert_instr(vpandd))]
28162pub fn _mm512_mask_and_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28163 unsafe {
28164 let and: i32x16 = _mm512_and_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, and, src.as_i32x16()))
28166 }
28167}
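
// Illustrative sketch (added for exposition; not part of upstream stdarch):
// demonstrates writemask semantics: lanes whose mask bit is clear keep the value
// from `src` instead of receiving the AND result. Hypothetical helper, compiled
// only under `cfg(test)`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _mm512_mask_and_epi32_writemask_sketch() {
    let src = _mm512_set1_epi32(-1);
    let a = _mm512_set1_epi32(0b1100);
    let b = _mm512_set1_epi32(0b1010);
    // Only lane 0 is selected; every other lane is copied from `src`.
    let r = _mm512_mask_and_epi32(src, 0b0000_0000_0000_0001, a, b);
    let e = _mm512_setr_epi32(
        0b1000, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    );
    assert_eq!(_mm512_cmpeq_epi32_mask(r, e), 0xffff);
}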
28168
28169/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28170///
28171/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_and_epi32&expand=274)
28172#[inline]
28173#[target_feature(enable = "avx512f")]
28174#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28175#[cfg_attr(test, assert_instr(vpandd))]
28176pub fn _mm512_maskz_and_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28177 unsafe {
28178 let and: i32x16 = _mm512_and_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, and, i32x16::ZERO))
28180 }
28181}
28182
28183/// Performs element-by-element bitwise AND between packed 32-bit integer elements of a and b, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28184///
28185/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_and_epi32&expand=270)
28186#[inline]
28187#[target_feature(enable = "avx512f,avx512vl")]
28188#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28189#[cfg_attr(test, assert_instr(vpandd))]
28190pub fn _mm256_mask_and_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28191 unsafe {
        let and: i32x8 = simd_and(a.as_i32x8(), b.as_i32x8());
        transmute(simd_select_bitmask(k, and, src.as_i32x8()))
28194 }
28195}
28196
28197/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28198///
28199/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_and_epi32&expand=271)
28200#[inline]
28201#[target_feature(enable = "avx512f,avx512vl")]
28202#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28203#[cfg_attr(test, assert_instr(vpandd))]
28204pub fn _mm256_maskz_and_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28205 unsafe {
        let and: i32x8 = simd_and(a.as_i32x8(), b.as_i32x8());
        transmute(simd_select_bitmask(k, and, i32x8::ZERO))
28208 }
28209}
28210
28211/// Performs element-by-element bitwise AND between packed 32-bit integer elements of a and b, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28212///
28213/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_and_epi32&expand=268)
28214#[inline]
28215#[target_feature(enable = "avx512f,avx512vl")]
28216#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28217#[cfg_attr(test, assert_instr(vpandd))]
28218pub fn _mm_mask_and_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28219 unsafe {
        let and: i32x4 = simd_and(a.as_i32x4(), b.as_i32x4());
        transmute(simd_select_bitmask(k, and, src.as_i32x4()))
28222 }
28223}
28224
28225/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28226///
28227/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_and_epi32&expand=269)
28228#[inline]
28229#[target_feature(enable = "avx512f,avx512vl")]
28230#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28231#[cfg_attr(test, assert_instr(vpandd))]
28232pub fn _mm_maskz_and_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28233 unsafe {
        let and: i32x4 = simd_and(a.as_i32x4(), b.as_i32x4());
        transmute(simd_select_bitmask(k, and, i32x4::ZERO))
28236 }
28237}
28238
28239/// Compute the bitwise AND of 512 bits (composed of packed 64-bit integers) in a and b, and store the results in dst.
28240///
28241/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_epi64&expand=279)
28242#[inline]
28243#[target_feature(enable = "avx512f")]
28244#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28245#[cfg_attr(test, assert_instr(vpandq))]
28246pub fn _mm512_and_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_and(a.as_i64x8(), b.as_i64x8())) }
28248}
28249
28250/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28251///
28252/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_and_epi64&expand=280)
28253#[inline]
28254#[target_feature(enable = "avx512f")]
28255#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28256#[cfg_attr(test, assert_instr(vpandq))]
28257pub fn _mm512_mask_and_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28258 unsafe {
28259 let and: i64x8 = _mm512_and_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, and, src.as_i64x8()))
28261 }
28262}
28263
28264/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28265///
28266/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_and_epi64&expand=281)
28267#[inline]
28268#[target_feature(enable = "avx512f")]
28269#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28270#[cfg_attr(test, assert_instr(vpandq))]
28271pub fn _mm512_maskz_and_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28272 unsafe {
28273 let and: i64x8 = _mm512_and_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, and, i64x8::ZERO))
28275 }
28276}
28277
28278/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28279///
28280/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_and_epi64&expand=277)
28281#[inline]
28282#[target_feature(enable = "avx512f,avx512vl")]
28283#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28284#[cfg_attr(test, assert_instr(vpandq))]
28285pub fn _mm256_mask_and_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28286 unsafe {
        let and: i64x4 = simd_and(a.as_i64x4(), b.as_i64x4());
        transmute(simd_select_bitmask(k, and, src.as_i64x4()))
28289 }
28290}
28291
28292/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28293///
28294/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_and_epi64&expand=278)
28295#[inline]
28296#[target_feature(enable = "avx512f,avx512vl")]
28297#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28298#[cfg_attr(test, assert_instr(vpandq))]
28299pub fn _mm256_maskz_and_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28300 unsafe {
        let and: i64x4 = simd_and(a.as_i64x4(), b.as_i64x4());
        transmute(simd_select_bitmask(k, and, i64x4::ZERO))
28303 }
28304}
28305
28306/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28307///
28308/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_and_epi64&expand=275)
28309#[inline]
28310#[target_feature(enable = "avx512f,avx512vl")]
28311#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28312#[cfg_attr(test, assert_instr(vpandq))]
28313pub fn _mm_mask_and_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28314 unsafe {
        let and: i64x2 = simd_and(a.as_i64x2(), b.as_i64x2());
        transmute(simd_select_bitmask(k, and, src.as_i64x2()))
28317 }
28318}
28319
28320/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28321///
28322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_and_epi64&expand=276)
28323#[inline]
28324#[target_feature(enable = "avx512f,avx512vl")]
28325#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28326#[cfg_attr(test, assert_instr(vpandq))]
28327pub fn _mm_maskz_and_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28328 unsafe {
        let and: i64x2 = simd_and(a.as_i64x2(), b.as_i64x2());
        transmute(simd_select_bitmask(k, and, i64x2::ZERO))
28331 }
28332}
28333
28334/// Compute the bitwise AND of 512 bits (representing integer data) in a and b, and store the result in dst.
28335///
28336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_si512&expand=302)
28337#[inline]
28338#[target_feature(enable = "avx512f")]
28339#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28340#[cfg_attr(test, assert_instr(vpandq))]
28341pub fn _mm512_and_si512(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_and(a.as_i32x16(), b.as_i32x16())) }
28343}
28344
28345/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst.
28346///
28347/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_epi32&expand=4042)
28348#[inline]
28349#[target_feature(enable = "avx512f")]
28350#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28351#[cfg_attr(test, assert_instr(vporq))]
28352pub fn _mm512_or_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_or(a.as_i32x16(), b.as_i32x16())) }
28354}
28355
28356/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28357///
28358/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_or_epi32&expand=4040)
28359#[inline]
28360#[target_feature(enable = "avx512f")]
28361#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28362#[cfg_attr(test, assert_instr(vpord))]
28363pub fn _mm512_mask_or_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28364 unsafe {
28365 let or: i32x16 = _mm512_or_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, or, src.as_i32x16()))
28367 }
28368}
28369
28370/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28371///
28372/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_or_epi32&expand=4041)
28373#[inline]
28374#[target_feature(enable = "avx512f")]
28375#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28376#[cfg_attr(test, assert_instr(vpord))]
28377pub fn _mm512_maskz_or_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28378 unsafe {
28379 let or: i32x16 = _mm512_or_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, or, i32x16::ZERO))
28381 }
28382}
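
// Illustrative sketch (added for exposition; not part of upstream stdarch):
// demonstrates zeromask semantics: lanes whose mask bit is clear are forced to
// zero rather than keeping any previous value. Hypothetical helper, compiled only
// under `cfg(test)`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _mm512_maskz_or_epi32_zeromask_sketch() {
    let a = _mm512_set1_epi32(0b0101);
    let b = _mm512_set1_epi32(0b0011);
    // The low eight lanes receive a | b = 0b0111; the high eight lanes are zeroed.
    let r = _mm512_maskz_or_epi32(0x00ff, a, b);
    let e = _mm512_setr_epi32(7, 7, 7, 7, 7, 7, 7, 7, 0, 0, 0, 0, 0, 0, 0, 0);
    assert_eq!(_mm512_cmpeq_epi32_mask(r, e), 0xffff);
}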
28383
28384/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst.
28385///
28386/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_or_epi32&expand=4039)
28387#[inline]
28388#[target_feature(enable = "avx512f,avx512vl")]
28389#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28390#[cfg_attr(test, assert_instr(vor))] //should be vpord
28391pub fn _mm256_or_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_or(a.as_i32x8(), b.as_i32x8())) }
28393}
28394
28395/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28396///
28397/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_or_epi32&expand=4037)
28398#[inline]
28399#[target_feature(enable = "avx512f,avx512vl")]
28400#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28401#[cfg_attr(test, assert_instr(vpord))]
28402pub fn _mm256_mask_or_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28403 unsafe {
28404 let or: i32x8 = _mm256_or_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, or, src.as_i32x8()))
28406 }
28407}
28408
28409/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28410///
28411/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_or_epi32&expand=4038)
28412#[inline]
28413#[target_feature(enable = "avx512f,avx512vl")]
28414#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28415#[cfg_attr(test, assert_instr(vpord))]
28416pub fn _mm256_maskz_or_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28417 unsafe {
28418 let or: i32x8 = _mm256_or_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, or, i32x8::ZERO))
28420 }
28421}
28422
28423/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst.
28424///
28425/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_or_epi32&expand=4036)
28426#[inline]
28427#[target_feature(enable = "avx512f,avx512vl")]
28428#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28429#[cfg_attr(test, assert_instr(vor))] //should be vpord
28430pub fn _mm_or_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_or(a.as_i32x4(), b.as_i32x4())) }
28432}
28433
28434/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28435///
28436/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_or_epi32&expand=4034)
28437#[inline]
28438#[target_feature(enable = "avx512f,avx512vl")]
28439#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28440#[cfg_attr(test, assert_instr(vpord))]
28441pub fn _mm_mask_or_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28442 unsafe {
28443 let or: i32x4 = _mm_or_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, or, src.as_i32x4()))
28445 }
28446}
28447
28448/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28449///
28450/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_or_epi32&expand=4035)
28451#[inline]
28452#[target_feature(enable = "avx512f,avx512vl")]
28453#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28454#[cfg_attr(test, assert_instr(vpord))]
28455pub fn _mm_maskz_or_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28456 unsafe {
28457 let or: i32x4 = _mm_or_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, or, i32x4::ZERO))
28459 }
28460}
28461
/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the result in dst.
28463///
28464/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_epi64&expand=4051)
28465#[inline]
28466#[target_feature(enable = "avx512f")]
28467#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28468#[cfg_attr(test, assert_instr(vporq))]
28469pub fn _mm512_or_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_or(a.as_i64x8(), b.as_i64x8())) }
28471}
28472
28473/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28474///
28475/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_or_epi64&expand=4049)
28476#[inline]
28477#[target_feature(enable = "avx512f")]
28478#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28479#[cfg_attr(test, assert_instr(vporq))]
28480pub fn _mm512_mask_or_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28481 unsafe {
28482 let or: i64x8 = _mm512_or_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, or, src.as_i64x8()))
28484 }
28485}
28486
28487/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28488///
28489/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_or_epi64&expand=4050)
28490#[inline]
28491#[target_feature(enable = "avx512f")]
28492#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28493#[cfg_attr(test, assert_instr(vporq))]
28494pub fn _mm512_maskz_or_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28495 unsafe {
28496 let or: i64x8 = _mm512_or_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, or, i64x8::ZERO))
28498 }
28499}
28500
/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the result in dst.
28502///
28503/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_or_epi64&expand=4048)
28504#[inline]
28505#[target_feature(enable = "avx512f,avx512vl")]
28506#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28507#[cfg_attr(test, assert_instr(vor))] //should be vporq
28508pub fn _mm256_or_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_or(a.as_i64x4(), b.as_i64x4())) }
28510}
28511
28512/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28513///
28514/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_or_epi64&expand=4046)
28515#[inline]
28516#[target_feature(enable = "avx512f,avx512vl")]
28517#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28518#[cfg_attr(test, assert_instr(vporq))]
28519pub fn _mm256_mask_or_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28520 unsafe {
28521 let or: i64x4 = _mm256_or_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, or, src.as_i64x4()))
28523 }
28524}
28525
28526/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28527///
28528/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_or_epi64&expand=4047)
28529#[inline]
28530#[target_feature(enable = "avx512f,avx512vl")]
28531#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28532#[cfg_attr(test, assert_instr(vporq))]
28533pub fn _mm256_maskz_or_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28534 unsafe {
28535 let or: i64x4 = _mm256_or_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, or, i64x4::ZERO))
28537 }
28538}
28539
/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the result in dst.
28541///
28542/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_or_epi64&expand=4045)
28543#[inline]
28544#[target_feature(enable = "avx512f,avx512vl")]
28545#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28546#[cfg_attr(test, assert_instr(vor))] //should be vporq
28547pub fn _mm_or_epi64(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_or(a.as_i64x2(), b.as_i64x2())) }
28549}
28550
28551/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28552///
28553/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_or_epi64&expand=4043)
28554#[inline]
28555#[target_feature(enable = "avx512f,avx512vl")]
28556#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28557#[cfg_attr(test, assert_instr(vporq))]
28558pub fn _mm_mask_or_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28559 unsafe {
28560 let or: i64x2 = _mm_or_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, or, src.as_i64x2()))
28562 }
28563}
28564
28565/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28566///
28567/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_or_epi64&expand=4044)
28568#[inline]
28569#[target_feature(enable = "avx512f,avx512vl")]
28570#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28571#[cfg_attr(test, assert_instr(vporq))]
28572pub fn _mm_maskz_or_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28573 unsafe {
28574 let or: i64x2 = _mm_or_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, or, i64x2::ZERO))
28576 }
28577}
28578
28579/// Compute the bitwise OR of 512 bits (representing integer data) in a and b, and store the result in dst.
28580///
28581/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_si512&expand=4072)
28582#[inline]
28583#[target_feature(enable = "avx512f")]
28584#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28585#[cfg_attr(test, assert_instr(vporq))]
28586pub fn _mm512_or_si512(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_or(a.as_i32x16(), b.as_i32x16())) }
28588}
28589
28590/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst.
28591///
28592/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_epi32&expand=6142)
28593#[inline]
28594#[target_feature(enable = "avx512f")]
28595#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28596#[cfg_attr(test, assert_instr(vpxorq))] //should be vpxord
28597pub fn _mm512_xor_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_xor(a.as_i32x16(), b.as_i32x16())) }
28599}
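
// Illustrative sketch (added for exposition; not part of upstream stdarch): XOR
// with an all-ones vector flips every bit, which is also how the `andnot`
// intrinsics later in this file synthesize a bitwise NOT. Hypothetical helper,
// compiled only under `cfg(test)`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _mm512_xor_epi32_bitflip_sketch() {
    let zeros = _mm512_set1_epi32(0);
    let ones = _mm512_set1_epi32(-1);
    // 0 ^ 0xFFFF_FFFF == 0xFFFF_FFFF in every lane.
    let r = _mm512_xor_epi32(zeros, ones);
    assert_eq!(_mm512_cmpeq_epi32_mask(r, ones), 0xffff);
}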
28600
28601/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28602///
28603/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_xor_epi32&expand=6140)
28604#[inline]
28605#[target_feature(enable = "avx512f")]
28606#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28607#[cfg_attr(test, assert_instr(vpxord))]
28608pub fn _mm512_mask_xor_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28609 unsafe {
28610 let xor: i32x16 = _mm512_xor_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, xor, src.as_i32x16()))
28612 }
28613}
28614
28615/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28616///
28617/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_xor_epi32&expand=6141)
28618#[inline]
28619#[target_feature(enable = "avx512f")]
28620#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28621#[cfg_attr(test, assert_instr(vpxord))]
28622pub fn _mm512_maskz_xor_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28623 unsafe {
28624 let xor: i32x16 = _mm512_xor_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, xor, i32x16::ZERO))
28626 }
28627}
28628
28629/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst.
28630///
28631/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_xor_epi32&expand=6139)
28632#[inline]
28633#[target_feature(enable = "avx512f,avx512vl")]
28634#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28635#[cfg_attr(test, assert_instr(vxor))] //should be vpxord
28636pub fn _mm256_xor_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_xor(a.as_i32x8(), b.as_i32x8())) }
28638}
28639
28640/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28641///
28642/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_xor_epi32&expand=6137)
28643#[inline]
28644#[target_feature(enable = "avx512f,avx512vl")]
28645#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28646#[cfg_attr(test, assert_instr(vpxord))]
28647pub fn _mm256_mask_xor_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28648 unsafe {
28649 let xor: i32x8 = _mm256_xor_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, xor, src.as_i32x8()))
28651 }
28652}
28653
28654/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28655///
28656/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_xor_epi32&expand=6138)
28657#[inline]
28658#[target_feature(enable = "avx512f,avx512vl")]
28659#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28660#[cfg_attr(test, assert_instr(vpxord))]
28661pub fn _mm256_maskz_xor_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28662 unsafe {
28663 let xor: i32x8 = _mm256_xor_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, xor, i32x8::ZERO))
28665 }
28666}
28667
28668/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst.
28669///
28670/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_xor_epi32&expand=6136)
28671#[inline]
28672#[target_feature(enable = "avx512f,avx512vl")]
28673#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28674#[cfg_attr(test, assert_instr(vxor))] //should be vpxord
28675pub fn _mm_xor_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_xor(a.as_i32x4(), b.as_i32x4())) }
28677}
28678
28679/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28680///
28681/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_xor_epi32&expand=6134)
28682#[inline]
28683#[target_feature(enable = "avx512f,avx512vl")]
28684#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28685#[cfg_attr(test, assert_instr(vpxord))]
28686pub fn _mm_mask_xor_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28687 unsafe {
28688 let xor: i32x4 = _mm_xor_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, xor, src.as_i32x4()))
28690 }
28691}
28692
28693/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28694///
28695/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_xor_epi32&expand=6135)
28696#[inline]
28697#[target_feature(enable = "avx512f,avx512vl")]
28698#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28699#[cfg_attr(test, assert_instr(vpxord))]
28700pub fn _mm_maskz_xor_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28701 unsafe {
28702 let xor: i32x4 = _mm_xor_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, xor, i32x4::ZERO))
28704 }
28705}
28706
28707/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst.
28708///
28709/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_epi64&expand=6151)
28710#[inline]
28711#[target_feature(enable = "avx512f")]
28712#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28713#[cfg_attr(test, assert_instr(vpxorq))]
28714pub fn _mm512_xor_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_xor(a.as_i64x8(), b.as_i64x8())) }
28716}
28717
28718/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28719///
28720/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_xor_epi64&expand=6149)
28721#[inline]
28722#[target_feature(enable = "avx512f")]
28723#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28724#[cfg_attr(test, assert_instr(vpxorq))]
28725pub fn _mm512_mask_xor_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28726 unsafe {
28727 let xor: i64x8 = _mm512_xor_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, xor, src.as_i64x8()))
28729 }
28730}
28731
28732/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28733///
28734/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_xor_epi64&expand=6150)
28735#[inline]
28736#[target_feature(enable = "avx512f")]
28737#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28738#[cfg_attr(test, assert_instr(vpxorq))]
28739pub fn _mm512_maskz_xor_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28740 unsafe {
28741 let xor: i64x8 = _mm512_xor_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, xor, i64x8::ZERO))
28743 }
28744}
28745
28746/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst.
28747///
28748/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_xor_epi64&expand=6148)
28749#[inline]
28750#[target_feature(enable = "avx512f,avx512vl")]
28751#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28752#[cfg_attr(test, assert_instr(vxor))] //should be vpxorq
28753pub fn _mm256_xor_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_xor(a.as_i64x4(), b.as_i64x4())) }
28755}
28756
28757/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28758///
28759/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_xor_epi64&expand=6146)
28760#[inline]
28761#[target_feature(enable = "avx512f,avx512vl")]
28762#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28763#[cfg_attr(test, assert_instr(vpxorq))]
28764pub fn _mm256_mask_xor_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28765 unsafe {
28766 let xor: i64x4 = _mm256_xor_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, xor, src.as_i64x4()))
28768 }
28769}
28770
28771/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28772///
28773/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_xor_epi64&expand=6147)
28774#[inline]
28775#[target_feature(enable = "avx512f,avx512vl")]
28776#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28777#[cfg_attr(test, assert_instr(vpxorq))]
28778pub fn _mm256_maskz_xor_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28779 unsafe {
28780 let xor: i64x4 = _mm256_xor_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, xor, i64x4::ZERO))
28782 }
28783}
28784
28785/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst.
28786///
28787/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_xor_epi64&expand=6145)
28788#[inline]
28789#[target_feature(enable = "avx512f,avx512vl")]
28790#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28791#[cfg_attr(test, assert_instr(vxor))] //should be vpxorq
28792pub fn _mm_xor_epi64(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_xor(a.as_i64x2(), b.as_i64x2())) }
28794}
28795
28796/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28797///
28798/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_xor_epi64&expand=6143)
28799#[inline]
28800#[target_feature(enable = "avx512f,avx512vl")]
28801#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28802#[cfg_attr(test, assert_instr(vpxorq))]
28803pub fn _mm_mask_xor_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28804 unsafe {
28805 let xor: i64x2 = _mm_xor_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, xor, src.as_i64x2()))
28807 }
28808}
28809
28810/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28811///
28812/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_xor_epi64&expand=6144)
28813#[inline]
28814#[target_feature(enable = "avx512f,avx512vl")]
28815#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28816#[cfg_attr(test, assert_instr(vpxorq))]
28817pub fn _mm_maskz_xor_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28818 unsafe {
28819 let xor: i64x2 = _mm_xor_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, xor, i64x2::ZERO))
28821 }
28822}
28823
28824/// Compute the bitwise XOR of 512 bits (representing integer data) in a and b, and store the result in dst.
28825///
28826/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_si512&expand=6172)
28827#[inline]
28828#[target_feature(enable = "avx512f")]
28829#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28830#[cfg_attr(test, assert_instr(vpxorq))]
28831pub fn _mm512_xor_si512(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_xor(a.as_i32x16(), b.as_i32x16())) }
28833}
28834
28835/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst.
28836///
28837/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_epi32&expand=310)
28838#[inline]
28839#[target_feature(enable = "avx512f")]
28840#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28841#[cfg_attr(test, assert_instr(vpandnq))] //should be vpandnd
28842pub fn _mm512_andnot_epi32(a: __m512i, b: __m512i) -> __m512i {
    _mm512_and_epi32(_mm512_xor_epi32(a, _mm512_set1_epi32(u32::MAX as i32)), b)
28844}
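
// Illustrative sketch (added for exposition; not part of upstream stdarch):
// `andnot` computes `(!a) & b` per lane, so any bit set in `a` is always cleared
// in the result. Hypothetical helper, compiled only under `cfg(test)`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _mm512_andnot_epi32_usage_sketch() {
    let a = _mm512_set1_epi32(0b1100);
    let b = _mm512_set1_epi32(0b1010);
    // (!0b1100) & 0b1010 == 0b0010 in every lane.
    let r = _mm512_andnot_epi32(a, b);
    let e = _mm512_set1_epi32(0b0010);
    assert_eq!(_mm512_cmpeq_epi32_mask(r, e), 0xffff);
}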
28845
28846/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28847///
28848/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_andnot_epi32&expand=311)
28849#[inline]
28850#[target_feature(enable = "avx512f")]
28851#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28852#[cfg_attr(test, assert_instr(vpandnd))]
28853pub fn _mm512_mask_andnot_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28854 unsafe {
28855 let andnot: i32x16 = _mm512_andnot_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, andnot, src.as_i32x16()))
28857 }
28858}
28859
28860/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28861///
28862/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_andnot_epi32&expand=312)
28863#[inline]
28864#[target_feature(enable = "avx512f")]
28865#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28866#[cfg_attr(test, assert_instr(vpandnd))]
28867pub fn _mm512_maskz_andnot_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28868 unsafe {
28869 let andnot: i32x16 = _mm512_andnot_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, andnot, i32x16::ZERO))
28871 }
28872}
28873
28874/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28875///
28876/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_andnot_epi32&expand=308)
28877#[inline]
28878#[target_feature(enable = "avx512f,avx512vl")]
28879#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28880#[cfg_attr(test, assert_instr(vpandnd))]
28881pub fn _mm256_mask_andnot_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28882 unsafe {
        let not: __m256i = _mm256_xor_epi32(a, _mm256_set1_epi32(u32::MAX as i32));
        let andnot: i32x8 = simd_and(not.as_i32x8(), b.as_i32x8());
        transmute(simd_select_bitmask(k, andnot, src.as_i32x8()))
28886 }
28887}
28888
28889/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28890///
28891/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_andnot_epi32&expand=309)
28892#[inline]
28893#[target_feature(enable = "avx512f,avx512vl")]
28894#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28895#[cfg_attr(test, assert_instr(vpandnd))]
28896pub fn _mm256_maskz_andnot_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28897 unsafe {
        let not: __m256i = _mm256_xor_epi32(a, _mm256_set1_epi32(u32::MAX as i32));
        let andnot: i32x8 = simd_and(not.as_i32x8(), b.as_i32x8());
        transmute(simd_select_bitmask(k, andnot, i32x8::ZERO))
28901 }
28902}
28903
28904/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28905///
28906/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_andnot_epi32&expand=306)
28907#[inline]
28908#[target_feature(enable = "avx512f,avx512vl")]
28909#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28910#[cfg_attr(test, assert_instr(vpandnd))]
28911pub fn _mm_mask_andnot_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28912 unsafe {
        let not: __m128i = _mm_xor_epi32(a, _mm_set1_epi32(u32::MAX as i32));
        let andnot: i32x4 = simd_and(not.as_i32x4(), b.as_i32x4());
        transmute(simd_select_bitmask(k, andnot, src.as_i32x4()))
28916 }
28917}
28918
28919/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28920///
28921/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_andnot_epi32&expand=307)
28922#[inline]
28923#[target_feature(enable = "avx512f,avx512vl")]
28924#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28925#[cfg_attr(test, assert_instr(vpandnd))]
28926pub fn _mm_maskz_andnot_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28927 unsafe {
        let not: __m128i = _mm_xor_epi32(a, _mm_set1_epi32(u32::MAX as i32));
        let andnot: i32x4 = simd_and(not.as_i32x4(), b.as_i32x4());
        transmute(simd_select_bitmask(k, andnot, i32x4::ZERO))
28931 }
28932}
28933
28934/// Compute the bitwise NOT of 512 bits (composed of packed 64-bit integers) in a and then AND with b, and store the results in dst.
28935///
28936/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_epi64&expand=317)
28937#[inline]
28938#[target_feature(enable = "avx512f")]
28939#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpandnq))]
28941pub fn _mm512_andnot_epi64(a: __m512i, b: __m512i) -> __m512i {
    _mm512_and_epi64(_mm512_xor_epi64(a, _mm512_set1_epi64(u64::MAX as i64)), b)
28943}
28944
28945/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28946///
28947/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_andnot_epi64&expand=318)
28948#[inline]
28949#[target_feature(enable = "avx512f")]
28950#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28951#[cfg_attr(test, assert_instr(vpandnq))]
28952pub fn _mm512_mask_andnot_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28953 unsafe {
28954 let andnot: i64x8 = _mm512_andnot_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, andnot, src.as_i64x8()))
28956 }
28957}
28958
28959/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28960///
28961/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_andnot_epi64&expand=319)
28962#[inline]
28963#[target_feature(enable = "avx512f")]
28964#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28965#[cfg_attr(test, assert_instr(vpandnq))]
28966pub fn _mm512_maskz_andnot_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28967 unsafe {
28968 let andnot: i64x8 = _mm512_andnot_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, andnot, i64x8::ZERO))
28970 }
28971}
28972
28973/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28974///
28975/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_andnot_epi64&expand=315)
28976#[inline]
28977#[target_feature(enable = "avx512f,avx512vl")]
28978#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28979#[cfg_attr(test, assert_instr(vpandnq))]
28980pub fn _mm256_mask_andnot_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28981 unsafe {
        let not: __m256i = _mm256_xor_epi64(a, _mm256_set1_epi64x(u64::MAX as i64));
        let andnot: i64x4 = simd_and(not.as_i64x4(), b.as_i64x4());
        transmute(simd_select_bitmask(k, andnot, src.as_i64x4()))
28985 }
28986}
28987
28988/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28989///
28990/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_andnot_epi64&expand=316)
28991#[inline]
28992#[target_feature(enable = "avx512f,avx512vl")]
28993#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
28994#[cfg_attr(test, assert_instr(vpandnq))]
28995pub fn _mm256_maskz_andnot_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28996 unsafe {
        let not: __m256i = _mm256_xor_epi64(a, _mm256_set1_epi64x(u64::MAX as i64));
        let andnot: i64x4 = simd_and(not.as_i64x4(), b.as_i64x4());
        transmute(simd_select_bitmask(k, andnot, i64x4::ZERO))
29000 }
29001}
29002
29003/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29004///
29005/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_andnot_epi64&expand=313)
29006#[inline]
29007#[target_feature(enable = "avx512f,avx512vl")]
29008#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29009#[cfg_attr(test, assert_instr(vpandnq))]
29010pub fn _mm_mask_andnot_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
29011 unsafe {
        let not: __m128i = _mm_xor_epi64(a, _mm_set1_epi64x(u64::MAX as i64));
        let andnot: i64x2 = simd_and(not.as_i64x2(), b.as_i64x2());
        transmute(simd_select_bitmask(k, andnot, src.as_i64x2()))
29015 }
29016}
29017
29018/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29019///
29020/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_andnot_epi64&expand=314)
29021#[inline]
29022#[target_feature(enable = "avx512f,avx512vl")]
29023#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29024#[cfg_attr(test, assert_instr(vpandnq))]
29025pub fn _mm_maskz_andnot_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
29026 unsafe {
        let not: __m128i = _mm_xor_epi64(a, _mm_set1_epi64x(u64::MAX as i64));
        let andnot: i64x2 = simd_and(not.as_i64x2(), b.as_i64x2());
        transmute(simd_select_bitmask(k, andnot, i64x2::ZERO))
29030 }
29031}
29032
29033/// Compute the bitwise NOT of 512 bits (representing integer data) in a and then AND with b, and store the result in dst.
29034///
29035/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_si512&expand=340)
29036#[inline]
29037#[target_feature(enable = "avx512f")]
29038#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29039#[cfg_attr(test, assert_instr(vpandnq))]
29040pub fn _mm512_andnot_si512(a: __m512i, b: __m512i) -> __m512i {
    _mm512_and_epi64(_mm512_xor_epi64(a, _mm512_set1_epi64(u64::MAX as i64)), b)
29042}
29043
29044/// Convert 16-bit mask a into an integer value, and store the result in dst.
29045///
29046/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtmask16_u32)
29047#[inline]
29048#[target_feature(enable = "avx512f")]
29049#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29050pub fn _cvtmask16_u32(a: __mmask16) -> u32 {
29051 a as u32
29052}
29053
/// Convert 32-bit integer value a to a 16-bit mask and store the result in dst.
29055///
29056/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtu32_mask16)
29057#[inline]
29058#[target_feature(enable = "avx512f")]
29059#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29060pub fn _cvtu32_mask16(a: u32) -> __mmask16 {
29061 a as __mmask16
29062}
29063
29064/// Compute the bitwise AND of 16-bit masks a and b, and store the result in k.
29065///
29066/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kand_mask16&expand=3212)
29067#[inline]
29068#[target_feature(enable = "avx512f")]
29069#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29070#[cfg_attr(test, assert_instr(and))] // generate normal and code instead of kandw
29071pub fn _kand_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
29072 a & b
29073}
29074
29075/// Compute the bitwise AND of 16-bit masks a and b, and store the result in k.
29076///
29077/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kand&expand=3210)
29078#[inline]
29079#[target_feature(enable = "avx512f")]
29080#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29081#[cfg_attr(test, assert_instr(and))] // generate normal and code instead of kandw
29082pub fn _mm512_kand(a: __mmask16, b: __mmask16) -> __mmask16 {
29083 a & b
29084}
29085
29086/// Compute the bitwise OR of 16-bit masks a and b, and store the result in k.
29087///
29088/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kor_mask16&expand=3239)
29089#[inline]
29090#[target_feature(enable = "avx512f")]
29091#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29092#[cfg_attr(test, assert_instr(or))] // generate normal or code instead of korw
29093pub fn _kor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
29094 a | b
29095}
29096
29097/// Compute the bitwise OR of 16-bit masks a and b, and store the result in k.
29098///
29099/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kor&expand=3237)
29100#[inline]
29101#[target_feature(enable = "avx512f")]
29102#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29103#[cfg_attr(test, assert_instr(or))] // generate normal or code instead of korw
29104pub fn _mm512_kor(a: __mmask16, b: __mmask16) -> __mmask16 {
29105 a | b
29106}
29107
29108/// Compute the bitwise XOR of 16-bit masks a and b, and store the result in k.
29109///
29110/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kxor_mask16&expand=3291)
29111#[inline]
29112#[target_feature(enable = "avx512f")]
29113#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29114#[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kxorw
29115pub fn _kxor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
29116 a ^ b
29117}
29118
29119/// Compute the bitwise XOR of 16-bit masks a and b, and store the result in k.
29120///
29121/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kxor&expand=3289)
29122#[inline]
29123#[target_feature(enable = "avx512f")]
29124#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29125#[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kxorw
29126pub fn _mm512_kxor(a: __mmask16, b: __mmask16) -> __mmask16 {
29127 a ^ b
29128}
29129
29130/// Compute the bitwise NOT of 16-bit mask a, and store the result in k.
29131///
29132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=knot_mask16&expand=3233)
29133#[inline]
29134#[target_feature(enable = "avx512f")]
29135#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29136pub fn _knot_mask16(a: __mmask16) -> __mmask16 {
29137 a ^ 0b11111111_11111111
29138}
29139
29140/// Compute the bitwise NOT of 16-bit mask a, and store the result in k.
29141///
29142/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_knot&expand=3231)
29143#[inline]
29144#[target_feature(enable = "avx512f")]
29145#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29146pub fn _mm512_knot(a: __mmask16) -> __mmask16 {
29147 a ^ 0b11111111_11111111
29148}
29149
29150/// Compute the bitwise NOT of 16-bit masks a and then AND with b, and store the result in k.
29151///
29152/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kandn_mask16&expand=3218)
29153#[inline]
29154#[target_feature(enable = "avx512f")]
29155#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29156#[cfg_attr(test, assert_instr(not))] // generate normal and, not code instead of kandnw
29157pub fn _kandn_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
    _mm512_kand(_mm512_knot(a), b)
29159}
29160
29161/// Compute the bitwise NOT of 16-bit masks a and then AND with b, and store the result in k.
29162///
29163/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kandn&expand=3216)
29164#[inline]
29165#[target_feature(enable = "avx512f")]
29166#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(not))] // generate normal and, not code instead of kandnw
29168pub fn _mm512_kandn(a: __mmask16, b: __mmask16) -> __mmask16 {
    _mm512_kand(_mm512_knot(a), b)
29170}
29171
29172/// Compute the bitwise XNOR of 16-bit masks a and b, and store the result in k.
29173///
29174/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kxnor_mask16&expand=3285)
29175#[inline]
29176#[target_feature(enable = "avx512f")]
29177#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29178#[cfg_attr(test, assert_instr(xor))] // generate normal xor, not code instead of kxnorw
29179pub fn _kxnor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
29180 _mm512_knot(_mm512_kxor(a, b))
29181}
29182
29183/// Compute the bitwise XNOR of 16-bit masks a and b, and store the result in k.
29184///
29185/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kxnor&expand=3283)
29186#[inline]
29187#[target_feature(enable = "avx512f")]
29188#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(xor))] // generate normal xor, not code instead of kxnorw
29190pub fn _mm512_kxnor(a: __mmask16, b: __mmask16) -> __mmask16 {
29191 _mm512_knot(_mm512_kxor(a, b))
29192}
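
// Illustrative sketch, not taken from this crate's test suite: the 16-bit mask
// helpers above are plain scalar boolean operations on a `u16`. The module and
// function names below are hypothetical; the harness usage mirrors the crate's own tests.
#[cfg(test)]
mod mask16_ops_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn mask16_boolean_identities() {
        let a = _cvtu32_mask16(0b1010_1010_1010_1010);
        let b = _cvtu32_mask16(0b0000_1111_0000_1111);
        // `_kandn_mask16` is `!a & b`; `_kxnor_mask16` is `!(a ^ b)`; `_knot_mask16` is `!a`.
        assert_eq!(_kandn_mask16(a, b), !a & b);
        assert_eq!(_kxnor_mask16(a, b), !(a ^ b));
        assert_eq!(_knot_mask16(a), !a);
        assert_eq!(_cvtmask16_u32(_kand_mask16(a, b)), 0b0000_1010_0000_1010);
    }
}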
29193
29194/// Compute the bitwise OR of 16-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
29195/// store 0 in dst. If the result is all ones, store 1 in all_ones, otherwise store 0 in all_ones.
29196///
29197/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortest_mask16_u8)
29198#[inline]
29199#[target_feature(enable = "avx512f")]
29200#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29201pub unsafe fn _kortest_mask16_u8(a: __mmask16, b: __mmask16, all_ones: *mut u8) -> u8 {
29202 let tmp: u16 = _kor_mask16(a, b);
29203 *all_ones = (tmp == 0xffff) as u8;
29204 (tmp == 0) as u8
29205}
29206
29207/// Compute the bitwise OR of 16-bit masks a and b. If the result is all ones, store 1 in dst, otherwise
29208/// store 0 in dst.
29209///
29210/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestc_mask16_u8)
29211#[inline]
29212#[target_feature(enable = "avx512f")]
29213#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29214pub fn _kortestc_mask16_u8(a: __mmask16, b: __mmask16) -> u8 {
29215 (_kor_mask16(a, b) == 0xffff) as u8
29216}
29217
29218/// Compute the bitwise OR of 16-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
29219/// store 0 in dst.
29220///
29221/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestz_mask16_u8)
29222#[inline]
29223#[target_feature(enable = "avx512f")]
29224#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29225pub fn _kortestz_mask16_u8(a: __mmask16, b: __mmask16) -> u8 {
29226 (_kor_mask16(a, b) == 0) as u8
29227}
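
// Illustrative sketch, not taken from this crate's test suite: shows the three
// kortest reports on the same inputs. Names below are hypothetical.
#[cfg(test)]
mod kortest_mask16_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn kortest_reports_all_zeros_and_all_ones() {
        // 0x00ff | 0x0f00 is neither 0 nor 0xffff, so both reports are 0.
        assert_eq!(_kortestz_mask16_u8(0x00ff, 0x0f00), 0);
        assert_eq!(_kortestc_mask16_u8(0x00ff, 0x0f00), 0);
        // The two halves together cover every bit: the all-ones report is 1.
        assert_eq!(_kortestc_mask16_u8(0x00ff, 0xff00), 1);
        // Two empty masks OR to zero: the all-zeros report is 1.
        assert_eq!(_kortestz_mask16_u8(0, 0), 1);
        // `_kortest_mask16_u8` returns the all-zeros report and writes the
        // all-ones report through its pointer argument.
        let mut all_ones = 0u8;
        assert_eq!(_kortest_mask16_u8(0x00ff, 0xff00, &mut all_ones), 0);
        assert_eq!(all_ones, 1);
    }
}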
29228
29229/// Shift 16-bit mask a left by count bits while shifting in zeros, and store the result in dst.
29230///
29231/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftli_mask16)
29232#[inline]
29233#[target_feature(enable = "avx512f")]
29234#[rustc_legacy_const_generics(1)]
29235#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29236pub fn _kshiftli_mask16<const COUNT: u32>(a: __mmask16) -> __mmask16 {
29237 a << COUNT
29238}
29239
29240/// Shift 16-bit mask a right by count bits while shifting in zeros, and store the result in dst.
29241///
29242/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftri_mask16)
29243#[inline]
29244#[target_feature(enable = "avx512f")]
29245#[rustc_legacy_const_generics(1)]
29246#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29247pub fn _kshiftri_mask16<const COUNT: u32>(a: __mmask16) -> __mmask16 {
29248 a >> COUNT
29249}
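
// Illustrative sketch, not taken from this crate's test suite: the shift count is
// a const generic parameter, supplied here with turbofish syntax. Names below
// are hypothetical.
#[cfg(test)]
mod kshift_mask16_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn kshift_moves_mask_bits() {
        let a: __mmask16 = 0b0000_0000_1111_0000;
        // Shift left and right by four bit positions, filling with zeros.
        assert_eq!(_kshiftli_mask16::<4>(a), 0b0000_1111_0000_0000);
        assert_eq!(_kshiftri_mask16::<4>(a), 0b0000_0000_0000_1111);
    }
}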
29250
29251/// Load 16-bit mask from memory
29252///
29253/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_load_mask16)
29254#[inline]
29255#[target_feature(enable = "avx512f")]
29256#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29257pub unsafe fn _load_mask16(mem_addr: *const __mmask16) -> __mmask16 {
29258 *mem_addr
29259}
29260
29261/// Store 16-bit mask to memory
29262///
29263/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_store_mask16)
29264#[inline]
29265#[target_feature(enable = "avx512f")]
29266#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29267pub unsafe fn _store_mask16(mem_addr: *mut __mmask16, a: __mmask16) {
29268 *mem_addr = a;
29269}
29270
29271/// Copy 16-bit mask a to k.
29272///
29273/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm512_kmov&expand=3228)
29274#[inline]
29275#[target_feature(enable = "avx512f")]
29276#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(mov))] // generate normal mov code instead of kmovw
29278pub fn _mm512_kmov(a: __mmask16) -> __mmask16 {
29279 a
29280}
29281
29282/// Converts integer mask into bitmask, storing the result in dst.
29283///
29284/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_int2mask&expand=3189)
29285#[inline]
#[target_feature(enable = "avx512f")]
29287#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29288pub fn _mm512_int2mask(mask: i32) -> __mmask16 {
29289 mask as u16
29290}
29291
29292/// Converts bit mask k1 into an integer value, storing the results in dst.
29293///
29294/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2int&expand=3544)
29295#[inline]
29296#[target_feature(enable = "avx512f")]
29297#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(mov))] // generate normal mov code instead of kmovw
29299pub fn _mm512_mask2int(k1: __mmask16) -> i32 {
29300 k1 as i32
29301}
29302
29303/// Unpack and interleave 8 bits from masks a and b, and store the 16-bit result in k.
29304///
29305/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kunpackb&expand=3280)
29306#[inline]
29307#[target_feature(enable = "avx512f")]
29308#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(mov))] // generate normal mov code instead of kunpckbw
29310pub fn _mm512_kunpackb(a: __mmask16, b: __mmask16) -> __mmask16 {
29311 ((a & 0xff) << 8) | (b & 0xff)
29312}
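
// Illustrative sketch, not taken from this crate's test suite: `_mm512_kunpackb`
// keeps only the low byte of each mask, placing `a`'s byte in the upper half of
// the result. Names below are hypothetical.
#[cfg(test)]
mod kunpackb_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn kunpackb_packs_low_bytes() {
        let a: __mmask16 = 0xff01; // only the low byte (0x01) is kept
        let b: __mmask16 = 0xffab; // only the low byte (0xab) is kept
        assert_eq!(_mm512_kunpackb(a, b), 0x01ab);
    }
}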
29313
29314/// Performs bitwise OR between k1 and k2, storing the result in dst. CF flag is set if dst consists of all 1's.
29315///
29316/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kortestc&expand=3247)
29317#[inline]
29318#[target_feature(enable = "avx512f")]
29319#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(cmp))] // generate normal cmp code instead of kortestw
29321pub fn _mm512_kortestc(a: __mmask16, b: __mmask16) -> i32 {
29322 let r: bool = (a | b) == 0b11111111_11111111;
29323 r as i32
29324}
29325
29326/// Performs bitwise OR between k1 and k2, storing the result in dst. ZF flag is set if dst is 0.
29327///
29328/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kortestz)
29329#[inline]
29330#[target_feature(enable = "avx512f")]
29331#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kortestw
29333pub fn _mm512_kortestz(a: __mmask16, b: __mmask16) -> i32 {
29334 let r: bool = (a | b) == 0;
29335 r as i32
29336}
29337
29338/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
29339///
29340/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_test_epi32_mask&expand=5890)
29341#[inline]
29342#[target_feature(enable = "avx512f")]
29343#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29344#[cfg_attr(test, assert_instr(vptestmd))]
29345pub fn _mm512_test_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
29346 let and: __m512i = _mm512_and_epi32(a, b);
29347 let zero: __m512i = _mm512_setzero_si512();
    _mm512_cmpneq_epi32_mask(and, zero)
29349}
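
// Illustrative sketch, not taken from this crate's test suite: the test-mask
// family sets a mask bit exactly where `a & b` is non-zero in that lane. Names
// below are hypothetical.
#[cfg(test)]
mod vptestmd_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn test_epi32_mask_flags_nonzero_lanes() {
        // Lanes 0 and 1 are the only lanes whose low bit is set.
        let a = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3);
        let b = _mm512_set1_epi32(1);
        assert_eq!(_mm512_test_epi32_mask(a, b), 0b11);
        // A writemask restricts which lanes may set their bit at all.
        assert_eq!(_mm512_mask_test_epi32_mask(0b01, a, b), 0b01);
    }
}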
29350
29351/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
29352///
29353/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_test_epi32_mask&expand=5889)
29354#[inline]
29355#[target_feature(enable = "avx512f")]
29356#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29357#[cfg_attr(test, assert_instr(vptestmd))]
29358pub fn _mm512_mask_test_epi32_mask(k: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
29359 let and: __m512i = _mm512_and_epi32(a, b);
29360 let zero: __m512i = _mm512_setzero_si512();
    _mm512_mask_cmpneq_epi32_mask(k, and, zero)
29362}
29363
29364/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
29365///
29366/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_test_epi32_mask&expand=5888)
29367#[inline]
29368#[target_feature(enable = "avx512f,avx512vl")]
29369#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29370#[cfg_attr(test, assert_instr(vptestmd))]
29371pub fn _mm256_test_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
29372 let and: __m256i = _mm256_and_si256(a, b);
29373 let zero: __m256i = _mm256_setzero_si256();
    _mm256_cmpneq_epi32_mask(and, zero)
29375}
29376
29377/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
29378///
29379/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_test_epi32_mask&expand=5887)
29380#[inline]
29381#[target_feature(enable = "avx512f,avx512vl")]
29382#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29383#[cfg_attr(test, assert_instr(vptestmd))]
29384pub fn _mm256_mask_test_epi32_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
29385 let and: __m256i = _mm256_and_si256(a, b);
29386 let zero: __m256i = _mm256_setzero_si256();
    _mm256_mask_cmpneq_epi32_mask(k, and, zero)
29388}
29389
29390/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
29391///
29392/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_epi32_mask&expand=5886)
29393#[inline]
29394#[target_feature(enable = "avx512f,avx512vl")]
29395#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29396#[cfg_attr(test, assert_instr(vptestmd))]
29397pub fn _mm_test_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
29398 let and: __m128i = _mm_and_si128(a, b);
29399 let zero: __m128i = _mm_setzero_si128();
    _mm_cmpneq_epi32_mask(and, zero)
29401}
29402
29403/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
29404///
29405/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_test_epi32_mask&expand=5885)
29406#[inline]
29407#[target_feature(enable = "avx512f,avx512vl")]
29408#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29409#[cfg_attr(test, assert_instr(vptestmd))]
29410pub fn _mm_mask_test_epi32_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
29411 let and: __m128i = _mm_and_si128(a, b);
29412 let zero: __m128i = _mm_setzero_si128();
    _mm_mask_cmpneq_epi32_mask(k, and, zero)
29414}
29415
29416/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
29417///
29418/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_test_epi64_mask&expand=5896)
29419#[inline]
29420#[target_feature(enable = "avx512f")]
29421#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29422#[cfg_attr(test, assert_instr(vptestmq))]
29423pub fn _mm512_test_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
29424 let and: __m512i = _mm512_and_epi64(a, b);
29425 let zero: __m512i = _mm512_setzero_si512();
    _mm512_cmpneq_epi64_mask(and, zero)
29427}
29428
29429/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
29430///
29431/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_test_epi64_mask&expand=5895)
29432#[inline]
29433#[target_feature(enable = "avx512f")]
29434#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29435#[cfg_attr(test, assert_instr(vptestmq))]
29436pub fn _mm512_mask_test_epi64_mask(k: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
29437 let and: __m512i = _mm512_and_epi64(a, b);
29438 let zero: __m512i = _mm512_setzero_si512();
    _mm512_mask_cmpneq_epi64_mask(k, and, zero)
29440}
29441
29442/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
29443///
29444/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_test_epi64_mask&expand=5894)
29445#[inline]
29446#[target_feature(enable = "avx512f,avx512vl")]
29447#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29448#[cfg_attr(test, assert_instr(vptestmq))]
29449pub fn _mm256_test_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
29450 let and: __m256i = _mm256_and_si256(a, b);
29451 let zero: __m256i = _mm256_setzero_si256();
    _mm256_cmpneq_epi64_mask(and, zero)
29453}
29454
29455/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
29456///
29457/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_test_epi64_mask&expand=5893)
29458#[inline]
29459#[target_feature(enable = "avx512f,avx512vl")]
29460#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29461#[cfg_attr(test, assert_instr(vptestmq))]
29462pub fn _mm256_mask_test_epi64_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
29463 let and: __m256i = _mm256_and_si256(a, b);
29464 let zero: __m256i = _mm256_setzero_si256();
    _mm256_mask_cmpneq_epi64_mask(k, and, zero)
29466}
29467
29468/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
29469///
29470/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_epi64_mask&expand=5892)
29471#[inline]
29472#[target_feature(enable = "avx512f,avx512vl")]
29473#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29474#[cfg_attr(test, assert_instr(vptestmq))]
29475pub fn _mm_test_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
29476 let and: __m128i = _mm_and_si128(a, b);
29477 let zero: __m128i = _mm_setzero_si128();
    _mm_cmpneq_epi64_mask(and, zero)
29479}
29480
29481/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
29482///
29483/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_test_epi64_mask&expand=5891)
29484#[inline]
29485#[target_feature(enable = "avx512f,avx512vl")]
29486#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29487#[cfg_attr(test, assert_instr(vptestmq))]
29488pub fn _mm_mask_test_epi64_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
29489 let and: __m128i = _mm_and_si128(a, b);
29490 let zero: __m128i = _mm_setzero_si128();
    _mm_mask_cmpneq_epi64_mask(k, and, zero)
29492}
29493
29494/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
29495///
29496/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_testn_epi32_mask&expand=5921)
29497#[inline]
29498#[target_feature(enable = "avx512f")]
29499#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29500#[cfg_attr(test, assert_instr(vptestnmd))]
29501pub fn _mm512_testn_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
29502 let and: __m512i = _mm512_and_epi32(a, b);
29503 let zero: __m512i = _mm512_setzero_si512();
    _mm512_cmpeq_epi32_mask(and, zero)
29505}
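
// Illustrative sketch, not taken from this crate's test suite: for the same
// inputs, testn is the bitwise complement of test, since it flags lanes whose
// `a & b` is zero. Names below are hypothetical.
#[cfg(test)]
mod vptestnmd_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn testn_is_complement_of_test() {
        let a = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3);
        let b = _mm512_set1_epi32(1);
        let t = _mm512_test_epi32_mask(a, b);
        let n = _mm512_testn_epi32_mask(a, b);
        assert_eq!(n, !t);
        assert_eq!(t & n, 0);
    }
}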
29506
29507/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
29508///
29509/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_testn_epi32_mask&expand=5920)
29510#[inline]
29511#[target_feature(enable = "avx512f")]
29512#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29513#[cfg_attr(test, assert_instr(vptestnmd))]
29514pub fn _mm512_mask_testn_epi32_mask(k: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
29515 let and: __m512i = _mm512_and_epi32(a, b);
29516 let zero: __m512i = _mm512_setzero_si512();
    _mm512_mask_cmpeq_epi32_mask(k, and, zero)
29518}
29519
29520/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
29521///
29522/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testn_epi32_mask&expand=5919)
29523#[inline]
29524#[target_feature(enable = "avx512f,avx512vl")]
29525#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29526#[cfg_attr(test, assert_instr(vptestnmd))]
29527pub fn _mm256_testn_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
29528 let and: __m256i = _mm256_and_si256(a, b);
29529 let zero: __m256i = _mm256_setzero_si256();
    _mm256_cmpeq_epi32_mask(and, zero)
29531}
29532
29533/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
29534///
29535/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_testn_epi32_mask&expand=5918)
29536#[inline]
29537#[target_feature(enable = "avx512f,avx512vl")]
29538#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29539#[cfg_attr(test, assert_instr(vptestnmd))]
29540pub fn _mm256_mask_testn_epi32_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
29541 let and: __m256i = _mm256_and_si256(a, b);
29542 let zero: __m256i = _mm256_setzero_si256();
    _mm256_mask_cmpeq_epi32_mask(k, and, zero)
29544}
29545
29546/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
29547///
29548/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testn_epi32_mask&expand=5917)
29549#[inline]
29550#[target_feature(enable = "avx512f,avx512vl")]
29551#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29552#[cfg_attr(test, assert_instr(vptestnmd))]
29553pub fn _mm_testn_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
29554 let and: __m128i = _mm_and_si128(a, b);
29555 let zero: __m128i = _mm_setzero_si128();
    _mm_cmpeq_epi32_mask(and, zero)
29557}
29558
29559/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
29560///
29561/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_testn_epi32_mask&expand=5916)
29562#[inline]
29563#[target_feature(enable = "avx512f,avx512vl")]
29564#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29565#[cfg_attr(test, assert_instr(vptestnmd))]
29566pub fn _mm_mask_testn_epi32_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
29567 let and: __m128i = _mm_and_si128(a, b);
29568 let zero: __m128i = _mm_setzero_si128();
    _mm_mask_cmpeq_epi32_mask(k, and, zero)
29570}
29571
29572/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
29573///
29574/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_testn_epi64_mask&expand=5927)
29575#[inline]
29576#[target_feature(enable = "avx512f")]
29577#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29578#[cfg_attr(test, assert_instr(vptestnmq))]
29579pub fn _mm512_testn_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
29580 let and: __m512i = _mm512_and_epi64(a, b);
29581 let zero: __m512i = _mm512_setzero_si512();
    _mm512_cmpeq_epi64_mask(and, zero)
29583}
29584
29585/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
29586///
29587/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_testn_epi64_mask&expand=5926)
29588#[inline]
29589#[target_feature(enable = "avx512f")]
29590#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29591#[cfg_attr(test, assert_instr(vptestnmq))]
29592pub fn _mm512_mask_testn_epi64_mask(k: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
29593 let and: __m512i = _mm512_and_epi64(a, b);
29594 let zero: __m512i = _mm512_setzero_si512();
    _mm512_mask_cmpeq_epi64_mask(k, and, zero)
29596}
29597
29598/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
29599///
29600/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testn_epi64_mask&expand=5925)
29601#[inline]
29602#[target_feature(enable = "avx512f,avx512vl")]
29603#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29604#[cfg_attr(test, assert_instr(vptestnmq))]
29605pub fn _mm256_testn_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
29606 let and: __m256i = _mm256_and_si256(a, b);
29607 let zero: __m256i = _mm256_setzero_si256();
    _mm256_cmpeq_epi64_mask(and, zero)
29609}
29610
29611/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
29612///
29613/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_testn_epi64_mask&expand=5924)
29614#[inline]
29615#[target_feature(enable = "avx512f,avx512vl")]
29616#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29617#[cfg_attr(test, assert_instr(vptestnmq))]
29618pub fn _mm256_mask_testn_epi64_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
29619 let and: __m256i = _mm256_and_si256(a, b);
29620 let zero: __m256i = _mm256_setzero_si256();
    _mm256_mask_cmpeq_epi64_mask(k, and, zero)
29622}
29623
29624/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
29625///
29626/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testn_epi64_mask&expand=5923)
29627#[inline]
29628#[target_feature(enable = "avx512f,avx512vl")]
29629#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29630#[cfg_attr(test, assert_instr(vptestnmq))]
29631pub fn _mm_testn_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
29632 let and: __m128i = _mm_and_si128(a, b);
29633 let zero: __m128i = _mm_setzero_si128();
    _mm_cmpeq_epi64_mask(and, zero)
29635}
29636
29637/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
29638///
29639/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_testn_epi64_mask&expand=5922)
29640#[inline]
29641#[target_feature(enable = "avx512f,avx512vl")]
29642#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29643#[cfg_attr(test, assert_instr(vptestnmq))]
29644pub fn _mm_mask_testn_epi64_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
29645 let and: __m128i = _mm_and_si128(a, b);
29646 let zero: __m128i = _mm_setzero_si128();
    _mm_mask_cmpeq_epi64_mask(k, and, zero)
29648}
29649
29650/// Store 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
29651///
29652/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_ps&expand=5671)
29653///
29654/// # Safety of non-temporal stores
29655///
29656/// After using this intrinsic, but before any other access to the memory that this intrinsic
29657/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
29658/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
29659/// return.
29660///
29661/// See [`_mm_sfence`] for details.
29662#[inline]
29663#[target_feature(enable = "avx512f")]
29664#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29665#[cfg_attr(test, assert_instr(vmovntps))]
29666#[allow(clippy::cast_ptr_alignment)]
29667pub unsafe fn _mm512_stream_ps(mem_addr: *mut f32, a: __m512) {
29668 crate::arch::asm!(
29669 vps!("vmovntps", ",{a}"),
29670 p = in(reg) mem_addr,
29671 a = in(zmm_reg) a,
29672 options(nostack, preserves_flags),
29673 );
29674}
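
// Illustrative sketch, not taken from this crate's test suite: a non-temporal
// store into 64-byte-aligned memory, fenced with `_mm_sfence` before the buffer
// is read again, as the safety section above requires. Names below are hypothetical.
#[cfg(test)]
mod stream_ps_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn stream_ps_then_sfence() {
        #[repr(align(64))]
        struct Aligned([f32; 16]);

        let mut buf = Aligned([0.0; 16]);
        _mm512_stream_ps(buf.0.as_mut_ptr(), _mm512_set1_ps(7.0));
        // Fence the non-temporal store before any other access to `buf`.
        _mm_sfence();
        assert_eq!(buf.0, [7.0; 16]);
    }
}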
29675
29676/// Store 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
29677///
29678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_pd&expand=5667)
29679///
29680/// # Safety of non-temporal stores
29681///
29682/// After using this intrinsic, but before any other access to the memory that this intrinsic
29683/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
29684/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
29685/// return.
29686///
29687/// See [`_mm_sfence`] for details.
29688#[inline]
29689#[target_feature(enable = "avx512f")]
29690#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29691#[cfg_attr(test, assert_instr(vmovntpd))]
29692#[allow(clippy::cast_ptr_alignment)]
29693pub unsafe fn _mm512_stream_pd(mem_addr: *mut f64, a: __m512d) {
29694 crate::arch::asm!(
29695 vps!("vmovntpd", ",{a}"),
29696 p = in(reg) mem_addr,
29697 a = in(zmm_reg) a,
29698 options(nostack, preserves_flags),
29699 );
29700}
29701
29702/// Store 512-bits of integer data from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
29703///
29704/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_si512&expand=5675)
29705///
29706/// # Safety of non-temporal stores
29707///
29708/// After using this intrinsic, but before any other access to the memory that this intrinsic
29709/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
29710/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
29711/// return.
29712///
29713/// See [`_mm_sfence`] for details.
29714#[inline]
29715#[target_feature(enable = "avx512f")]
29716#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29717#[cfg_attr(test, assert_instr(vmovntdq))]
29718#[allow(clippy::cast_ptr_alignment)]
29719pub unsafe fn _mm512_stream_si512(mem_addr: *mut __m512i, a: __m512i) {
29720 crate::arch::asm!(
29721 vps!("vmovntdq", ",{a}"),
29722 p = in(reg) mem_addr,
29723 a = in(zmm_reg) a,
29724 options(nostack, preserves_flags),
29725 );
29726}
29727
29728/// Load 512-bits of integer data from memory into dst using a non-temporal memory hint. mem_addr
29729/// must be aligned on a 64-byte boundary or a general-protection exception may be generated. To
/// minimize caching, the data is flagged as non-temporal (unlikely to be used again soon).
29731///
29732/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_load_si512)
29733#[inline]
29734#[target_feature(enable = "avx512f")]
29735#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29736pub unsafe fn _mm512_stream_load_si512(mem_addr: *const __m512i) -> __m512i {
29737 let dst: __m512i;
29738 crate::arch::asm!(
29739 vpl!("vmovntdqa {a}"),
29740 a = out(zmm_reg) dst,
29741 p = in(reg) mem_addr,
29742 options(pure, readonly, nostack, preserves_flags),
29743 );
29744 dst
29745}
29746
/// Sets packed single-precision (32-bit) floating-point elements in `dst` with the supplied values.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_ps&expand=4931)
29750#[inline]
29751#[target_feature(enable = "avx512f")]
29752#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29753pub fn _mm512_set_ps(
29754 e0: f32,
29755 e1: f32,
29756 e2: f32,
29757 e3: f32,
29758 e4: f32,
29759 e5: f32,
29760 e6: f32,
29761 e7: f32,
29762 e8: f32,
29763 e9: f32,
29764 e10: f32,
29765 e11: f32,
29766 e12: f32,
29767 e13: f32,
29768 e14: f32,
29769 e15: f32,
29770) -> __m512 {
29771 _mm512_setr_ps(
        e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0,
29773 )
29774}
29775
/// Sets packed single-precision (32-bit) floating-point elements in `dst` with the supplied
/// values in reverse order.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_ps&expand=5008)
29780#[inline]
29781#[target_feature(enable = "avx512f")]
29782#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29783pub fn _mm512_setr_ps(
29784 e0: f32,
29785 e1: f32,
29786 e2: f32,
29787 e3: f32,
29788 e4: f32,
29789 e5: f32,
29790 e6: f32,
29791 e7: f32,
29792 e8: f32,
29793 e9: f32,
29794 e10: f32,
29795 e11: f32,
29796 e12: f32,
29797 e13: f32,
29798 e14: f32,
29799 e15: f32,
29800) -> __m512 {
29801 unsafe {
29802 let r: f32x16 = f32x16::new(
            e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
        );
        transmute(r)
29806 }
29807}
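
// Illustrative sketch, not taken from this crate's test suite: `_mm512_set_ps`
// takes elements from the highest lane down to lane 0, while `_mm512_setr_ps`
// takes them in lane order, so these two calls build the same vector. Names
// below are hypothetical.
#[cfg(test)]
mod set_ps_order_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn set_ps_and_setr_ps_agree_when_reversed() {
        let hi_first = _mm512_set_ps(
            15.0, 14.0, 13.0, 12.0, 11.0, 10.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 0.0,
        );
        let lo_first = _mm512_setr_ps(
            0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
        );
        assert_eq!(_mm512_cmpeq_ps_mask(hi_first, lo_first), 0xffff);
    }
}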
29808
29809/// Broadcast 64-bit float `a` to all elements of `dst`.
29810///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_pd&expand=4975)
29812#[inline]
29813#[target_feature(enable = "avx512f")]
29814#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29815pub fn _mm512_set1_pd(a: f64) -> __m512d {
    unsafe { transmute(f64x8::splat(a)) }
29817}
29818
29819/// Broadcast 32-bit float `a` to all elements of `dst`.
29820///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_ps&expand=4981)
29822#[inline]
29823#[target_feature(enable = "avx512f")]
29824#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29825pub fn _mm512_set1_ps(a: f32) -> __m512 {
    unsafe { transmute(f32x16::splat(a)) }
29827}
29828
29829/// Sets packed 32-bit integers in `dst` with the supplied values.
29830///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi32&expand=4908)
29832#[inline]
29833#[target_feature(enable = "avx512f")]
29834#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29835pub fn _mm512_set_epi32(
29836 e15: i32,
29837 e14: i32,
29838 e13: i32,
29839 e12: i32,
29840 e11: i32,
29841 e10: i32,
29842 e9: i32,
29843 e8: i32,
29844 e7: i32,
29845 e6: i32,
29846 e5: i32,
29847 e4: i32,
29848 e3: i32,
29849 e2: i32,
29850 e1: i32,
29851 e0: i32,
29852) -> __m512i {
29853 _mm512_setr_epi32(
        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
29855 )
29856}
29857
29858/// Broadcast 8-bit integer a to all elements of dst.
29859///
29860/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_epi8&expand=4972)
29861#[inline]
29862#[target_feature(enable = "avx512f")]
29863#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29864pub fn _mm512_set1_epi8(a: i8) -> __m512i {
    unsafe { transmute(i8x64::splat(a)) }
29866}
29867
/// Broadcast 16-bit integer a to all elements of dst.
29869///
29870/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_epi16&expand=4944)
29871#[inline]
29872#[target_feature(enable = "avx512f")]
29873#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29874pub fn _mm512_set1_epi16(a: i16) -> __m512i {
    unsafe { transmute(i16x32::splat(a)) }
29876}
29877
29878/// Broadcast 32-bit integer `a` to all elements of `dst`.
29879///
29880/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set1_epi32)
29881#[inline]
29882#[target_feature(enable = "avx512f")]
29883#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29884pub fn _mm512_set1_epi32(a: i32) -> __m512i {
    unsafe { transmute(i32x16::splat(a)) }
29886}
29887
29888/// Broadcast 32-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29889///
29890/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_set1_epi32&expand=4951)
29891#[inline]
29892#[target_feature(enable = "avx512f")]
29893#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29894#[cfg_attr(test, assert_instr(vpbroadcastd))]
29895pub fn _mm512_mask_set1_epi32(src: __m512i, k: __mmask16, a: i32) -> __m512i {
29896 unsafe {
29897 let r: i32x16 = _mm512_set1_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, r, src.as_i32x16()))
29899 }
29900}
29901
29902/// Broadcast 32-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29903///
29904/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_set1_epi32&expand=4952)
29905#[inline]
29906#[target_feature(enable = "avx512f")]
29907#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29908#[cfg_attr(test, assert_instr(vpbroadcastd))]
29909pub fn _mm512_maskz_set1_epi32(k: __mmask16, a: i32) -> __m512i {
29910 unsafe {
29911 let r: i32x16 = _mm512_set1_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
29913 }
29914}
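
// Illustrative sketch, not taken from this crate's test suite: with the same
// mask, the writemask variant keeps `src` in unselected lanes while the
// zeromask variant zeroes them. Names below are hypothetical.
#[cfg(test)]
mod set1_epi32_mask_sketch {
    use crate::core_arch::x86::*;
    use stdarch_test::simd_test;

    #[simd_test(enable = "avx512f")]
    unsafe fn mask_and_maskz_set1_epi32() {
        let src = _mm512_set1_epi32(-1);
        let k: __mmask16 = 0b11; // broadcast only into lanes 0 and 1
        let w = _mm512_mask_set1_epi32(src, k, 7);
        let z = _mm512_maskz_set1_epi32(k, 7);
        let e_w = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 7, 7);
        let e_z = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 7);
        assert_eq!(_mm512_cmpeq_epi32_mask(w, e_w), 0xffff);
        assert_eq!(_mm512_cmpeq_epi32_mask(z, e_z), 0xffff);
    }
}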
29915
29916/// Broadcast 32-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29917///
29918/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_set1_epi32&expand=4948)
29919#[inline]
29920#[target_feature(enable = "avx512f,avx512vl")]
29921#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29922#[cfg_attr(test, assert_instr(vpbroadcastd))]
29923pub fn _mm256_mask_set1_epi32(src: __m256i, k: __mmask8, a: i32) -> __m256i {
29924 unsafe {
29925 let r: i32x8 = _mm256_set1_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, r, src.as_i32x8()))
29927 }
29928}
29929
29930/// Broadcast 32-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29931///
29932/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_set1_epi32&expand=4949)
29933#[inline]
29934#[target_feature(enable = "avx512f,avx512vl")]
29935#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29936#[cfg_attr(test, assert_instr(vpbroadcastd))]
29937pub fn _mm256_maskz_set1_epi32(k: __mmask8, a: i32) -> __m256i {
29938 unsafe {
29939 let r: i32x8 = _mm256_set1_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
29941 }
29942}
29943
29944/// Broadcast 32-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29945///
29946/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_set1_epi32&expand=4945)
29947#[inline]
29948#[target_feature(enable = "avx512f,avx512vl")]
29949#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29950#[cfg_attr(test, assert_instr(vpbroadcastd))]
29951pub fn _mm_mask_set1_epi32(src: __m128i, k: __mmask8, a: i32) -> __m128i {
29952 unsafe {
29953 let r: i32x4 = _mm_set1_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, r, src.as_i32x4()))
29955 }
29956}
29957
29958/// Broadcast 32-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29959///
29960/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_set1_epi32&expand=4946)
29961#[inline]
29962#[target_feature(enable = "avx512f,avx512vl")]
29963#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29964#[cfg_attr(test, assert_instr(vpbroadcastd))]
29965pub fn _mm_maskz_set1_epi32(k: __mmask8, a: i32) -> __m128i {
29966 unsafe {
29967 let r: i32x4 = _mm_set1_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
29969 }
29970}
29971
29972/// Broadcast 64-bit integer `a` to all elements of `dst`.
29973///
29974/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_epi64&expand=4961)
29975#[inline]
29976#[target_feature(enable = "avx512f")]
29977#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29978pub fn _mm512_set1_epi64(a: i64) -> __m512i {
    unsafe { transmute(i64x8::splat(a)) }
29980}
29981
29982/// Broadcast 64-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29983///
29984/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_set1_epi64&expand=4959)
29985#[inline]
29986#[target_feature(enable = "avx512f")]
29987#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
29988#[cfg_attr(test, assert_instr(vpbroadcastq))]
29989pub fn _mm512_mask_set1_epi64(src: __m512i, k: __mmask8, a: i64) -> __m512i {
29990 unsafe {
29991 let r: i64x8 = _mm512_set1_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, r, src.as_i64x8()))
29993 }
29994}
29995
29996/// Broadcast 64-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29997///
29998/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_set1_epi64&expand=4960)
29999#[inline]
30000#[target_feature(enable = "avx512f")]
30001#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30002#[cfg_attr(test, assert_instr(vpbroadcastq))]
30003pub fn _mm512_maskz_set1_epi64(k: __mmask8, a: i64) -> __m512i {
30004 unsafe {
30005 let r: i64x8 = _mm512_set1_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, r, i64x8::ZERO))
30007 }
30008}
30009
30010/// Broadcast 64-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
30011///
30012/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_set1_epi64&expand=4957)
30013#[inline]
30014#[target_feature(enable = "avx512f,avx512vl")]
30015#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30016#[cfg_attr(test, assert_instr(vpbroadcastq))]
30017pub fn _mm256_mask_set1_epi64(src: __m256i, k: __mmask8, a: i64) -> __m256i {
30018 unsafe {
30019 let r: i64x4 = _mm256_set1_epi64x(a).as_i64x4();
        transmute(simd_select_bitmask(k, r, src.as_i64x4()))
30021 }
30022}
30023
30024/// Broadcast 64-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
30025///
30026/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_set1_epi64&expand=4958)
30027#[inline]
30028#[target_feature(enable = "avx512f,avx512vl")]
30029#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30030#[cfg_attr(test, assert_instr(vpbroadcastq))]
30031pub fn _mm256_maskz_set1_epi64(k: __mmask8, a: i64) -> __m256i {
30032 unsafe {
30033 let r: i64x4 = _mm256_set1_epi64x(a).as_i64x4();
        transmute(simd_select_bitmask(k, r, i64x4::ZERO))
30035 }
30036}
30037
30038/// Broadcast 64-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
30039///
30040/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_set1_epi64&expand=4954)
30041#[inline]
30042#[target_feature(enable = "avx512f,avx512vl")]
30043#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30044#[cfg_attr(test, assert_instr(vpbroadcastq))]
30045pub fn _mm_mask_set1_epi64(src: __m128i, k: __mmask8, a: i64) -> __m128i {
30046 unsafe {
30047 let r: i64x2 = _mm_set1_epi64x(a).as_i64x2();
        transmute(simd_select_bitmask(k, r, src.as_i64x2()))
30049 }
30050}
30051
30052/// Broadcast 64-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
30053///
30054/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_set1_epi64&expand=4955)
30055#[inline]
30056#[target_feature(enable = "avx512f,avx512vl")]
30057#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30058#[cfg_attr(test, assert_instr(vpbroadcastq))]
30059pub fn _mm_maskz_set1_epi64(k: __mmask8, a: i64) -> __m128i {
30060 unsafe {
30061 let r: i64x2 = _mm_set1_epi64x(a).as_i64x2();
        transmute(simd_select_bitmask(k, r, i64x2::ZERO))
30063 }
30064}
30065
30066/// Set packed 64-bit integers in dst with the repeated 4 element sequence.
30067///
30068/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_epi64&expand=4983)
30069#[inline]
30070#[target_feature(enable = "avx512f")]
30071#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30072pub fn _mm512_set4_epi64(d: i64, c: i64, b: i64, a: i64) -> __m512i {
    _mm512_set_epi64(d, c, b, a, d, c, b, a)
30074}
30075
30076/// Set packed 64-bit integers in dst with the repeated 4 element sequence in reverse order.
30077///
30078/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_epi64&expand=5010)
30079#[inline]
30080#[target_feature(enable = "avx512f")]
30081#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30082pub fn _mm512_setr4_epi64(d: i64, c: i64, b: i64, a: i64) -> __m512i {
    _mm512_set_epi64(a, b, c, d, a, b, c, d)
30084}
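
// A minimal illustrative sketch, not part of the upstream source: it spells out the
// lane ordering produced by the repeated-sequence constructors above, assuming an
// AVX-512F target. The helper name `sketch_set4_epi64_ordering` is invented here.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn sketch_set4_epi64_ordering() {
    let (a, b, c, d) = (1i64, 2, 3, 4);
    // `_mm512_set4_epi64(d, c, b, a)` places `a` in the lowest lane, i.e. it matches
    // `_mm512_setr_epi64(a, b, c, d, a, b, c, d)`.
    let set4 = _mm512_set4_epi64(d, c, b, a);
    assert_eq!(
        _mm512_cmpeq_epi64_mask(set4, _mm512_setr_epi64(a, b, c, d, a, b, c, d)),
        0xff
    );
    // The reversed variant places `d` in the lowest lane instead.
    let setr4 = _mm512_setr4_epi64(d, c, b, a);
    assert_eq!(
        _mm512_cmpeq_epi64_mask(setr4, _mm512_setr_epi64(d, c, b, a, d, c, b, a)),
        0xff
    );
}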
30085
30086/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than, and store the results in mask vector k.
30087///
30088/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_ps_mask&expand=1074)
30089#[inline]
30090#[target_feature(enable = "avx512f")]
30091#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30092#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30093pub fn _mm512_cmplt_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30094 _mm512_cmp_ps_mask::<_CMP_LT_OS>(a, b)
30095}
30096
30097/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30098///
30099/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_ps_mask&expand=1075)
30100#[inline]
30101#[target_feature(enable = "avx512f")]
30102#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30103#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30104pub fn _mm512_mask_cmplt_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30105 _mm512_mask_cmp_ps_mask::<_CMP_LT_OS>(k1, a, b)
30106}
30107
30108/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k.
30109///
30110/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnlt_ps_mask&expand=1154)
30111#[inline]
30112#[target_feature(enable = "avx512f")]
30113#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30114#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30115pub fn _mm512_cmpnlt_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30116 _mm512_cmp_ps_mask::<_CMP_NLT_US>(a, b)
30117}
30118
30119/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30120///
30121/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnlt_ps_mask&expand=1155)
30122#[inline]
30123#[target_feature(enable = "avx512f")]
30124#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30125#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30126pub fn _mm512_mask_cmpnlt_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30127 _mm512_mask_cmp_ps_mask::<_CMP_NLT_US>(k1, a, b)
30128}
30129
30130/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k.
30131///
30132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_ps_mask&expand=1013)
30133#[inline]
30134#[target_feature(enable = "avx512f")]
30135#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30136#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30137pub fn _mm512_cmple_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30138 _mm512_cmp_ps_mask::<_CMP_LE_OS>(a, b)
30139}
30140
30141/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30142///
30143/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_ps_mask&expand=1014)
30144#[inline]
30145#[target_feature(enable = "avx512f")]
30146#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30147#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30148pub fn _mm512_mask_cmple_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30149 _mm512_mask_cmp_ps_mask::<_CMP_LE_OS>(k1, a, b)
30150}
30151
30152/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k.
30153///
30154/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnle_ps_mask&expand=1146)
30155#[inline]
30156#[target_feature(enable = "avx512f")]
30157#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30158#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30159pub fn _mm512_cmpnle_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30160 _mm512_cmp_ps_mask::<_CMP_NLE_US>(a, b)
30161}
30162
30163/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30164///
30165/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnle_ps_mask&expand=1147)
30166#[inline]
30167#[target_feature(enable = "avx512f")]
30168#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30169#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30170pub fn _mm512_mask_cmpnle_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30171 _mm512_mask_cmp_ps_mask::<_CMP_NLE_US>(k1, a, b)
30172}
30173
30174/// Compare packed single-precision (32-bit) floating-point elements in a and b for equality, and store the results in mask vector k.
30175///
30176/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_ps_mask&expand=828)
30177#[inline]
30178#[target_feature(enable = "avx512f")]
30179#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30180#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30181pub fn _mm512_cmpeq_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30182 _mm512_cmp_ps_mask::<_CMP_EQ_OQ>(a, b)
30183}
30184
30185/// Compare packed single-precision (32-bit) floating-point elements in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30186///
30187/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_ps_mask&expand=829)
30188#[inline]
30189#[target_feature(enable = "avx512f")]
30190#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30191#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30192pub fn _mm512_mask_cmpeq_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30193 _mm512_mask_cmp_ps_mask::<_CMP_EQ_OQ>(k1, a, b)
30194}
30195
30196/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k.
30197///
30198/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_ps_mask&expand=1130)
30199#[inline]
30200#[target_feature(enable = "avx512f")]
30201#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30202#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30203pub fn _mm512_cmpneq_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30204 _mm512_cmp_ps_mask::<_CMP_NEQ_UQ>(a, b)
30205}
30206
30207/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30208///
30209/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_ps_mask&expand=1131)
30210#[inline]
30211#[target_feature(enable = "avx512f")]
30212#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30213#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30214pub fn _mm512_mask_cmpneq_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30215 _mm512_mask_cmp_ps_mask::<_CMP_NEQ_UQ>(k1, a, b)
30216}
30217
30218/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30219///
30220/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_ps_mask&expand=749)
30221#[inline]
30222#[target_feature(enable = "avx512f")]
30223#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30224#[rustc_legacy_const_generics(2)]
30225#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30226pub fn _mm512_cmp_ps_mask<const IMM8: i32>(a: __m512, b: __m512) -> __mmask16 {
30227 unsafe {
30228 static_assert_uimm_bits!(IMM8, 5);
30229 let neg_one: i16 = -1;
30230 let a: f32x16 = a.as_f32x16();
30231 let b: f32x16 = b.as_f32x16();
        let r: i16 = vcmpps(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION);
30233 r.cast_unsigned()
30234 }
30235}
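
// A minimal illustrative sketch, not part of the upstream source: it drives the
// generic predicate form above with an explicit `_CMP_*` constant and checks that it
// agrees with the named shorthand defined earlier, assuming an AVX-512F target. The
// helper name `sketch_cmp_ps_predicate` is invented here.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn sketch_cmp_ps_predicate() {
    let a = _mm512_set1_ps(1.0);
    let b = _mm512_setr_ps(
        0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 2.0, 3.0,
    );
    // `a[i] <= b[i]` holds in every lane except those where `b` is 0.0.
    let le: __mmask16 = _mm512_cmp_ps_mask::<_CMP_LE_OS>(a, b);
    assert_eq!(le, 0b1110_1110_1110_1110);
    // The named shorthand produces the same mask.
    assert_eq!(_mm512_cmple_ps_mask(a, b), le);
}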
30236
30237/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30238///
30239/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_ps_mask&expand=750)
30240#[inline]
30241#[target_feature(enable = "avx512f")]
30242#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30243#[rustc_legacy_const_generics(3)]
30244#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30245pub fn _mm512_mask_cmp_ps_mask<const IMM8: i32>(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30246 unsafe {
30247 static_assert_uimm_bits!(IMM8, 5);
30248 let a: f32x16 = a.as_f32x16();
30249 let b: f32x16 = b.as_f32x16();
        let r: i16 = vcmpps(a, b, IMM8, k1 as i16, _MM_FROUND_CUR_DIRECTION);
30251 r.cast_unsigned()
30252 }
30253}
30254
30255/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30256///
30257/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_ps_mask&expand=747)
30258#[inline]
30259#[target_feature(enable = "avx512f,avx512vl")]
30260#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30261#[rustc_legacy_const_generics(2)]
30262#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30263pub fn _mm256_cmp_ps_mask<const IMM8: i32>(a: __m256, b: __m256) -> __mmask8 {
30264 unsafe {
30265 static_assert_uimm_bits!(IMM8, 5);
30266 let neg_one: i8 = -1;
30267 let a: f32x8 = a.as_f32x8();
30268 let b: f32x8 = b.as_f32x8();
        let r: i8 = vcmpps256(a, b, IMM8, neg_one);
30270 r.cast_unsigned()
30271 }
30272}
30273
30274/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30275///
30276/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_ps_mask&expand=748)
30277#[inline]
30278#[target_feature(enable = "avx512f,avx512vl")]
30279#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30280#[rustc_legacy_const_generics(3)]
30281#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30282pub fn _mm256_mask_cmp_ps_mask<const IMM8: i32>(k1: __mmask8, a: __m256, b: __m256) -> __mmask8 {
30283 unsafe {
30284 static_assert_uimm_bits!(IMM8, 5);
30285 let a: f32x8 = a.as_f32x8();
30286 let b: f32x8 = b.as_f32x8();
        let r: i8 = vcmpps256(a, b, IMM8, k1 as i8);
30288 r.cast_unsigned()
30289 }
30290}
30291
30292/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30293///
30294/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_ps_mask&expand=745)
30295#[inline]
30296#[target_feature(enable = "avx512f,avx512vl")]
30297#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30298#[rustc_legacy_const_generics(2)]
30299#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30300pub fn _mm_cmp_ps_mask<const IMM8: i32>(a: __m128, b: __m128) -> __mmask8 {
30301 unsafe {
30302 static_assert_uimm_bits!(IMM8, 5);
30303 let neg_one: i8 = -1;
30304 let a: f32x4 = a.as_f32x4();
30305 let b: f32x4 = b.as_f32x4();
        let r: i8 = vcmpps128(a, b, IMM8, neg_one);
30307 r.cast_unsigned()
30308 }
30309}
30310
30311/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30312///
30313/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_ps_mask&expand=746)
30314#[inline]
30315#[target_feature(enable = "avx512f,avx512vl")]
30316#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30317#[rustc_legacy_const_generics(3)]
30318#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30319pub fn _mm_mask_cmp_ps_mask<const IMM8: i32>(k1: __mmask8, a: __m128, b: __m128) -> __mmask8 {
30320 unsafe {
30321 static_assert_uimm_bits!(IMM8, 5);
30322 let a: f32x4 = a.as_f32x4();
30323 let b: f32x4 = b.as_f32x4();
        let r: i8 = vcmpps128(a, b, IMM8, k1 as i8);
30325 r.cast_unsigned()
30326 }
30327}
30328
30329/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.\
30330/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30331///
30332/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_round_ps_mask&expand=753)
30333#[inline]
30334#[target_feature(enable = "avx512f")]
30335#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30336#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30337#[rustc_legacy_const_generics(2, 3)]
30338pub fn _mm512_cmp_round_ps_mask<const IMM5: i32, const SAE: i32>(
30339 a: __m512,
30340 b: __m512,
30341) -> __mmask16 {
30342 unsafe {
30343 static_assert_uimm_bits!(IMM5, 5);
30344 static_assert_mantissas_sae!(SAE);
30345 let neg_one: i16 = -1;
30346 let a: f32x16 = a.as_f32x16();
30347 let b: f32x16 = b.as_f32x16();
        let r: i16 = vcmpps(a, b, IMM5, neg_one, SAE);
30349 r.cast_unsigned()
30350 }
30351}
30352
30353/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).\
30354/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30355///
30356/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_round_ps_mask&expand=754)
30357#[inline]
30358#[target_feature(enable = "avx512f")]
30359#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30360#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30361#[rustc_legacy_const_generics(3, 4)]
30362pub fn _mm512_mask_cmp_round_ps_mask<const IMM5: i32, const SAE: i32>(
30363 m: __mmask16,
30364 a: __m512,
30365 b: __m512,
30366) -> __mmask16 {
30367 unsafe {
30368 static_assert_uimm_bits!(IMM5, 5);
30369 static_assert_mantissas_sae!(SAE);
30370 let a: f32x16 = a.as_f32x16();
30371 let b: f32x16 = b.as_f32x16();
30372 let r: i16 = vcmpps(a, b, IMM5, m as i16, SAE);
30373 r.cast_unsigned()
30374 }
30375}
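
// A minimal illustrative sketch, not part of the upstream source: it invokes the SAE
// form above with `_MM_FROUND_NO_EXC` so the comparison does not raise floating-point
// exceptions on NaN inputs, assuming an AVX-512F target. The helper name
// `sketch_cmp_round_ps_sae` is invented here.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn sketch_cmp_round_ps_sae() {
    let a = _mm512_set1_ps(f32::NAN);
    let b = _mm512_set1_ps(1.0);
    // An ordered less-than against NaN is false in every lane, so the mask is 0.
    let k = _mm512_cmp_round_ps_mask::<_CMP_LT_OS, _MM_FROUND_NO_EXC>(a, b);
    assert_eq!(k, 0);
}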
30376
30377/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k.
30378///
30379/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpord_ps_mask&expand=1162)
30380#[inline]
30381#[target_feature(enable = "avx512f")]
30382#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30384pub fn _mm512_cmpord_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30385 _mm512_cmp_ps_mask::<_CMP_ORD_Q>(a, b)
30386}
30387
30388/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30389///
30390/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpord_ps_mask&expand=1163)
30391#[inline]
30392#[target_feature(enable = "avx512f")]
30393#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30394#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30395pub fn _mm512_mask_cmpord_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30396 _mm512_mask_cmp_ps_mask::<_CMP_ORD_Q>(k1, a, b)
30397}
30398
30399/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k.
30400///
30401/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpunord_ps_mask&expand=1170)
30402#[inline]
30403#[target_feature(enable = "avx512f")]
30404#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30405#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30406pub fn _mm512_cmpunord_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30407 _mm512_cmp_ps_mask::<_CMP_UNORD_Q>(a, b)
30408}
30409
30410/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30411///
30412/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpunord_ps_mask&expand=1171)
30413#[inline]
30414#[target_feature(enable = "avx512f")]
30415#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30416#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30417pub fn _mm512_mask_cmpunord_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30418 _mm512_mask_cmp_ps_mask::<_CMP_UNORD_Q>(k1, a, b)
30419}
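
// A minimal illustrative sketch, not part of the upstream source: it contrasts the
// ordered and unordered checks above in the presence of NaNs, assuming an AVX-512F
// target. The helper name `sketch_ord_unord_ps` is invented here.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn sketch_ord_unord_ps() {
    let a = _mm512_setr_ps(
        f32::NAN, 1.0, f32::NAN, 1.0, f32::NAN, 1.0, f32::NAN, 1.0, f32::NAN, 1.0,
        f32::NAN, 1.0, f32::NAN, 1.0, f32::NAN, 1.0,
    );
    let b = _mm512_set1_ps(2.0);
    // Every even lane holds a NaN, so it is unordered; every odd lane is ordered.
    let ord = _mm512_cmpord_ps_mask(a, b);
    let unord = _mm512_cmpunord_ps_mask(a, b);
    assert_eq!(ord, 0b1010_1010_1010_1010);
    // The two masks are complementary: each lane is either ordered or unordered.
    assert_eq!(unord, !ord);
}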
30420
30421/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than, and store the results in mask vector k.
30422///
30423/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_pd_mask&expand=1071)
30424#[inline]
30425#[target_feature(enable = "avx512f")]
30426#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30427#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30428pub fn _mm512_cmplt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30429 _mm512_cmp_pd_mask::<_CMP_LT_OS>(a, b)
30430}
30431
30432/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30433///
30434/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_pd_mask&expand=1072)
30435#[inline]
30436#[target_feature(enable = "avx512f")]
30437#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30438#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30439pub fn _mm512_mask_cmplt_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30440 _mm512_mask_cmp_pd_mask::<_CMP_LT_OS>(k1, a, b)
30441}
30442
30443/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k.
30444///
30445/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnlt_pd_mask&expand=1151)
30446#[inline]
30447#[target_feature(enable = "avx512f")]
30448#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30449#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30450pub fn _mm512_cmpnlt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30451 _mm512_cmp_pd_mask::<_CMP_NLT_US>(a, b)
30452}
30453
30454/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30455///
30456/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnlt_pd_mask&expand=1152)
30457#[inline]
30458#[target_feature(enable = "avx512f")]
30459#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30460#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30461pub fn _mm512_mask_cmpnlt_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_mask_cmp_pd_mask::<_CMP_NLT_US>(m, a, b)
30463}
30464
30465/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k.
30466///
30467/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_pd_mask&expand=1010)
30468#[inline]
30469#[target_feature(enable = "avx512f")]
30470#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30471#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30472pub fn _mm512_cmple_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30473 _mm512_cmp_pd_mask::<_CMP_LE_OS>(a, b)
30474}
30475
30476/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30477///
30478/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_pd_mask&expand=1011)
30479#[inline]
30480#[target_feature(enable = "avx512f")]
30481#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30482#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30483pub fn _mm512_mask_cmple_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30484 _mm512_mask_cmp_pd_mask::<_CMP_LE_OS>(k1, a, b)
30485}
30486
30487/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k.
30488///
30489/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnle_pd_mask&expand=1143)
30490#[inline]
30491#[target_feature(enable = "avx512f")]
30492#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30493#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30494pub fn _mm512_cmpnle_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30495 _mm512_cmp_pd_mask::<_CMP_NLE_US>(a, b)
30496}
30497
30498/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30499///
30500/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnle_pd_mask&expand=1144)
30501#[inline]
30502#[target_feature(enable = "avx512f")]
30503#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30504#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30505pub fn _mm512_mask_cmpnle_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30506 _mm512_mask_cmp_pd_mask::<_CMP_NLE_US>(k1, a, b)
30507}
30508
30509/// Compare packed double-precision (64-bit) floating-point elements in a and b for equality, and store the results in mask vector k.
30510///
30511/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_pd_mask&expand=822)
30512#[inline]
30513#[target_feature(enable = "avx512f")]
30514#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30515#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30516pub fn _mm512_cmpeq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30517 _mm512_cmp_pd_mask::<_CMP_EQ_OQ>(a, b)
30518}
30519
30520/// Compare packed double-precision (64-bit) floating-point elements in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30521///
30522/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_pd_mask&expand=823)
30523#[inline]
30524#[target_feature(enable = "avx512f")]
30525#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30526#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30527pub fn _mm512_mask_cmpeq_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30528 _mm512_mask_cmp_pd_mask::<_CMP_EQ_OQ>(k1, a, b)
30529}
30530
30531/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k.
30532///
30533/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_pd_mask&expand=1127)
30534#[inline]
30535#[target_feature(enable = "avx512f")]
30536#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30537#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30538pub fn _mm512_cmpneq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30539 _mm512_cmp_pd_mask::<_CMP_NEQ_UQ>(a, b)
30540}
30541
30542/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30543///
30544/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_pd_mask&expand=1128)
30545#[inline]
30546#[target_feature(enable = "avx512f")]
30547#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30548#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30549pub fn _mm512_mask_cmpneq_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30550 _mm512_mask_cmp_pd_mask::<_CMP_NEQ_UQ>(k1, a, b)
30551}
30552
30553/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30554///
30555/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_pd_mask&expand=741)
30556#[inline]
30557#[target_feature(enable = "avx512f")]
30558#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30559#[rustc_legacy_const_generics(2)]
30560#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30561pub fn _mm512_cmp_pd_mask<const IMM8: i32>(a: __m512d, b: __m512d) -> __mmask8 {
30562 unsafe {
30563 static_assert_uimm_bits!(IMM8, 5);
30564 let neg_one: i8 = -1;
30565 let a: f64x8 = a.as_f64x8();
30566 let b: f64x8 = b.as_f64x8();
        let r: i8 = vcmppd(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION);
30568 r.cast_unsigned()
30569 }
30570}
30571
30572/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30573///
30574/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_pd_mask&expand=742)
30575#[inline]
30576#[target_feature(enable = "avx512f")]
30577#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30578#[rustc_legacy_const_generics(3)]
30579#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30580pub fn _mm512_mask_cmp_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30581 unsafe {
30582 static_assert_uimm_bits!(IMM8, 5);
30583 let a: f64x8 = a.as_f64x8();
30584 let b: f64x8 = b.as_f64x8();
        let r: i8 = vcmppd(a, b, IMM8, k1 as i8, _MM_FROUND_CUR_DIRECTION);
30586 r.cast_unsigned()
30587 }
30588}
30589
30590/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30591///
30592/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_pd_mask&expand=739)
30593#[inline]
30594#[target_feature(enable = "avx512f,avx512vl")]
30595#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30596#[rustc_legacy_const_generics(2)]
30597#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30598pub fn _mm256_cmp_pd_mask<const IMM8: i32>(a: __m256d, b: __m256d) -> __mmask8 {
30599 unsafe {
30600 static_assert_uimm_bits!(IMM8, 5);
30601 let neg_one: i8 = -1;
30602 let a: f64x4 = a.as_f64x4();
30603 let b: f64x4 = b.as_f64x4();
        let r: i8 = vcmppd256(a, b, IMM8, neg_one);
30605 r.cast_unsigned()
30606 }
30607}
30608
30609/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30610///
30611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_pd_mask&expand=740)
30612#[inline]
30613#[target_feature(enable = "avx512f,avx512vl")]
30614#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30615#[rustc_legacy_const_generics(3)]
30616#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30617pub fn _mm256_mask_cmp_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m256d, b: __m256d) -> __mmask8 {
30618 unsafe {
30619 static_assert_uimm_bits!(IMM8, 5);
30620 let a: f64x4 = a.as_f64x4();
30621 let b: f64x4 = b.as_f64x4();
        let r: i8 = vcmppd256(a, b, IMM8, k1 as i8);
30623 r.cast_unsigned()
30624 }
30625}
30626
30627/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30628///
30629/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_pd_mask&expand=737)
30630#[inline]
30631#[target_feature(enable = "avx512f,avx512vl")]
30632#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30633#[rustc_legacy_const_generics(2)]
30634#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30635pub fn _mm_cmp_pd_mask<const IMM8: i32>(a: __m128d, b: __m128d) -> __mmask8 {
30636 unsafe {
30637 static_assert_uimm_bits!(IMM8, 5);
30638 let neg_one: i8 = -1;
30639 let a: f64x2 = a.as_f64x2();
30640 let b: f64x2 = b.as_f64x2();
        let r: i8 = vcmppd128(a, b, IMM8, neg_one);
30642 r.cast_unsigned()
30643 }
30644}
30645
30646/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30647///
30648/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_pd_mask&expand=738)
30649#[inline]
30650#[target_feature(enable = "avx512f,avx512vl")]
30651#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30652#[rustc_legacy_const_generics(3)]
30653#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30654pub fn _mm_mask_cmp_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m128d, b: __m128d) -> __mmask8 {
30655 unsafe {
30656 static_assert_uimm_bits!(IMM8, 5);
30657 let a: f64x2 = a.as_f64x2();
30658 let b: f64x2 = b.as_f64x2();
        let r: i8 = vcmppd128(a, b, IMM8, k1 as i8);
30660 r.cast_unsigned()
30661 }
30662}
30663
30664/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.\
30665/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30666///
30667/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_round_pd_mask&expand=751)
30668#[inline]
30669#[target_feature(enable = "avx512f")]
30670#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30671#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30672#[rustc_legacy_const_generics(2, 3)]
30673pub fn _mm512_cmp_round_pd_mask<const IMM5: i32, const SAE: i32>(
30674 a: __m512d,
30675 b: __m512d,
30676) -> __mmask8 {
30677 unsafe {
30678 static_assert_uimm_bits!(IMM5, 5);
30679 static_assert_mantissas_sae!(SAE);
30680 let neg_one: i8 = -1;
30681 let a: f64x8 = a.as_f64x8();
30682 let b: f64x8 = b.as_f64x8();
        let r: i8 = vcmppd(a, b, IMM5, neg_one, SAE);
30684 r.cast_unsigned()
30685 }
30686}
30687
30688/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).\
30689/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30690///
30691/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_round_pd_mask&expand=752)
30692#[inline]
30693#[target_feature(enable = "avx512f")]
30694#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30695#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30696#[rustc_legacy_const_generics(3, 4)]
30697pub fn _mm512_mask_cmp_round_pd_mask<const IMM5: i32, const SAE: i32>(
30698 k1: __mmask8,
30699 a: __m512d,
30700 b: __m512d,
30701) -> __mmask8 {
30702 unsafe {
30703 static_assert_uimm_bits!(IMM5, 5);
30704 static_assert_mantissas_sae!(SAE);
30705 let a: f64x8 = a.as_f64x8();
30706 let b: f64x8 = b.as_f64x8();
        let r: i8 = vcmppd(a, b, IMM5, k1 as i8, SAE);
30708 r.cast_unsigned()
30709 }
30710}
30711
30712/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k.
30713///
30714/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpord_pd_mask&expand=1159)
30715#[inline]
30716#[target_feature(enable = "avx512f")]
30717#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30718#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30719pub fn _mm512_cmpord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30720 _mm512_cmp_pd_mask::<_CMP_ORD_Q>(a, b)
30721}
30722
30723/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30724///
30725/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpord_pd_mask&expand=1160)
30726#[inline]
30727#[target_feature(enable = "avx512f")]
30728#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30729#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30730pub fn _mm512_mask_cmpord_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30731 _mm512_mask_cmp_pd_mask::<_CMP_ORD_Q>(k1, a, b)
30732}
30733
30734/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k.
30735///
30736/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpunord_pd_mask&expand=1167)
30737#[inline]
30738#[target_feature(enable = "avx512f")]
30739#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30740#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30741pub fn _mm512_cmpunord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30742 _mm512_cmp_pd_mask::<_CMP_UNORD_Q>(a, b)
30743}
30744
30745/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30746///
30747/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpunord_pd_mask&expand=1168)
30748#[inline]
30749#[target_feature(enable = "avx512f")]
30750#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30751#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30752pub fn _mm512_mask_cmpunord_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30753 _mm512_mask_cmp_pd_mask::<_CMP_UNORD_Q>(k1, a, b)
30754}
30755
30756/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.
30757///
30758/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_ss_mask&expand=763)
30759#[inline]
30760#[target_feature(enable = "avx512f")]
30761#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30762#[rustc_legacy_const_generics(2)]
30763#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30764pub fn _mm_cmp_ss_mask<const IMM8: i32>(a: __m128, b: __m128) -> __mmask8 {
30765 unsafe {
30766 static_assert_uimm_bits!(IMM8, 5);
30767 let neg_one: i8 = -1;
        let r: i8 = vcmpss(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION);
30769 r.cast_unsigned()
30770 }
30771}
30772
30773/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).
30774///
30775/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_ss_mask&expand=764)
30776#[inline]
30777#[target_feature(enable = "avx512f")]
30778#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30779#[rustc_legacy_const_generics(3)]
30780#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30781pub fn _mm_mask_cmp_ss_mask<const IMM8: i32>(k1: __mmask8, a: __m128, b: __m128) -> __mmask8 {
30782 unsafe {
30783 static_assert_uimm_bits!(IMM8, 5);
        let r: i8 = vcmpss(a, b, IMM8, k1 as i8, _MM_FROUND_CUR_DIRECTION);
30785 r.cast_unsigned()
30786 }
30787}
30788
30789/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.\
30790/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30791///
30792/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_round_ss_mask&expand=757)
30793#[inline]
30794#[target_feature(enable = "avx512f")]
30795#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30796#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30797#[rustc_legacy_const_generics(2, 3)]
30798pub fn _mm_cmp_round_ss_mask<const IMM5: i32, const SAE: i32>(a: __m128, b: __m128) -> __mmask8 {
30799 unsafe {
30800 static_assert_uimm_bits!(IMM5, 5);
30801 static_assert_mantissas_sae!(SAE);
30802 let neg_one: i8 = -1;
        let r: i8 = vcmpss(a, b, IMM5, neg_one, SAE);
30804 r.cast_unsigned()
30805 }
30806}
30807
/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).\
30809/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30810///
30811/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_round_ss_mask&expand=758)
30812#[inline]
30813#[target_feature(enable = "avx512f")]
30814#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30815#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30816#[rustc_legacy_const_generics(3, 4)]
30817pub fn _mm_mask_cmp_round_ss_mask<const IMM5: i32, const SAE: i32>(
30818 k1: __mmask8,
30819 a: __m128,
30820 b: __m128,
30821) -> __mmask8 {
30822 unsafe {
30823 static_assert_uimm_bits!(IMM5, 5);
30824 static_assert_mantissas_sae!(SAE);
        let r: i8 = vcmpss(a, b, IMM5, k1 as i8, SAE);
30826 r.cast_unsigned()
30827 }
30828}
30829
30830/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.
30831///
30832/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_sd_mask&expand=760)
30833#[inline]
30834#[target_feature(enable = "avx512f")]
30835#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30836#[rustc_legacy_const_generics(2)]
30837#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30838pub fn _mm_cmp_sd_mask<const IMM8: i32>(a: __m128d, b: __m128d) -> __mmask8 {
30839 unsafe {
30840 static_assert_uimm_bits!(IMM8, 5);
30841 let neg_one: i8 = -1;
        let r: i8 = vcmpsd(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION);
30843 r.cast_unsigned()
30844 }
30845}
30846
30847/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).
30848///
30849/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_sd_mask&expand=761)
30850#[inline]
30851#[target_feature(enable = "avx512f")]
30852#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30853#[rustc_legacy_const_generics(3)]
30854#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30855pub fn _mm_mask_cmp_sd_mask<const IMM8: i32>(k1: __mmask8, a: __m128d, b: __m128d) -> __mmask8 {
30856 unsafe {
30857 static_assert_uimm_bits!(IMM8, 5);
        let r: i8 = vcmpsd(a, b, IMM8, k1 as i8, _MM_FROUND_CUR_DIRECTION);
30859 r.cast_unsigned()
30860 }
30861}
30862
30863/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.\
30864/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30865///
30866/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_round_sd_mask&expand=755)
30867#[inline]
30868#[target_feature(enable = "avx512f")]
30869#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30870#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30871#[rustc_legacy_const_generics(2, 3)]
30872pub fn _mm_cmp_round_sd_mask<const IMM5: i32, const SAE: i32>(a: __m128d, b: __m128d) -> __mmask8 {
30873 unsafe {
30874 static_assert_uimm_bits!(IMM5, 5);
30875 static_assert_mantissas_sae!(SAE);
30876 let neg_one: i8 = -1;
        let r: i8 = vcmpsd(a, b, IMM5, neg_one, SAE);
30878 r.cast_unsigned()
30879 }
30880}
30881
30882/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).\
30883/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30884///
30885/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_round_sd_mask&expand=756)
30886#[inline]
30887#[target_feature(enable = "avx512f")]
30888#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30889#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30890#[rustc_legacy_const_generics(3, 4)]
30891pub fn _mm_mask_cmp_round_sd_mask<const IMM5: i32, const SAE: i32>(
30892 k1: __mmask8,
30893 a: __m128d,
30894 b: __m128d,
30895) -> __mmask8 {
30896 unsafe {
30897 static_assert_uimm_bits!(IMM5, 5);
30898 static_assert_mantissas_sae!(SAE);
        let r: i8 = vcmpsd(a, b, IMM5, k1 as i8, SAE);
30900 r.cast_unsigned()
30901 }
30902}
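
// A minimal illustrative sketch, not part of the upstream source: it shows that the
// scalar lower-element compares above only ever report through bit 0 of the result
// mask, assuming an AVX-512F (and SSE) target. The helper name
// `sketch_scalar_cmp_mask` is invented here.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,sse")]
fn sketch_scalar_cmp_mask() {
    let a = _mm_set_ss(1.0);
    let b = _mm_set_ss(2.0);
    // Only the lowest element is compared: 1.0 < 2.0, so bit 0 is set.
    assert_eq!(_mm_cmp_ss_mask::<_CMP_LT_OS>(a, b), 0b1);
    // With a zeromask whose bit 0 is clear, the result is forced to 0.
    assert_eq!(_mm_mask_cmp_ss_mask::<_CMP_LT_OS>(0b0, a, b), 0);
}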
30903
30904/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k.
30905///
30906/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epu32_mask&expand=1056)
30907#[inline]
30908#[target_feature(enable = "avx512f")]
30909#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30910#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30911pub fn _mm512_cmplt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    unsafe { simd_bitmask::<u32x16, _>(simd_lt(a.as_u32x16(), b.as_u32x16())) }
30913}
30914
30915/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30916///
30917/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epu32_mask&expand=1057)
30918#[inline]
30919#[target_feature(enable = "avx512f")]
30920#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30921#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30922pub fn _mm512_mask_cmplt_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
30923 _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(k1, a, b)
30924}
30925
30926/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k.
30927///
30928/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epu32_mask&expand=1054)
30929#[inline]
30930#[target_feature(enable = "avx512f,avx512vl")]
30931#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30932#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30933pub fn _mm256_cmplt_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<u32x8, _>(simd_lt(a.as_u32x8(), b.as_u32x8())) }
30935}
30936
30937/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30938///
30939/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epu32_mask&expand=1055)
30940#[inline]
30941#[target_feature(enable = "avx512f,avx512vl")]
30942#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30943#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30944pub fn _mm256_mask_cmplt_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
30945 _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(k1, a, b)
30946}
30947
30948/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k.
30949///
30950/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epu32_mask&expand=1052)
30951#[inline]
30952#[target_feature(enable = "avx512f,avx512vl")]
30953#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30954#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30955pub fn _mm_cmplt_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<u32x4, _>(simd_lt(a.as_u32x4(), b.as_u32x4())) }
30957}
30958
30959/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30960///
30961/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epu32_mask&expand=1053)
30962#[inline]
30963#[target_feature(enable = "avx512f,avx512vl")]
30964#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30965#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30966pub fn _mm_mask_cmplt_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
30967 _mm_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(k1, a, b)
30968}
30969
30970/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k.
30971///
30972/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epu32_mask&expand=933)
30973#[inline]
30974#[target_feature(enable = "avx512f")]
30975#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30976#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30977pub fn _mm512_cmpgt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    unsafe { simd_bitmask::<u32x16, _>(simd_gt(a.as_u32x16(), b.as_u32x16())) }
30979}
30980
30981/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30982///
30983/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epu32_mask&expand=934)
30984#[inline]
30985#[target_feature(enable = "avx512f")]
30986#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30987#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30988pub fn _mm512_mask_cmpgt_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
30989 _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_NLE>(k1, a, b)
30990}
30991
30992/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k.
30993///
30994/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epu32_mask&expand=931)
30995#[inline]
30996#[target_feature(enable = "avx512f,avx512vl")]
30997#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
30998#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30999pub fn _mm256_cmpgt_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<u32x8, _>(simd_gt(a.as_u32x8(), b.as_u32x8())) }
31001}
31002
31003/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31004///
31005/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epu32_mask&expand=932)
31006#[inline]
31007#[target_feature(enable = "avx512f,avx512vl")]
31008#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31009#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31010pub fn _mm256_mask_cmpgt_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31011 _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_NLE>(k1, a, b)
31012}
31013
31014/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k.
31015///
31016/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epu32_mask&expand=929)
31017#[inline]
31018#[target_feature(enable = "avx512f,avx512vl")]
31019#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31020#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31021pub fn _mm_cmpgt_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<u32x4, _>(simd_gt(a.as_u32x4(), b.as_u32x4())) }
31023}
31024
31025/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31026///
31027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epu32_mask&expand=930)
31028#[inline]
31029#[target_feature(enable = "avx512f,avx512vl")]
31030#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31031#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31032pub fn _mm_mask_cmpgt_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31033 _mm_mask_cmp_epu32_mask::<_MM_CMPINT_NLE>(k1, a, b)
31034}
31035
31036/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
31037///
31038/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epu32_mask&expand=995)
31039#[inline]
31040#[target_feature(enable = "avx512f")]
31041#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31042#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31043pub fn _mm512_cmple_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    unsafe { simd_bitmask::<u32x16, _>(simd_le(a.as_u32x16(), b.as_u32x16())) }
31045}
31046
31047/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31048///
31049/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epu32_mask&expand=996)
31050#[inline]
31051#[target_feature(enable = "avx512f")]
31052#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31053#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31054pub fn _mm512_mask_cmple_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31055 _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_LE>(k1, a, b)
31056}
31057
31058/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
31059///
31060/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epu32_mask&expand=993)
31061#[inline]
31062#[target_feature(enable = "avx512f,avx512vl")]
31063#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31064#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31065pub fn _mm256_cmple_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<u32x8, _>(simd_le(a.as_u32x8(), b.as_u32x8())) }
31067}
31068
31069/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31070///
31071/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epu32_mask&expand=994)
31072#[inline]
31073#[target_feature(enable = "avx512f,avx512vl")]
31074#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31075#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31076pub fn _mm256_mask_cmple_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31077 _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_LE>(k1, a, b)
31078}
31079
31080/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
31081///
31082/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epu32_mask&expand=991)
31083#[inline]
31084#[target_feature(enable = "avx512f,avx512vl")]
31085#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31086#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31087pub fn _mm_cmple_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<u32x4, _>(simd_le(a.as_u32x4(), b.as_u32x4())) }
31089}
31090
31091/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31092///
31093/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epu32_mask&expand=992)
31094#[inline]
31095#[target_feature(enable = "avx512f,avx512vl")]
31096#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31097#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31098pub fn _mm_mask_cmple_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31099 _mm_mask_cmp_epu32_mask::<_MM_CMPINT_LE>(k1, a, b)
31100}
31101
31102/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
31103///
31104/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epu32_mask&expand=873)
31105#[inline]
31106#[target_feature(enable = "avx512f")]
31107#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31108#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31109pub fn _mm512_cmpge_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    unsafe { simd_bitmask::<u32x16, _>(simd_ge(a.as_u32x16(), b.as_u32x16())) }
31111}
31112
31113/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31114///
31115/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epu32_mask&expand=874)
31116#[inline]
31117#[target_feature(enable = "avx512f")]
31118#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31119#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31120pub fn _mm512_mask_cmpge_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31121 _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_NLT>(k1, a, b)
31122}
31123
31124/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
31125///
31126/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epu32_mask&expand=871)
31127#[inline]
31128#[target_feature(enable = "avx512f,avx512vl")]
31129#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31130#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31131pub fn _mm256_cmpge_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<u32x8, _>(simd_ge(a.as_u32x8(), b.as_u32x8())) }
31133}
31134
31135/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31136///
31137/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epu32_mask&expand=872)
31138#[inline]
31139#[target_feature(enable = "avx512f,avx512vl")]
31140#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31141#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31142pub fn _mm256_mask_cmpge_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31143 _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_NLT>(k1, a, b)
31144}
31145
31146/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
31147///
31148/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epu32_mask&expand=869)
31149#[inline]
31150#[target_feature(enable = "avx512f,avx512vl")]
31151#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31152#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31153pub fn _mm_cmpge_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<u32x4, _>(simd_ge(a.as_u32x4(), b.as_u32x4())) }
31155}
31156
31157/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31158///
31159/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epu32_mask&expand=870)
31160#[inline]
31161#[target_feature(enable = "avx512f,avx512vl")]
31162#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31163#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31164pub fn _mm_mask_cmpge_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31165 _mm_mask_cmp_epu32_mask::<_MM_CMPINT_NLT>(k1, a, b)
31166}
31167
31168/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k.
31169///
31170/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epu32_mask&expand=807)
31171#[inline]
31172#[target_feature(enable = "avx512f")]
31173#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31174#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31175pub fn _mm512_cmpeq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    unsafe { simd_bitmask::<u32x16, _>(simd_eq(a.as_u32x16(), b.as_u32x16())) }
31177}
31178
31179/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31180///
31181/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epu32_mask&expand=808)
31182#[inline]
31183#[target_feature(enable = "avx512f")]
31184#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31185#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31186pub fn _mm512_mask_cmpeq_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31187 _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_EQ>(k1, a, b)
31188}
31189
31190/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k.
31191///
31192/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epu32_mask&expand=805)
31193#[inline]
31194#[target_feature(enable = "avx512f,avx512vl")]
31195#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31196#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31197pub fn _mm256_cmpeq_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<u32x8, _>(simd_eq(a.as_u32x8(), b.as_u32x8())) }
31199}
31200
31201/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31202///
31203/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epu32_mask&expand=806)
31204#[inline]
31205#[target_feature(enable = "avx512f,avx512vl")]
31206#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31207#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31208pub fn _mm256_mask_cmpeq_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31209 _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_EQ>(k1, a, b)
31210}
31211
31212/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k.
31213///
31214/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epu32_mask&expand=803)
31215#[inline]
31216#[target_feature(enable = "avx512f,avx512vl")]
31217#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31218#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31219pub fn _mm_cmpeq_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<u32x4, _>(simd_eq(a.as_u32x4(), b.as_u32x4())) }
31221}
31222
31223/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31224///
31225/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epu32_mask&expand=804)
31226#[inline]
31227#[target_feature(enable = "avx512f,avx512vl")]
31228#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31229#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31230pub fn _mm_mask_cmpeq_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31231 _mm_mask_cmp_epu32_mask::<_MM_CMPINT_EQ>(k1, a, b)
31232}
31233
31234/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k.
31235///
31236/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epu32_mask&expand=1112)
31237#[inline]
31238#[target_feature(enable = "avx512f")]
31239#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31240#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31241pub fn _mm512_cmpneq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    unsafe { simd_bitmask::<u32x16, _>(simd_ne(a.as_u32x16(), b.as_u32x16())) }
31243}
31244
31245/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31246///
31247/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epu32_mask&expand=1113)
31248#[inline]
31249#[target_feature(enable = "avx512f")]
31250#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31251#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31252pub fn _mm512_mask_cmpneq_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31253 _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_NE>(k1, a, b)
31254}
31255
31256/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k.
31257///
31258/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epu32_mask&expand=1110)
31259#[inline]
31260#[target_feature(enable = "avx512f,avx512vl")]
31261#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31262#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31263pub fn _mm256_cmpneq_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<u32x8, _>(simd_ne(a.as_u32x8(), b.as_u32x8())) }
31265}
31266
31267/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31268///
31269/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epu32_mask&expand=1111)
31270#[inline]
31271#[target_feature(enable = "avx512f,avx512vl")]
31272#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31273#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31274pub fn _mm256_mask_cmpneq_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31275 _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_NE>(k1, a, b)
31276}
31277
31278/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k.
31279///
31280/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epu32_mask&expand=1108)
31281#[inline]
31282#[target_feature(enable = "avx512f,avx512vl")]
31283#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31284#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31285pub fn _mm_cmpneq_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<u32x4, _>(simd_ne(a.as_u32x4(), b.as_u32x4())) }
31287}
31288
31289/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31290///
31291/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epu32_mask&expand=1109)
31292#[inline]
31293#[target_feature(enable = "avx512f,avx512vl")]
31294#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31295#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31296pub fn _mm_mask_cmpneq_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31297 _mm_mask_cmp_epu32_mask::<_MM_CMPINT_NE>(k1, a, b)
31298}
31299
31300/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31301///
31302/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epu32_mask&expand=721)
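///
/// A minimal sketch showing how the predicate is selected through the const generic
/// (illustrative values; gated on run-time AVX-512F detection):
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use core::arch::x86_64::*;
///
/// if std::arch::is_x86_feature_detected!("avx512f") {
///     let k = unsafe {
///         // _MM_CMPINT_LE selects the unsigned less-than-or-equal predicate,
///         // which holds for every lane when both operands are equal.
///         let a = _mm512_set1_epi32(5);
///         let b = _mm512_set1_epi32(5);
///         _mm512_cmp_epu32_mask::<_MM_CMPINT_LE>(a, b)
///     };
///     assert_eq!(k, 0xFFFF);
/// }
/// # }
/// ```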
31303#[inline]
31304#[target_feature(enable = "avx512f")]
31305#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31306#[rustc_legacy_const_generics(2)]
31307#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31308pub fn _mm512_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m512i, b: __m512i) -> __mmask16 {
31309 unsafe {
31310 static_assert_uimm_bits!(IMM3, 3);
31311 let a: u32x16 = a.as_u32x16();
31312 let b: u32x16 = b.as_u32x16();
        let r: i32x16 = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i32x16::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i32x16::splat(-1),
        };
31323 simd_bitmask(r)
31324 }
31325}
31326
31327/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31328///
31329/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epu32_mask&expand=722)
31330#[inline]
31331#[target_feature(enable = "avx512f")]
31332#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31333#[rustc_legacy_const_generics(3)]
31334#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31335pub fn _mm512_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
31336 k1: __mmask16,
31337 a: __m512i,
31338 b: __m512i,
31339) -> __mmask16 {
31340 unsafe {
31341 static_assert_uimm_bits!(IMM3, 3);
31342 let a: u32x16 = a.as_u32x16();
31343 let b: u32x16 = b.as_u32x16();
        let k1: i32x16 = simd_select_bitmask(k1, i32x16::splat(-1), i32x16::ZERO);
        let r: i32x16 = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i32x16::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
31355 simd_bitmask(r)
31356 }
31357}
31358
31359/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31360///
31361/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epu32_mask&expand=719)
31362#[inline]
31363#[target_feature(enable = "avx512f,avx512vl")]
31364#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31365#[rustc_legacy_const_generics(2)]
31366#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31367pub fn _mm256_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m256i, b: __m256i) -> __mmask8 {
31368 unsafe {
31369 static_assert_uimm_bits!(IMM3, 3);
31370 let a: u32x8 = a.as_u32x8();
31371 let b: u32x8 = b.as_u32x8();
        let r: i32x8 = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i32x8::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i32x8::splat(-1),
        };
31382 simd_bitmask(r)
31383 }
31384}
31385
31386/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31387///
31388/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epu32_mask&expand=720)
31389#[inline]
31390#[target_feature(enable = "avx512f,avx512vl")]
31391#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31392#[rustc_legacy_const_generics(3)]
31393#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31394pub fn _mm256_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
31395 k1: __mmask8,
31396 a: __m256i,
31397 b: __m256i,
31398) -> __mmask8 {
31399 unsafe {
31400 static_assert_uimm_bits!(IMM3, 3);
31401 let a: u32x8 = a.as_u32x8();
31402 let b: u32x8 = b.as_u32x8();
        let k1: i32x8 = simd_select_bitmask(k1, i32x8::splat(-1), i32x8::ZERO);
        let r: i32x8 = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i32x8::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
31414 simd_bitmask(r)
31415 }
31416}
31417
31418/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31419///
31420/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epu32_mask&expand=717)
31421#[inline]
31422#[target_feature(enable = "avx512f,avx512vl")]
31423#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31424#[rustc_legacy_const_generics(2)]
31425#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31426pub fn _mm_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
31427 unsafe {
31428 static_assert_uimm_bits!(IMM3, 3);
31429 let a: u32x4 = a.as_u32x4();
31430 let b: u32x4 = b.as_u32x4();
        let r: i32x4 = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i32x4::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i32x4::splat(-1),
        };
31441 simd_bitmask(r)
31442 }
31443}
31444
31445/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31446///
31447/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epu32_mask&expand=718)
31448#[inline]
31449#[target_feature(enable = "avx512f,avx512vl")]
31450#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31451#[rustc_legacy_const_generics(3)]
31452#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31453pub fn _mm_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
31454 k1: __mmask8,
31455 a: __m128i,
31456 b: __m128i,
31457) -> __mmask8 {
31458 unsafe {
31459 static_assert_uimm_bits!(IMM3, 3);
31460 let a: u32x4 = a.as_u32x4();
31461 let b: u32x4 = b.as_u32x4();
        let k1: i32x4 = simd_select_bitmask(k1, i32x4::splat(-1), i32x4::ZERO);
        let r: i32x4 = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i32x4::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
31473 simd_bitmask(r)
31474 }
31475}
31476
31477/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k.
31478///
31479/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epi32_mask&expand=1029)
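///
/// A minimal sketch contrasting the signed interpretation with the `epu32` variants above
/// (illustrative values; gated on run-time AVX-512F detection):
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use core::arch::x86_64::*;
///
/// if std::arch::is_x86_feature_detected!("avx512f") {
///     let k = unsafe {
///         // As signed integers -1 < 0 in every lane; the unsigned variant
///         // would treat -1 as u32::MAX and return an empty mask instead.
///         let a = _mm512_set1_epi32(-1);
///         let b = _mm512_set1_epi32(0);
///         _mm512_cmplt_epi32_mask(a, b)
///     };
///     assert_eq!(k, 0xFFFF);
/// }
/// # }
/// ```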
31480#[inline]
31481#[target_feature(enable = "avx512f")]
31482#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31483#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31484pub fn _mm512_cmplt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    unsafe { simd_bitmask::<i32x16, _>(simd_lt(a.as_i32x16(), b.as_i32x16())) }
31486}
31487
31488/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31489///
31490/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epi32_mask&expand=1031)
31491#[inline]
31492#[target_feature(enable = "avx512f")]
31493#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31494#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31495pub fn _mm512_mask_cmplt_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31496 _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(k1, a, b)
31497}
31498
31499/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k.
31500///
31501/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epi32_mask&expand=1027)
31502#[inline]
31503#[target_feature(enable = "avx512f,avx512vl")]
31504#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31505#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31506pub fn _mm256_cmplt_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<i32x8, _>(simd_lt(a.as_i32x8(), b.as_i32x8())) }
31508}
31509
31510/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31511///
31512/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epi32_mask&expand=1028)
31513#[inline]
31514#[target_feature(enable = "avx512f,avx512vl")]
31515#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31516#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31517pub fn _mm256_mask_cmplt_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31518 _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(k1, a, b)
31519}
31520
31521/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k.
31522///
31523/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi32_mask&expand=1025)
31524#[inline]
31525#[target_feature(enable = "avx512f,avx512vl")]
31526#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31527#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31528pub fn _mm_cmplt_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<i32x4, _>(simd_lt(a.as_i32x4(), b.as_i32x4())) }
31530}
31531
31532/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31533///
31534/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epi32_mask&expand=1026)
31535#[inline]
31536#[target_feature(enable = "avx512f,avx512vl")]
31537#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31538#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31539pub fn _mm_mask_cmplt_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31540 _mm_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(k1, a, b)
31541}
31542
31543/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k.
31544///
31545/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epi32_mask&expand=905)
31546#[inline]
31547#[target_feature(enable = "avx512f")]
31548#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31549#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31550pub fn _mm512_cmpgt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    unsafe { simd_bitmask::<i32x16, _>(simd_gt(a.as_i32x16(), b.as_i32x16())) }
31552}
31553
31554/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31555///
31556/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epi32_mask&expand=906)
31557#[inline]
31558#[target_feature(enable = "avx512f")]
31559#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31560#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31561pub fn _mm512_mask_cmpgt_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31562 _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_NLE>(k1, a, b)
31563}
31564
31565/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k.
31566///
31567/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi32_mask&expand=903)
31568#[inline]
31569#[target_feature(enable = "avx512f,avx512vl")]
31570#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31571#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31572pub fn _mm256_cmpgt_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<i32x8, _>(simd_gt(a.as_i32x8(), b.as_i32x8())) }
31574}
31575
31576/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31577///
31578/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epi32_mask&expand=904)
31579#[inline]
31580#[target_feature(enable = "avx512f,avx512vl")]
31581#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31582#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31583pub fn _mm256_mask_cmpgt_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31584 _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_NLE>(k1, a, b)
31585}
31586
31587/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k.
31588///
31589/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi32_mask&expand=901)
31590#[inline]
31591#[target_feature(enable = "avx512f,avx512vl")]
31592#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31593#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31594pub fn _mm_cmpgt_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<i32x4, _>(simd_gt(a.as_i32x4(), b.as_i32x4())) }
31596}
31597
31598/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31599///
31600/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epi32_mask&expand=902)
31601#[inline]
31602#[target_feature(enable = "avx512f,avx512vl")]
31603#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31604#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31605pub fn _mm_mask_cmpgt_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31606 _mm_mask_cmp_epi32_mask::<_MM_CMPINT_NLE>(k1, a, b)
31607}
31608
31609/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
31610///
31611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epi32_mask&expand=971)
31612#[inline]
31613#[target_feature(enable = "avx512f")]
31614#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31615#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31616pub fn _mm512_cmple_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    unsafe { simd_bitmask::<i32x16, _>(simd_le(a.as_i32x16(), b.as_i32x16())) }
31618}
31619
31620/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31621///
31622/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epi32_mask&expand=972)
31623#[inline]
31624#[target_feature(enable = "avx512f")]
31625#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31626#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31627pub fn _mm512_mask_cmple_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31628 _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_LE>(k1, a, b)
31629}
31630
31631/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
31632///
31633/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epi32_mask&expand=969)
31634#[inline]
31635#[target_feature(enable = "avx512f,avx512vl")]
31636#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31637#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31638pub fn _mm256_cmple_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<i32x8, _>(simd_le(a.as_i32x8(), b.as_i32x8())) }
31640}
31641
31642/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31643///
31644/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epi32_mask&expand=970)
31645#[inline]
31646#[target_feature(enable = "avx512f,avx512vl")]
31647#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31648#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31649pub fn _mm256_mask_cmple_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31650 _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_LE>(k1, a, b)
31651}
31652
31653/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
31654///
31655/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epi32_mask&expand=967)
31656#[inline]
31657#[target_feature(enable = "avx512f,avx512vl")]
31658#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31659#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31660pub fn _mm_cmple_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<i32x4, _>(simd_le(a.as_i32x4(), b.as_i32x4())) }
31662}
31663
31664/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31665///
31666/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epi32_mask&expand=968)
31667#[inline]
31668#[target_feature(enable = "avx512f,avx512vl")]
31669#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31670#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31671pub fn _mm_mask_cmple_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31672 _mm_mask_cmp_epi32_mask::<_MM_CMPINT_LE>(k1, a, b)
31673}
31674
31675/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
31676///
31677/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epi32_mask&expand=849)
31678#[inline]
31679#[target_feature(enable = "avx512f")]
31680#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31681#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31682pub fn _mm512_cmpge_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    unsafe { simd_bitmask::<i32x16, _>(simd_ge(a.as_i32x16(), b.as_i32x16())) }
31684}
31685
31686/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31687///
31688/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epi32_mask&expand=850)
31689#[inline]
31690#[target_feature(enable = "avx512f")]
31691#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31692#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31693pub fn _mm512_mask_cmpge_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31694 _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_NLT>(k1, a, b)
31695}
31696
31697/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
31698///
31699/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epi32_mask&expand=847)
31700#[inline]
31701#[target_feature(enable = "avx512f,avx512vl")]
31702#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31703#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31704pub fn _mm256_cmpge_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<i32x8, _>(simd_ge(a.as_i32x8(), b.as_i32x8())) }
31706}
31707
31708/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31709///
31710/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epi32_mask&expand=848)
31711#[inline]
31712#[target_feature(enable = "avx512f,avx512vl")]
31713#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31714#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31715pub fn _mm256_mask_cmpge_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31716 _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_NLT>(k1, a, b)
31717}
31718
31719/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
31720///
31721/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epi32_mask&expand=845)
31722#[inline]
31723#[target_feature(enable = "avx512f,avx512vl")]
31724#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31725#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31726pub fn _mm_cmpge_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<i32x4, _>(simd_ge(a.as_i32x4(), b.as_i32x4())) }
31728}
31729
31730/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31731///
31732/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epi32_mask&expand=846)
31733#[inline]
31734#[target_feature(enable = "avx512f,avx512vl")]
31735#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31736#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31737pub fn _mm_mask_cmpge_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31738 _mm_mask_cmp_epi32_mask::<_MM_CMPINT_NLT>(k1, a, b)
31739}
31740
31741/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k.
31742///
31743/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epi32_mask&expand=779)
31744#[inline]
31745#[target_feature(enable = "avx512f")]
31746#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31747#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31748pub fn _mm512_cmpeq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    unsafe { simd_bitmask::<i32x16, _>(simd_eq(a.as_i32x16(), b.as_i32x16())) }
31750}
31751
31752/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31753///
31754/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epi32_mask&expand=780)
31755#[inline]
31756#[target_feature(enable = "avx512f")]
31757#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31758#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31759pub fn _mm512_mask_cmpeq_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31760 _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_EQ>(k1, a, b)
31761}
31762
31763/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k.
31764///
31765/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi32_mask&expand=777)
31766#[inline]
31767#[target_feature(enable = "avx512f,avx512vl")]
31768#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31769#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31770pub fn _mm256_cmpeq_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<i32x8, _>(simd_eq(a.as_i32x8(), b.as_i32x8())) }
31772}
31773
31774/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31775///
31776/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epi32_mask&expand=778)
31777#[inline]
31778#[target_feature(enable = "avx512f,avx512vl")]
31779#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31780#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31781pub fn _mm256_mask_cmpeq_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31782 _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_EQ>(k1, a, b)
31783}
31784
31785/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k.
31786///
31787/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi32_mask&expand=775)
31788#[inline]
31789#[target_feature(enable = "avx512f,avx512vl")]
31790#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31791#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31792pub fn _mm_cmpeq_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<i32x4, _>(simd_eq(a.as_i32x4(), b.as_i32x4())) }
31794}
31795
31796/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31797///
31798/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epi32_mask&expand=776)
31799#[inline]
31800#[target_feature(enable = "avx512f,avx512vl")]
31801#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31802#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31803pub fn _mm_mask_cmpeq_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31804 _mm_mask_cmp_epi32_mask::<_MM_CMPINT_EQ>(k1, a, b)
31805}
31806
31807/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k.
31808///
31809/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epi32_mask&expand=1088)
31810#[inline]
31811#[target_feature(enable = "avx512f")]
31812#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31813#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31814pub fn _mm512_cmpneq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    unsafe { simd_bitmask::<i32x16, _>(simd_ne(a.as_i32x16(), b.as_i32x16())) }
31816}
31817
31818/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31819///
31820/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epi32_mask&expand=1089)
31821#[inline]
31822#[target_feature(enable = "avx512f")]
31823#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31824#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31825pub fn _mm512_mask_cmpneq_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31826 _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_NE>(k1, a, b)
31827}
31828
31829/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k.
31830///
31831/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epi32_mask&expand=1086)
31832#[inline]
31833#[target_feature(enable = "avx512f,avx512vl")]
31834#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31835#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31836pub fn _mm256_cmpneq_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<i32x8, _>(simd_ne(a.as_i32x8(), b.as_i32x8())) }
31838}
31839
31840/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31841///
31842/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epi32_mask&expand=1087)
31843#[inline]
31844#[target_feature(enable = "avx512f,avx512vl")]
31845#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31846#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31847pub fn _mm256_mask_cmpneq_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31848 _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_NE>(k1, a, b)
31849}
31850
31851/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k.
31852///
31853/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epi32_mask&expand=1084)
31854#[inline]
31855#[target_feature(enable = "avx512f,avx512vl")]
31856#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31857#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31858pub fn _mm_cmpneq_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<i32x4, _>(simd_ne(a.as_i32x4(), b.as_i32x4())) }
31860}
31861
31862/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31863///
31864/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epi32_mask&expand=1085)
31865#[inline]
31866#[target_feature(enable = "avx512f,avx512vl")]
31867#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31868#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
31869pub fn _mm_mask_cmpneq_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31870 _mm_mask_cmp_epi32_mask::<_MM_CMPINT_NE>(k1, a, b)
31871}
31872
31873/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31874///
31875/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epi32_mask&expand=697)
31876#[inline]
31877#[target_feature(enable = "avx512f")]
31878#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31879#[rustc_legacy_const_generics(2)]
31880#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31881pub fn _mm512_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m512i, b: __m512i) -> __mmask16 {
31882 unsafe {
31883 static_assert_uimm_bits!(IMM3, 3);
31884 let a: i32x16 = a.as_i32x16();
31885 let b: i32x16 = b.as_i32x16();
        let r: i32x16 = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i32x16::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i32x16::splat(-1),
        };
31896 simd_bitmask(r)
31897 }
31898}
31899
31900/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31901///
31902/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epi32_mask&expand=698)
31903#[inline]
31904#[target_feature(enable = "avx512f")]
31905#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31906#[rustc_legacy_const_generics(3)]
31907#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31908pub fn _mm512_mask_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
31909 k1: __mmask16,
31910 a: __m512i,
31911 b: __m512i,
31912) -> __mmask16 {
31913 unsafe {
31914 static_assert_uimm_bits!(IMM3, 3);
31915 let a: i32x16 = a.as_i32x16();
31916 let b: i32x16 = b.as_i32x16();
        let k1: i32x16 = simd_select_bitmask(k1, i32x16::splat(-1), i32x16::ZERO);
        let r: i32x16 = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i32x16::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
31928 simd_bitmask(r)
31929 }
31930}
31931
31932/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31933///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epi32_mask&expand=695)
31935#[inline]
31936#[target_feature(enable = "avx512f,avx512vl")]
31937#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31938#[rustc_legacy_const_generics(2)]
31939#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31940pub fn _mm256_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m256i, b: __m256i) -> __mmask8 {
31941 unsafe {
31942 static_assert_uimm_bits!(IMM3, 3);
31943 let a: i32x8 = a.as_i32x8();
31944 let b: i32x8 = b.as_i32x8();
        let r: i32x8 = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i32x8::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i32x8::splat(-1),
        };
31955 simd_bitmask(r)
31956 }
31957}
31958
31959/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31960///
31961/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epi32_mask&expand=696)
31962#[inline]
31963#[target_feature(enable = "avx512f,avx512vl")]
31964#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31965#[rustc_legacy_const_generics(3)]
31966#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31967pub fn _mm256_mask_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
31968 k1: __mmask8,
31969 a: __m256i,
31970 b: __m256i,
31971) -> __mmask8 {
31972 unsafe {
31973 static_assert_uimm_bits!(IMM3, 3);
31974 let a: i32x8 = a.as_i32x8();
31975 let b: i32x8 = b.as_i32x8();
        let k1: i32x8 = simd_select_bitmask(k1, i32x8::splat(-1), i32x8::ZERO);
        let r: i32x8 = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i32x8::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
31987 simd_bitmask(r)
31988 }
31989}
31990
31991/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31992///
31993/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epi32_mask&expand=693)
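///
/// A minimal sketch for the 128-bit form (illustrative values; gated on run-time detection of
/// both AVX-512F and AVX-512VL):
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use core::arch::x86_64::*;
///
/// if std::arch::is_x86_feature_detected!("avx512f")
///     && std::arch::is_x86_feature_detected!("avx512vl")
/// {
///     let k = unsafe {
///         // Lanes 0 and 2 are equal, so bits 0 and 2 of the mask are set; only
///         // the low four bits of the returned __mmask8 are meaningful here.
///         let a = _mm_setr_epi32(0, 1, 2, 3);
///         let b = _mm_setr_epi32(0, 9, 2, 9);
///         _mm_cmp_epi32_mask::<_MM_CMPINT_EQ>(a, b)
///     };
///     assert_eq!(k, 0b0101);
/// }
/// # }
/// ```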
31994#[inline]
31995#[target_feature(enable = "avx512f,avx512vl")]
31996#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
31997#[rustc_legacy_const_generics(2)]
31998#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31999pub fn _mm_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
32000 unsafe {
32001 static_assert_uimm_bits!(IMM3, 3);
32002 let a: i32x4 = a.as_i32x4();
32003 let b: i32x4 = b.as_i32x4();
        let r: i32x4 = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i32x4::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i32x4::splat(-1),
        };
32014 simd_bitmask(r)
32015 }
32016}
32017
32018/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32019///
32020/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epi32_mask&expand=694)
32021#[inline]
32022#[target_feature(enable = "avx512f,avx512vl")]
32023#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32024#[rustc_legacy_const_generics(3)]
32025#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32026pub fn _mm_mask_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
32027 k1: __mmask8,
32028 a: __m128i,
32029 b: __m128i,
32030) -> __mmask8 {
32031 unsafe {
32032 static_assert_uimm_bits!(IMM3, 3);
32033 let a: i32x4 = a.as_i32x4();
32034 let b: i32x4 = b.as_i32x4();
        let k1: i32x4 = simd_select_bitmask(k1, i32x4::splat(-1), i32x4::ZERO);
        let r: i32x4 = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i32x4::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
32046 simd_bitmask(r)
32047 }
32048}
32049
32050/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k.
32051///
32052/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epu64_mask&expand=1062)
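///
/// A minimal sketch for the 64-bit lanes (illustrative values; gated on run-time AVX-512F
/// detection):
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use core::arch::x86_64::*;
///
/// if std::arch::is_x86_feature_detected!("avx512f") {
///     let k = unsafe {
///         // Eight 64-bit lanes, so the comparison fills all eight bits of the mask.
///         let a = _mm512_set1_epi64(1);
///         let b = _mm512_set1_epi64(2);
///         _mm512_cmplt_epu64_mask(a, b)
///     };
///     assert_eq!(k, 0xFF);
/// }
/// # }
/// ```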
32053#[inline]
32054#[target_feature(enable = "avx512f")]
32055#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32056#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32057pub fn _mm512_cmplt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    unsafe { simd_bitmask::<__m512i, _>(simd_lt(a.as_u64x8(), b.as_u64x8())) }
32059}
32060
32061/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32062///
32063/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epu64_mask&expand=1063)
32064#[inline]
32065#[target_feature(enable = "avx512f")]
32066#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32067#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32068pub fn _mm512_mask_cmplt_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32069 _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(k1, a, b)
32070}
32071
32072/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k.
32073///
32074/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epu64_mask&expand=1060)
32075#[inline]
32076#[target_feature(enable = "avx512f,avx512vl")]
32077#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32078#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32079pub fn _mm256_cmplt_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<__m256i, _>(simd_lt(a.as_u64x4(), b.as_u64x4())) }
32081}
32082
32083/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32084///
32085/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epu64_mask&expand=1061)
32086#[inline]
32087#[target_feature(enable = "avx512f,avx512vl")]
32088#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32089#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32090pub fn _mm256_mask_cmplt_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32091 _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(k1, a, b)
32092}
32093
32094/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k.
32095///
32096/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epu64_mask&expand=1058)
32097#[inline]
32098#[target_feature(enable = "avx512f,avx512vl")]
32099#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32100#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32101pub fn _mm_cmplt_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<__m128i, _>(simd_lt(a.as_u64x2(), b.as_u64x2())) }
32103}
32104
32105/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32106///
32107/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epu64_mask&expand=1059)
32108#[inline]
32109#[target_feature(enable = "avx512f,avx512vl")]
32110#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32111#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32112pub fn _mm_mask_cmplt_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32113 _mm_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(k1, a, b)
32114}
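
// Illustrative sketch only, not part of the stdarch API: the `epu64`
// comparisons above are unsigned, so a lane of all ones compares as u64::MAX
// rather than as -1. The helper name `unsigned_vs_signed_lt_demo` is an
// assumption made for this example.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn unsigned_vs_signed_lt_demo() -> (__mmask8, __mmask8) {
    let a = _mm512_set1_epi64(-1); // u64::MAX in every lane when read unsigned
    let b = _mm512_set1_epi64(0);
    // Unsigned view: u64::MAX < 0 is false in every lane, so the mask is 0x00.
    let unsigned = _mm512_cmplt_epu64_mask(a, b);
    // Signed view: -1 < 0 is true in every lane, so the mask is 0xff.
    let signed = _mm512_cmplt_epi64_mask(a, b);
    (unsigned, signed)
}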
32115
32116/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k.
32117///
32118/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epu64_mask&expand=939)
32119#[inline]
32120#[target_feature(enable = "avx512f")]
32121#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32122#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32123pub fn _mm512_cmpgt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    unsafe { simd_bitmask::<__m512i, _>(simd_gt(a.as_u64x8(), b.as_u64x8())) }
32125}
32126
32127/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32128///
32129/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epu64_mask&expand=940)
32130#[inline]
32131#[target_feature(enable = "avx512f")]
32132#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32133#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32134pub fn _mm512_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32135 _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_NLE>(k1, a, b)
32136}
32137
32138/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k.
32139///
32140/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epu64_mask&expand=937)
32141#[inline]
32142#[target_feature(enable = "avx512f,avx512vl")]
32143#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32144#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32145pub fn _mm256_cmpgt_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<__m256i, _>(simd_gt(a.as_u64x4(), b.as_u64x4())) }
32147}
32148
32149/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32150///
32151/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epu64_mask&expand=938)
32152#[inline]
32153#[target_feature(enable = "avx512f,avx512vl")]
32154#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32155#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32156pub fn _mm256_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32157 _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_NLE>(k1, a, b)
32158}
32159
32160/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k.
32161///
32162/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epu64_mask&expand=935)
32163#[inline]
32164#[target_feature(enable = "avx512f,avx512vl")]
32165#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32166#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32167pub fn _mm_cmpgt_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<__m128i, _>(simd_gt(a.as_u64x2(), b.as_u64x2())) }
32169}
32170
32171/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32172///
32173/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epu64_mask&expand=936)
32174#[inline]
32175#[target_feature(enable = "avx512f,avx512vl")]
32176#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32177#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32178pub fn _mm_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32179 _mm_mask_cmp_epu64_mask::<_MM_CMPINT_NLE>(k1, a, b)
32180}
32181
32182/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
32183///
32184/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epu64_mask&expand=1001)
32185#[inline]
32186#[target_feature(enable = "avx512f")]
32187#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32188#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32189pub fn _mm512_cmple_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    unsafe { simd_bitmask::<__m512i, _>(simd_le(a.as_u64x8(), b.as_u64x8())) }
32191}
32192
32193/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32194///
32195/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epu64_mask&expand=1002)
32196#[inline]
32197#[target_feature(enable = "avx512f")]
32198#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32199#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32200pub fn _mm512_mask_cmple_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32201 _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_LE>(k1, a, b)
32202}
32203
32204/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
32205///
32206/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epu64_mask&expand=999)
32207#[inline]
32208#[target_feature(enable = "avx512f,avx512vl")]
32209#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32210#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32211pub fn _mm256_cmple_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<__m256i, _>(simd_le(a.as_u64x4(), b.as_u64x4())) }
32213}
32214
32215/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32216///
32217/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epu64_mask&expand=1000)
32218#[inline]
32219#[target_feature(enable = "avx512f,avx512vl")]
32220#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32221#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32222pub fn _mm256_mask_cmple_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32223 _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_LE>(k1, a, b)
32224}
32225
32226/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
32227///
32228/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epu64_mask&expand=997)
32229#[inline]
32230#[target_feature(enable = "avx512f,avx512vl")]
32231#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32232#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32233pub fn _mm_cmple_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<__m128i, _>(simd_le(a.as_u64x2(), b.as_u64x2())) }
32235}
32236
32237/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32238///
32239/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epu64_mask&expand=998)
32240#[inline]
32241#[target_feature(enable = "avx512f,avx512vl")]
32242#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32243#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32244pub fn _mm_mask_cmple_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32245 _mm_mask_cmp_epu64_mask::<_MM_CMPINT_LE>(k1, a, b)
32246}
32247
32248/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32249///
32250/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epu64_mask&expand=879)
32251#[inline]
32252#[target_feature(enable = "avx512f")]
32253#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32254#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32255pub fn _mm512_cmpge_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    unsafe { simd_bitmask::<__m512i, _>(simd_ge(a.as_u64x8(), b.as_u64x8())) }
32257}
32258
32259/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32260///
32261/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epu64_mask&expand=880)
32262#[inline]
32263#[target_feature(enable = "avx512f")]
32264#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32265#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32266pub fn _mm512_mask_cmpge_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32267 _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_NLT>(k1, a, b)
32268}
32269
32270/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32271///
32272/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epu64_mask&expand=877)
32273#[inline]
32274#[target_feature(enable = "avx512f,avx512vl")]
32275#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32276#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32277pub fn _mm256_cmpge_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<__m256i, _>(simd_ge(a.as_u64x4(), b.as_u64x4())) }
32279}
32280
32281/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32282///
32283/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epu64_mask&expand=878)
32284#[inline]
32285#[target_feature(enable = "avx512f,avx512vl")]
32286#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32287#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32288pub fn _mm256_mask_cmpge_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32289 _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_NLT>(k1, a, b)
32290}
32291
32292/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32293///
32294/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epu64_mask&expand=875)
32295#[inline]
32296#[target_feature(enable = "avx512f,avx512vl")]
32297#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32298#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32299pub fn _mm_cmpge_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<__m128i, _>(simd_ge(a.as_u64x2(), b.as_u64x2())) }
32301}
32302
32303/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32304///
32305/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epu64_mask&expand=876)
32306#[inline]
32307#[target_feature(enable = "avx512f,avx512vl")]
32308#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32309#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32310pub fn _mm_mask_cmpge_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32311 _mm_mask_cmp_epu64_mask::<_MM_CMPINT_NLT>(k1, a, b)
32312}
32313
32314/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k.
32315///
32316/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epu64_mask&expand=813)
32317#[inline]
32318#[target_feature(enable = "avx512f")]
32319#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32320#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32321pub fn _mm512_cmpeq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    unsafe { simd_bitmask::<__m512i, _>(simd_eq(a.as_u64x8(), b.as_u64x8())) }
32323}
32324
32325/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32326///
32327/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epu64_mask&expand=814)
32328#[inline]
32329#[target_feature(enable = "avx512f")]
32330#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32331#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32332pub fn _mm512_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32333 _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_EQ>(k1, a, b)
32334}
32335
32336/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k.
32337///
32338/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epu64_mask&expand=811)
32339#[inline]
32340#[target_feature(enable = "avx512f,avx512vl")]
32341#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32342#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32343pub fn _mm256_cmpeq_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<__m256i, _>(simd_eq(a.as_u64x4(), b.as_u64x4())) }
32345}
32346
32347/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32348///
32349/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epu64_mask&expand=812)
32350#[inline]
32351#[target_feature(enable = "avx512f,avx512vl")]
32352#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32353#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32354pub fn _mm256_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32355 _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_EQ>(k1, a, b)
32356}
32357
32358/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k.
32359///
32360/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epu64_mask&expand=809)
32361#[inline]
32362#[target_feature(enable = "avx512f,avx512vl")]
32363#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32364#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32365pub fn _mm_cmpeq_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<__m128i, _>(simd_eq(a.as_u64x2(), b.as_u64x2())) }
32367}
32368
32369/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32370///
32371/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epu64_mask&expand=810)
32372#[inline]
32373#[target_feature(enable = "avx512f,avx512vl")]
32374#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32375#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32376pub fn _mm_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32377 _mm_mask_cmp_epu64_mask::<_MM_CMPINT_EQ>(k1, a, b)
32378}
32379
32380/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k.
32381///
32382/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epu64_mask&expand=1118)
32383#[inline]
32384#[target_feature(enable = "avx512f")]
32385#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32386#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32387pub fn _mm512_cmpneq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    unsafe { simd_bitmask::<__m512i, _>(simd_ne(a.as_u64x8(), b.as_u64x8())) }
32389}
32390
32391/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32392///
32393/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epu64_mask&expand=1119)
32394#[inline]
32395#[target_feature(enable = "avx512f")]
32396#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32397#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32398pub fn _mm512_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32399 _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_NE>(k1, a, b)
32400}
32401
32402/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k.
32403///
32404/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epu64_mask&expand=1116)
32405#[inline]
32406#[target_feature(enable = "avx512f,avx512vl")]
32407#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32408#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32409pub fn _mm256_cmpneq_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<__m256i, _>(simd_ne(a.as_u64x4(), b.as_u64x4())) }
32411}
32412
32413/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32414///
32415/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epu64_mask&expand=1117)
32416#[inline]
32417#[target_feature(enable = "avx512f,avx512vl")]
32418#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32419#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32420pub fn _mm256_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32421 _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_NE>(k1, a, b)
32422}
32423
32424/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k.
32425///
32426/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epu64_mask&expand=1114)
32427#[inline]
32428#[target_feature(enable = "avx512f,avx512vl")]
32429#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32430#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32431pub fn _mm_cmpneq_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<__m128i, _>(simd_ne(a.as_u64x2(), b.as_u64x2())) }
32433}
32434
32435/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32436///
32437/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epu64_mask&expand=1115)
32438#[inline]
32439#[target_feature(enable = "avx512f,avx512vl")]
32440#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32441#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32442pub fn _mm_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32443 _mm_mask_cmp_epu64_mask::<_MM_CMPINT_NE>(k1, a, b)
32444}
32445
32446/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
32447///
32448/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epu64_mask&expand=727)
32449#[inline]
32450#[target_feature(enable = "avx512f")]
32451#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32452#[rustc_legacy_const_generics(2)]
32453#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32454pub fn _mm512_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m512i, b: __m512i) -> __mmask8 {
32455 unsafe {
32456 static_assert_uimm_bits!(IMM3, 3);
32457 let a: u64x8 = a.as_u64x8();
32458 let b: u64x8 = b.as_u64x8();
32459 let r: i64x8 = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i64x8::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
32467 _ => i64x8::splat(-1),
32468 };
32469 simd_bitmask(r)
32470 }
32471}
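
// Illustrative sketch only, not part of the stdarch API: the fixed-predicate
// helpers such as `_mm512_cmplt_epu64_mask` are equivalent to this generic
// compare instantiated with the matching `_MM_CMPINT_*` constant. The helper
// name `cmp_predicate_demo` is an assumption made for this example.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn cmp_predicate_demo(a: __m512i, b: __m512i) -> bool {
    let via_wrapper = _mm512_cmplt_epu64_mask(a, b);
    let via_generic = _mm512_cmp_epu64_mask::<_MM_CMPINT_LT>(a, b);
    via_wrapper == via_generic
}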
32472
32473/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32474///
32475/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epu64_mask&expand=728)
32476#[inline]
32477#[target_feature(enable = "avx512f")]
32478#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32479#[rustc_legacy_const_generics(3)]
32480#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32481pub fn _mm512_mask_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
32482 k1: __mmask8,
32483 a: __m512i,
32484 b: __m512i,
32485) -> __mmask8 {
32486 unsafe {
32487 static_assert_uimm_bits!(IMM3, 3);
32488 let a: u64x8 = a.as_u64x8();
32489 let b: u64x8 = b.as_u64x8();
        let k1: i64x8 = simd_select_bitmask(k1, i64x8::splat(-1), i64x8::ZERO);
        let r: i64x8 = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i64x8::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
32499 _ => k1,
32500 };
32501 simd_bitmask(r)
32502 }
32503}
32504
32505/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
32506///
32507/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epu64_mask&expand=725)
32508#[inline]
32509#[target_feature(enable = "avx512f,avx512vl")]
32510#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32511#[rustc_legacy_const_generics(2)]
32512#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32513pub fn _mm256_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m256i, b: __m256i) -> __mmask8 {
32514 unsafe {
32515 static_assert_uimm_bits!(IMM3, 3);
32516 let a: u64x4 = a.as_u64x4();
32517 let b: u64x4 = b.as_u64x4();
32518 let r: i64x4 = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i64x4::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
32526 _ => i64x4::splat(-1),
32527 };
32528 simd_bitmask(r)
32529 }
32530}
32531
32532/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32533///
32534/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epu64_mask&expand=726)
32535#[inline]
32536#[target_feature(enable = "avx512f,avx512vl")]
32537#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32538#[rustc_legacy_const_generics(3)]
32539#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32540pub fn _mm256_mask_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
32541 k1: __mmask8,
32542 a: __m256i,
32543 b: __m256i,
32544) -> __mmask8 {
32545 unsafe {
32546 static_assert_uimm_bits!(IMM3, 3);
32547 let a: u64x4 = a.as_u64x4();
32548 let b: u64x4 = b.as_u64x4();
        let k1: i64x4 = simd_select_bitmask(k1, i64x4::splat(-1), i64x4::ZERO);
        let r: i64x4 = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i64x4::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
32558 _ => k1,
32559 };
32560 simd_bitmask(r)
32561 }
32562}
32563
32564/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
32565///
32566/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epu64_mask&expand=723)
32567#[inline]
32568#[target_feature(enable = "avx512f,avx512vl")]
32569#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32570#[rustc_legacy_const_generics(2)]
32571#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32572pub fn _mm_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
32573 unsafe {
32574 static_assert_uimm_bits!(IMM3, 3);
32575 let a: u64x2 = a.as_u64x2();
32576 let b: u64x2 = b.as_u64x2();
32577 let r: i64x2 = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i64x2::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
32585 _ => i64x2::splat(-1),
32586 };
32587 simd_bitmask(r)
32588 }
32589}
32590
32591/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32592///
32593/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epu64_mask&expand=724)
32594#[inline]
32595#[target_feature(enable = "avx512f,avx512vl")]
32596#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32597#[rustc_legacy_const_generics(3)]
32598#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32599pub fn _mm_mask_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
32600 k1: __mmask8,
32601 a: __m128i,
32602 b: __m128i,
32603) -> __mmask8 {
32604 unsafe {
32605 static_assert_uimm_bits!(IMM3, 3);
32606 let a: u64x2 = a.as_u64x2();
32607 let b: u64x2 = b.as_u64x2();
        let k1: i64x2 = simd_select_bitmask(k1, i64x2::splat(-1), i64x2::ZERO);
        let r: i64x2 = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i64x2::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
32617 _ => k1,
32618 };
32619 simd_bitmask(r)
32620 }
32621}
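
// Illustrative sketch only, not part of the stdarch API: predicates 3
// (`_MM_CMPINT_FALSE`) and 7 (`_MM_CMPINT_TRUE`) ignore the vector inputs;
// the former always yields an empty mask and the latter returns `k1`
// restricted to the two 64-bit lanes of a 128-bit vector. The helper name
// `false_true_predicate_demo` is an assumption made for this example.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
fn false_true_predicate_demo(k1: __mmask8, a: __m128i, b: __m128i) -> (__mmask8, __mmask8) {
    let always_false = _mm_mask_cmp_epu64_mask::<_MM_CMPINT_FALSE>(k1, a, b); // always 0
    let always_true = _mm_mask_cmp_epu64_mask::<_MM_CMPINT_TRUE>(k1, a, b); // k1 & 0b11
    (always_false, always_true)
}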
32622
32623/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k.
32624///
32625/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epi64_mask&expand=1037)
32626#[inline]
32627#[target_feature(enable = "avx512f")]
32628#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32629#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32630pub fn _mm512_cmplt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    unsafe { simd_bitmask::<__m512i, _>(simd_lt(a.as_i64x8(), b.as_i64x8())) }
32632}
32633
32634/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32635///
32636/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epi64_mask&expand=1038)
32637#[inline]
32638#[target_feature(enable = "avx512f")]
32639#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32640#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32641pub fn _mm512_mask_cmplt_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32642 _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(k1, a, b)
32643}
32644
32645/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k.
32646///
32647/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epi64_mask&expand=1035)
32648#[inline]
32649#[target_feature(enable = "avx512f,avx512vl")]
32650#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32651#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32652pub fn _mm256_cmplt_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<__m256i, _>(simd_lt(a.as_i64x4(), b.as_i64x4())) }
32654}
32655
32656/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32657///
32658/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epi64_mask&expand=1036)
32659#[inline]
32660#[target_feature(enable = "avx512f,avx512vl")]
32661#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32662#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32663pub fn _mm256_mask_cmplt_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32664 _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(k1, a, b)
32665}
32666
32667/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k.
32668///
32669/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi64_mask&expand=1033)
32670#[inline]
32671#[target_feature(enable = "avx512f,avx512vl")]
32672#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32673#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32674pub fn _mm_cmplt_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<__m128i, _>(simd_lt(a.as_i64x2(), b.as_i64x2())) }
32676}
32677
32678/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32679///
32680/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epi64_mask&expand=1034)
32681#[inline]
32682#[target_feature(enable = "avx512f,avx512vl")]
32683#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32684#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32685pub fn _mm_mask_cmplt_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32686 _mm_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(k1, a, b)
32687}
32688
32689/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k.
32690///
32691/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epi64_mask&expand=913)
32692#[inline]
32693#[target_feature(enable = "avx512f")]
32694#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32695#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32696pub fn _mm512_cmpgt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    unsafe { simd_bitmask::<__m512i, _>(simd_gt(a.as_i64x8(), b.as_i64x8())) }
32698}
32699
32700/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32701///
32702/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epi64_mask&expand=914)
32703#[inline]
32704#[target_feature(enable = "avx512f")]
32705#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32706#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32707pub fn _mm512_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32708 _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_NLE>(k1, a, b)
32709}
32710
32711/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k.
32712///
32713/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi64_mask&expand=911)
32714#[inline]
32715#[target_feature(enable = "avx512f,avx512vl")]
32716#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32717#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32718pub fn _mm256_cmpgt_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<__m256i, _>(simd_gt(a.as_i64x4(), b.as_i64x4())) }
32720}
32721
32722/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32723///
32724/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epi64_mask&expand=912)
32725#[inline]
32726#[target_feature(enable = "avx512f,avx512vl")]
32727#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32728#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32729pub fn _mm256_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32730 _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_NLE>(k1, a, b)
32731}
32732
32733/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k.
32734///
32735/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi64_mask&expand=909)
32736#[inline]
32737#[target_feature(enable = "avx512f,avx512vl")]
32738#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32739#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32740pub fn _mm_cmpgt_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<__m128i, _>(simd_gt(a.as_i64x2(), b.as_i64x2())) }
32742}
32743
32744/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32745///
32746/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epi64_mask&expand=910)
32747#[inline]
32748#[target_feature(enable = "avx512f,avx512vl")]
32749#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32750#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32751pub fn _mm_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32752 _mm_mask_cmp_epi64_mask::<_MM_CMPINT_NLE>(k1, a, b)
32753}
32754
32755/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
32756///
32757/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epi64_mask&expand=977)
32758#[inline]
32759#[target_feature(enable = "avx512f")]
32760#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32761#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32762pub fn _mm512_cmple_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    unsafe { simd_bitmask::<__m512i, _>(simd_le(a.as_i64x8(), b.as_i64x8())) }
32764}
32765
32766/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32767///
32768/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epi64_mask&expand=978)
32769#[inline]
32770#[target_feature(enable = "avx512f")]
32771#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32772#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32773pub fn _mm512_mask_cmple_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32774 _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_LE>(k1, a, b)
32775}
32776
32777/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
32778///
32779/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epi64_mask&expand=975)
32780#[inline]
32781#[target_feature(enable = "avx512f,avx512vl")]
32782#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32783#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32784pub fn _mm256_cmple_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<__m256i, _>(simd_le(a.as_i64x4(), b.as_i64x4())) }
32786}
32787
32788/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32789///
32790/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epi64_mask&expand=976)
32791#[inline]
32792#[target_feature(enable = "avx512f,avx512vl")]
32793#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32794#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32795pub fn _mm256_mask_cmple_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32796 _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_LE>(k1, a, b)
32797}
32798
32799/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
32800///
32801/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epi64_mask&expand=973)
32802#[inline]
32803#[target_feature(enable = "avx512f,avx512vl")]
32804#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32805#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32806pub fn _mm_cmple_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<__m128i, _>(simd_le(a.as_i64x2(), b.as_i64x2())) }
32808}
32809
32810/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32811///
32812/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epi64_mask&expand=974)
32813#[inline]
32814#[target_feature(enable = "avx512f,avx512vl")]
32815#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32816#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32817pub fn _mm_mask_cmple_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32818 _mm_mask_cmp_epi64_mask::<_MM_CMPINT_LE>(k1, a, b)
32819}
32820
32821/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32822///
32823/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epi64_mask&expand=855)
32824#[inline]
32825#[target_feature(enable = "avx512f")]
32826#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32827#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32828pub fn _mm512_cmpge_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    unsafe { simd_bitmask::<__m512i, _>(simd_ge(a.as_i64x8(), b.as_i64x8())) }
32830}
32831
32832/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32833///
32834/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epi64_mask&expand=856)
32835#[inline]
32836#[target_feature(enable = "avx512f")]
32837#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32838#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32839pub fn _mm512_mask_cmpge_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32840 _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_NLT>(k1, a, b)
32841}
32842
32843/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32844///
32845/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epi64_mask&expand=853)
32846#[inline]
32847#[target_feature(enable = "avx512f,avx512vl")]
32848#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32849#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32850pub fn _mm256_cmpge_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<__m256i, _>(simd_ge(a.as_i64x4(), b.as_i64x4())) }
32852}
32853
32854/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32855///
32856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epi64_mask&expand=854)
32857#[inline]
32858#[target_feature(enable = "avx512f,avx512vl")]
32859#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32860#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32861pub fn _mm256_mask_cmpge_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32862 _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_NLT>(k1, a, b)
32863}
32864
32865/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32866///
32867/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epi64_mask&expand=851)
32868#[inline]
32869#[target_feature(enable = "avx512f,avx512vl")]
32870#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32871#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32872pub fn _mm_cmpge_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<__m128i, _>(simd_ge(a.as_i64x2(), b.as_i64x2())) }
32874}
32875
32876/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32877///
32878/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epi64_mask&expand=852)
32879#[inline]
32880#[target_feature(enable = "avx512f,avx512vl")]
32881#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32882#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32883pub fn _mm_mask_cmpge_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32884 _mm_mask_cmp_epi64_mask::<_MM_CMPINT_NLT>(k1, a, b)
32885}
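
// Illustrative sketch only, not part of the stdarch API: composes two of the
// signed 64-bit comparisons above to count the lanes of `v` lying in the
// half-open range `[lo, hi)`. The helper name `count_in_range_epi64_demo` is
// an assumption made for this example.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn count_in_range_epi64_demo(v: __m512i, lo: __m512i, hi: __m512i) -> u32 {
    // First build the `v >= lo` mask, then keep only the lanes where `v < hi`.
    let ge_lo = _mm512_cmpge_epi64_mask(v, lo);
    let in_range = _mm512_mask_cmplt_epi64_mask(ge_lo, v, hi);
    in_range.count_ones()
}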
32886
32887/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k.
32888///
32889/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epi64_mask&expand=787)
32890#[inline]
32891#[target_feature(enable = "avx512f")]
32892#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32893#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32894pub fn _mm512_cmpeq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    unsafe { simd_bitmask::<__m512i, _>(simd_eq(a.as_i64x8(), b.as_i64x8())) }
32896}
32897
32898/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32899///
32900/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epi64_mask&expand=788)
32901#[inline]
32902#[target_feature(enable = "avx512f")]
32903#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32904#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32905pub fn _mm512_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32906 _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_EQ>(k1, a, b)
32907}
32908
32909/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k.
32910///
32911/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi64_mask&expand=785)
32912#[inline]
32913#[target_feature(enable = "avx512f,avx512vl")]
32914#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32915#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32916pub fn _mm256_cmpeq_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<__m256i, _>(simd_eq(a.as_i64x4(), b.as_i64x4())) }
32918}
32919
32920/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32921///
32922/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epi64_mask&expand=786)
32923#[inline]
32924#[target_feature(enable = "avx512f,avx512vl")]
32925#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32926#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32927pub fn _mm256_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32928 _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_EQ>(k1, a, b)
32929}
32930
32931/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k.
32932///
32933/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi64_mask&expand=783)
32934#[inline]
32935#[target_feature(enable = "avx512f,avx512vl")]
32936#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32937#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32938pub fn _mm_cmpeq_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<__m128i, _>(simd_eq(a.as_i64x2(), b.as_i64x2())) }
32940}
32941
32942/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32943///
32944/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epi64_mask&expand=784)
32945#[inline]
32946#[target_feature(enable = "avx512f,avx512vl")]
32947#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32948#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32949pub fn _mm_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32950 _mm_mask_cmp_epi64_mask::<_MM_CMPINT_EQ>(k1, a, b)
32951}
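
// Illustrative sketch only, not part of the stdarch API: reduces the lane-wise
// equality mask above to a single bool answering "are the two vectors
// identical when viewed as 64-bit lanes?". The helper name
// `vectors_equal_epi64_demo` is an assumption made for this example.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
fn vectors_equal_epi64_demo(a: __m128i, b: __m128i) -> bool {
    // A 128-bit vector holds two 64-bit lanes, so a full match sets mask 0b11.
    _mm_cmpeq_epi64_mask(a, b) == 0b11
}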
32952
32953/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k.
32954///
32955/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epi64_mask&expand=1094)
32956#[inline]
32957#[target_feature(enable = "avx512f")]
32958#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32959#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32960pub fn _mm512_cmpneq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    unsafe { simd_bitmask::<__m512i, _>(simd_ne(a.as_i64x8(), b.as_i64x8())) }
32962}
32963
32964/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32965///
32966/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epi64_mask&expand=1095)
32967#[inline]
32968#[target_feature(enable = "avx512f")]
32969#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32970#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32971pub fn _mm512_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32972 _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_NE>(k1, a, b)
32973}
32974
32975/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k.
32976///
32977/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epi64_mask&expand=1092)
32978#[inline]
32979#[target_feature(enable = "avx512f,avx512vl")]
32980#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32981#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32982pub fn _mm256_cmpneq_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<__m256i, _>(simd_ne(a.as_i64x4(), b.as_i64x4())) }
32984}
32985
32986/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32987///
32988/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epi64_mask&expand=1093)
32989#[inline]
32990#[target_feature(enable = "avx512f,avx512vl")]
32991#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
32992#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32993pub fn _mm256_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32994 _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_NE>(k1, a, b)
32995}
32996
32997/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k.
32998///
32999/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epi64_mask&expand=1090)
33000#[inline]
33001#[target_feature(enable = "avx512f,avx512vl")]
33002#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33003#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
33004pub fn _mm_cmpneq_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<__m128i, _>(simd_ne(a.as_i64x2(), b.as_i64x2())) }
33006}
33007
33008/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33009///
33010/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epi64_mask&expand=1091)
33011#[inline]
33012#[target_feature(enable = "avx512f,avx512vl")]
33013#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33014#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
33015pub fn _mm_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
33016 _mm_mask_cmp_epi64_mask::<_MM_CMPINT_NE>(k1, a, b)
33017}
33018
33019/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
33020///
33021/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epi64_mask&expand=703)
33022#[inline]
33023#[target_feature(enable = "avx512f")]
33024#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33025#[rustc_legacy_const_generics(2)]
33026#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33027pub fn _mm512_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m512i, b: __m512i) -> __mmask8 {
33028 unsafe {
33029 static_assert_uimm_bits!(IMM3, 3);
33030 let a: i64x8 = a.as_i64x8();
33031 let b: i64x8 = b.as_i64x8();
33032 let r: i64x8 = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i64x8::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
33040 _ => i64x8::splat(-1),
33041 };
33042 simd_bitmask(r)
33043 }
33044}
33045
33046/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33047///
33048/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epi64_mask&expand=704)
33049#[inline]
33050#[target_feature(enable = "avx512f")]
33051#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33052#[rustc_legacy_const_generics(3)]
33053#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33054pub fn _mm512_mask_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
33055 k1: __mmask8,
33056 a: __m512i,
33057 b: __m512i,
33058) -> __mmask8 {
33059 unsafe {
33060 static_assert_uimm_bits!(IMM3, 3);
33061 let a: i64x8 = a.as_i64x8();
33062 let b: i64x8 = b.as_i64x8();
        let k1: i64x8 = simd_select_bitmask(k1, i64x8::splat(-1), i64x8::ZERO);
        let r: i64x8 = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i64x8::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
33072 _ => k1,
33073 };
33074 simd_bitmask(r)
33075 }
33076}
33077
33078/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
33079///
33080/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epi64_mask&expand=701)
33081#[inline]
33082#[target_feature(enable = "avx512f,avx512vl")]
33083#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33084#[rustc_legacy_const_generics(2)]
33085#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33086pub fn _mm256_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m256i, b: __m256i) -> __mmask8 {
33087 unsafe {
33088 static_assert_uimm_bits!(IMM3, 3);
33089 let a: i64x4 = a.as_i64x4();
33090 let b: i64x4 = b.as_i64x4();
33091 let r: i64x4 = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i64x4::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
33099 _ => i64x4::splat(-1),
33100 };
33101 simd_bitmask(r)
33102 }
33103}
33104
33105/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33106///
33107/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epi64_mask&expand=702)
33108#[inline]
33109#[target_feature(enable = "avx512f,avx512vl")]
33110#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33111#[rustc_legacy_const_generics(3)]
33112#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33113pub fn _mm256_mask_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
33114 k1: __mmask8,
33115 a: __m256i,
33116 b: __m256i,
33117) -> __mmask8 {
33118 unsafe {
33119 static_assert_uimm_bits!(IMM3, 3);
33120 let a: i64x4 = a.as_i64x4();
33121 let b: i64x4 = b.as_i64x4();
        let k1: i64x4 = simd_select_bitmask(k1, i64x4::splat(-1), i64x4::ZERO);
        let r: i64x4 = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i64x4::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
33131 _ => k1,
33132 };
33133 simd_bitmask(r)
33134 }
33135}
33136
33137/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
33138///
33139/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epi64_mask&expand=699)
33140#[inline]
33141#[target_feature(enable = "avx512f,avx512vl")]
33142#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33143#[rustc_legacy_const_generics(2)]
33144#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33145pub fn _mm_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
33146 unsafe {
33147 static_assert_uimm_bits!(IMM3, 3);
33148 let a: i64x2 = a.as_i64x2();
33149 let b: i64x2 = b.as_i64x2();
33150 let r: i64x2 = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i64x2::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
33158 _ => i64x2::splat(-1),
33159 };
33160 simd_bitmask(r)
33161 }
33162}
33163
33164/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33165///
33166/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epi64_mask&expand=700)
33167#[inline]
33168#[target_feature(enable = "avx512f,avx512vl")]
33169#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33170#[rustc_legacy_const_generics(3)]
33171#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33172pub fn _mm_mask_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
33173 k1: __mmask8,
33174 a: __m128i,
33175 b: __m128i,
33176) -> __mmask8 {
33177 unsafe {
33178 static_assert_uimm_bits!(IMM3, 3);
33179 let a: i64x2 = a.as_i64x2();
33180 let b: i64x2 = b.as_i64x2();
        let k1: i64x2 = simd_select_bitmask(k1, i64x2::splat(-1), i64x2::ZERO);
        let r: i64x2 = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i64x2::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
33190 _ => k1,
33191 };
33192 simd_bitmask(r)
33193 }
33194}
33195
33196/// Reduce the packed 32-bit integers in a by addition. Returns the sum of all elements in a.
33197///
33198/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_epi32&expand=4556)
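///
/// A minimal, illustrative sketch (not compiled here); `_mm512_set1_epi32` is used
/// only to build an input vector:
///
/// ```ignore
/// // Sixteen lanes of 3 sum to 48.
/// let a = _mm512_set1_epi32(3);
/// assert_eq!(_mm512_reduce_add_epi32(a), 48);
/// ```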
33199#[inline]
33200#[target_feature(enable = "avx512f")]
33201#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33202pub fn _mm512_reduce_add_epi32(a: __m512i) -> i32 {
33203 unsafe { simd_reduce_add_unordered(a.as_i32x16()) }
33204}
33205
33206/// Reduce the packed 32-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
33207///
33208/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_epi32&expand=4555)
33209#[inline]
33210#[target_feature(enable = "avx512f")]
33211#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33212pub fn _mm512_mask_reduce_add_epi32(k: __mmask16, a: __m512i) -> i32 {
    unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i32x16(), i32x16::ZERO)) }
33214}
33215
33216/// Reduce the packed 64-bit integers in a by addition. Returns the sum of all elements in a.
33217///
33218/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_epi64&expand=4558)
33219#[inline]
33220#[target_feature(enable = "avx512f")]
33221#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33222pub fn _mm512_reduce_add_epi64(a: __m512i) -> i64 {
33223 unsafe { simd_reduce_add_unordered(a.as_i64x8()) }
33224}
33225
33226/// Reduce the packed 64-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
33227///
33228/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_epi64&expand=4557)
33229#[inline]
33230#[target_feature(enable = "avx512f")]
33231#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33232pub fn _mm512_mask_reduce_add_epi64(k: __mmask8, a: __m512i) -> i64 {
    unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i64x8(), i64x8::ZERO)) }
33234}
33235
33236/// Reduce the packed single-precision (32-bit) floating-point elements in a by addition. Returns the sum of all elements in a.
33237///
33238/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_ps&expand=4562)
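///
/// A minimal, illustrative sketch (not compiled here). Note that the reduction is
/// computed as a tree of pairwise additions rather than a strict left-to-right sum,
/// so for inputs where rounding matters the result can differ slightly from a
/// sequential accumulation:
///
/// ```ignore
/// let a = _mm512_set1_ps(0.5);
/// assert_eq!(_mm512_reduce_add_ps(a), 8.0);
/// ```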
33239#[inline]
33240#[target_feature(enable = "avx512f")]
33241#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33242pub fn _mm512_reduce_add_ps(a: __m512) -> f32 {
33243 unsafe {
33244 // we have to use `simd_shuffle` here because `_mm512_extractf32x8_ps` is in AVX512DQ
33245 let a: __m256 = _mm256_add_ps(
            simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
            simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
        );
        let a: __m128 = _mm_add_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a));
        let a: __m128 = _mm_add_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1]));
        simd_extract::<_, f32>(a, 0) + simd_extract::<_, f32>(a, 1)
33252 }
33253}
33254
33255/// Reduce the packed single-precision (32-bit) floating-point elements in a by addition using mask k. Returns the sum of all active elements in a.
33256///
33257/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_ps&expand=4561)
33258#[inline]
33259#[target_feature(enable = "avx512f")]
33260#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33261pub fn _mm512_mask_reduce_add_ps(k: __mmask16, a: __m512) -> f32 {
    unsafe { _mm512_reduce_add_ps(simd_select_bitmask(k, a, _mm512_setzero_ps())) }
33263}
33264
33265/// Reduce the packed double-precision (64-bit) floating-point elements in a by addition. Returns the sum of all elements in a.
33266///
33267/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_pd&expand=4560)
33268#[inline]
33269#[target_feature(enable = "avx512f")]
33270#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33271pub fn _mm512_reduce_add_pd(a: __m512d) -> f64 {
33272 unsafe {
33273 let a: __m256d = _mm256_add_pd(
            _mm512_extractf64x4_pd::<0>(a),
            _mm512_extractf64x4_pd::<1>(a),
        );
        let a: __m128d = _mm_add_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a));
        simd_extract::<_, f64>(a, 0) + simd_extract::<_, f64>(a, 1)
33279 }
33280}
33281
33282/// Reduce the packed double-precision (64-bit) floating-point elements in a by addition using mask k. Returns the sum of all active elements in a.
33283///
33284/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_pd&expand=4559)
33285#[inline]
33286#[target_feature(enable = "avx512f")]
33287#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33288pub fn _mm512_mask_reduce_add_pd(k: __mmask8, a: __m512d) -> f64 {
    unsafe { _mm512_reduce_add_pd(simd_select_bitmask(k, a, _mm512_setzero_pd())) }
33290}
33291
33292/// Reduce the packed 32-bit integers in a by multiplication. Returns the product of all elements in a.
33293///
33294/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_epi32&expand=4600)
33295#[inline]
33296#[target_feature(enable = "avx512f")]
33297#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33298pub fn _mm512_reduce_mul_epi32(a: __m512i) -> i32 {
33299 unsafe { simd_reduce_mul_unordered(a.as_i32x16()) }
33300}
33301
33302/// Reduce the packed 32-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
33303///
33304/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_epi32&expand=4599)
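///
/// A minimal, illustrative sketch (not compiled here): lanes whose mask bit is clear
/// contribute the multiplicative identity `1`, so they do not affect the product.
///
/// ```ignore
/// let a = _mm512_set1_epi32(2);
/// // Only the low four lanes are active: 2 * 2 * 2 * 2 = 16.
/// assert_eq!(_mm512_mask_reduce_mul_epi32(0b0000_0000_0000_1111, a), 16);
/// ```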
33305#[inline]
33306#[target_feature(enable = "avx512f")]
33307#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33308pub fn _mm512_mask_reduce_mul_epi32(k: __mmask16, a: __m512i) -> i32 {
33309 unsafe {
33310 simd_reduce_mul_unordered(simd_select_bitmask(
            k,
            a.as_i32x16(),
            _mm512_set1_epi32(1).as_i32x16(),
33314 ))
33315 }
33316}
33317
33318/// Reduce the packed 64-bit integers in a by multiplication. Returns the product of all elements in a.
33319///
33320/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_epi64&expand=4602)
33321#[inline]
33322#[target_feature(enable = "avx512f")]
33323#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33324pub fn _mm512_reduce_mul_epi64(a: __m512i) -> i64 {
33325 unsafe { simd_reduce_mul_unordered(a.as_i64x8()) }
33326}
33327
33328/// Reduce the packed 64-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
33329///
33330/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_epi64&expand=4601)
33331#[inline]
33332#[target_feature(enable = "avx512f")]
33333#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33334pub fn _mm512_mask_reduce_mul_epi64(k: __mmask8, a: __m512i) -> i64 {
33335 unsafe {
33336 simd_reduce_mul_unordered(simd_select_bitmask(
            k,
            a.as_i64x8(),
            _mm512_set1_epi64(1).as_i64x8(),
33340 ))
33341 }
33342}
33343
33344/// Reduce the packed single-precision (32-bit) floating-point elements in a by multiplication. Returns the product of all elements in a.
33345///
33346/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_ps&expand=4606)
33347#[inline]
33348#[target_feature(enable = "avx512f")]
33349#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33350pub fn _mm512_reduce_mul_ps(a: __m512) -> f32 {
33351 unsafe {
33352 // we have to use `simd_shuffle` here because `_mm512_extractf32x8_ps` is in AVX512DQ
33353 let a: __m256 = _mm256_mul_ps(
            simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
            simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
        );
        let a: __m128 = _mm_mul_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a));
        let a: __m128 = _mm_mul_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1]));
        simd_extract::<_, f32>(a, 0) * simd_extract::<_, f32>(a, 1)
33360 }
33361}
33362
33363/// Reduce the packed single-precision (32-bit) floating-point elements in a by multiplication using mask k. Returns the product of all active elements in a.
33364///
33365/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_ps&expand=4605)
33366#[inline]
33367#[target_feature(enable = "avx512f")]
33368#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33369pub fn _mm512_mask_reduce_mul_ps(k: __mmask16, a: __m512) -> f32 {
    unsafe { _mm512_reduce_mul_ps(simd_select_bitmask(k, a, _mm512_set1_ps(1.))) }
33371}
33372
33373/// Reduce the packed double-precision (64-bit) floating-point elements in a by multiplication. Returns the product of all elements in a.
33374///
33375/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_pd&expand=4604)
33376#[inline]
33377#[target_feature(enable = "avx512f")]
33378#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33379pub fn _mm512_reduce_mul_pd(a: __m512d) -> f64 {
33380 unsafe {
33381 let a: __m256d = _mm256_mul_pd(
            _mm512_extractf64x4_pd::<0>(a),
            _mm512_extractf64x4_pd::<1>(a),
        );
        let a: __m128d = _mm_mul_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a));
        simd_extract::<_, f64>(a, 0) * simd_extract::<_, f64>(a, 1)
33387 }
33388}
33389
33390/// Reduce the packed double-precision (64-bit) floating-point elements in a by multiplication using mask k. Returns the product of all active elements in a.
33391///
33392/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_pd&expand=4603)
33393#[inline]
33394#[target_feature(enable = "avx512f")]
33395#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33396pub fn _mm512_mask_reduce_mul_pd(k: __mmask8, a: __m512d) -> f64 {
    unsafe { _mm512_reduce_mul_pd(simd_select_bitmask(k, a, _mm512_set1_pd(1.))) }
33398}
33399
33400/// Reduce the packed signed 32-bit integers in a by maximum. Returns the maximum of all elements in a.
33401///
33402/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epi32&expand=4576)
33403#[inline]
33404#[target_feature(enable = "avx512f")]
33405#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33406pub fn _mm512_reduce_max_epi32(a: __m512i) -> i32 {
33407 unsafe { simd_reduce_max(a.as_i32x16()) }
33408}
33409
33410/// Reduce the packed signed 32-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
33411///
33412/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epi32&expand=4575)
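///
/// A minimal, illustrative sketch (not compiled here): lanes whose mask bit is clear
/// contribute `i32::MIN`, so an all-zero mask yields `i32::MIN`.
///
/// ```ignore
/// let a = _mm512_set1_epi32(7);
/// assert_eq!(_mm512_mask_reduce_max_epi32(0b0000_0000_0000_0011, a), 7);
/// assert_eq!(_mm512_mask_reduce_max_epi32(0, a), i32::MIN);
/// ```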
33413#[inline]
33414#[target_feature(enable = "avx512f")]
33415#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33416pub fn _mm512_mask_reduce_max_epi32(k: __mmask16, a: __m512i) -> i32 {
33417 unsafe {
33418 simd_reduce_max(simd_select_bitmask(
            k,
            a.as_i32x16(),
            i32x16::splat(i32::MIN),
33422 ))
33423 }
33424}
33425
33426/// Reduce the packed signed 64-bit integers in a by maximum. Returns the maximum of all elements in a.
33427///
33428/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epi64&expand=4578)
33429#[inline]
33430#[target_feature(enable = "avx512f")]
33431#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33432pub fn _mm512_reduce_max_epi64(a: __m512i) -> i64 {
33433 unsafe { simd_reduce_max(a.as_i64x8()) }
33434}
33435
33436/// Reduce the packed signed 64-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
33437///
33438/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epi64&expand=4577)
33439#[inline]
33440#[target_feature(enable = "avx512f")]
33441#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33442pub fn _mm512_mask_reduce_max_epi64(k: __mmask8, a: __m512i) -> i64 {
    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i64x8(), i64x8::splat(i64::MIN))) }
33444}
33445
33446/// Reduce the packed unsigned 32-bit integers in a by maximum. Returns the maximum of all elements in a.
33447///
33448/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epu32&expand=4580)
33449#[inline]
33450#[target_feature(enable = "avx512f")]
33451#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33452pub fn _mm512_reduce_max_epu32(a: __m512i) -> u32 {
33453 unsafe { simd_reduce_max(a.as_u32x16()) }
33454}
33455
33456/// Reduce the packed unsigned 32-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
33457///
33458/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epu32&expand=4579)
33459#[inline]
33460#[target_feature(enable = "avx512f")]
33461#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33462pub fn _mm512_mask_reduce_max_epu32(k: __mmask16, a: __m512i) -> u32 {
    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u32x16(), u32x16::ZERO)) }
33464}
33465
33466/// Reduce the packed unsigned 64-bit integers in a by maximum. Returns the maximum of all elements in a.
33467///
33468/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epu64&expand=4582)
33469#[inline]
33470#[target_feature(enable = "avx512f")]
33471#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33472pub fn _mm512_reduce_max_epu64(a: __m512i) -> u64 {
33473 unsafe { simd_reduce_max(a.as_u64x8()) }
33474}
33475
33476/// Reduce the packed unsigned 64-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
33477///
33478/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epu64&expand=4581)
33479#[inline]
33480#[target_feature(enable = "avx512f")]
33481#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33482pub fn _mm512_mask_reduce_max_epu64(k: __mmask8, a: __m512i) -> u64 {
    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u64x8(), u64x8::ZERO)) }
33484}
33485
33486/// Reduce the packed single-precision (32-bit) floating-point elements in a by maximum. Returns the maximum of all elements in a.
33487///
33488/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_ps&expand=4586)
33489#[inline]
33490#[target_feature(enable = "avx512f")]
33491#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33492pub fn _mm512_reduce_max_ps(a: __m512) -> f32 {
33493 unsafe {
33494 let a: __m256 = _mm256_max_ps(
            simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
            simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
        );
        let a: __m128 = _mm_max_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a));
        let a: __m128 = _mm_max_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1]));
        _mm_cvtss_f32(_mm_max_ss(a, _mm_movehdup_ps(a)))
33501 }
33502}
33503
33504/// Reduce the packed single-precision (32-bit) floating-point elements in a by maximum using mask k. Returns the maximum of all active elements in a.
33505///
33506/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_ps&expand=4585)
33507#[inline]
33508#[target_feature(enable = "avx512f")]
33509#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33510pub fn _mm512_mask_reduce_max_ps(k: __mmask16, a: __m512) -> f32 {
    _mm512_reduce_max_ps(_mm512_mask_mov_ps(_mm512_set1_ps(f32::MIN), k, a))
33512}
33513
33514/// Reduce the packed double-precision (64-bit) floating-point elements in a by maximum. Returns the maximum of all elements in a.
33515///
33516/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_pd&expand=4584)
33517#[inline]
33518#[target_feature(enable = "avx512f")]
33519#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33520pub fn _mm512_reduce_max_pd(a: __m512d) -> f64 {
33521 unsafe {
33522 let a: __m256d = _mm256_max_pd(
            _mm512_extractf64x4_pd::<0>(a),
            _mm512_extractf64x4_pd::<1>(a),
        );
        let a: __m128d = _mm_max_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a));
        _mm_cvtsd_f64(_mm_max_sd(a, simd_shuffle!(a, a, [1, 0])))
33528 }
33529}
33530
33531/// Reduce the packed double-precision (64-bit) floating-point elements in a by maximum using mask k. Returns the maximum of all active elements in a.
33532///
33533/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_pd&expand=4583)
33534#[inline]
33535#[target_feature(enable = "avx512f")]
33536#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33537pub fn _mm512_mask_reduce_max_pd(k: __mmask8, a: __m512d) -> f64 {
    _mm512_reduce_max_pd(_mm512_mask_mov_pd(_mm512_set1_pd(f64::MIN), k, a))
33539}
33540
33541/// Reduce the packed signed 32-bit integers in a by minimum. Returns the minimum of all elements in a.
33542///
33543/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epi32&expand=4588)
33544#[inline]
33545#[target_feature(enable = "avx512f")]
33546#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33547pub fn _mm512_reduce_min_epi32(a: __m512i) -> i32 {
33548 unsafe { simd_reduce_min(a.as_i32x16()) }
33549}
33550
/// Reduce the packed signed 32-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
33552///
33553/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epi32&expand=4587)
33554#[inline]
33555#[target_feature(enable = "avx512f")]
33556#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33557pub fn _mm512_mask_reduce_min_epi32(k: __mmask16, a: __m512i) -> i32 {
33558 unsafe {
33559 simd_reduce_min(simd_select_bitmask(
            k,
            a.as_i32x16(),
            i32x16::splat(i32::MAX),
33563 ))
33564 }
33565}
33566
33567/// Reduce the packed signed 64-bit integers in a by minimum. Returns the minimum of all elements in a.
33568///
33569/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epi64&expand=4590)
33570#[inline]
33571#[target_feature(enable = "avx512f")]
33572#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33573pub fn _mm512_reduce_min_epi64(a: __m512i) -> i64 {
33574 unsafe { simd_reduce_min(a.as_i64x8()) }
33575}
33576
/// Reduce the packed signed 64-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
33578///
33579/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epi64&expand=4589)
33580#[inline]
33581#[target_feature(enable = "avx512f")]
33582#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33583pub fn _mm512_mask_reduce_min_epi64(k: __mmask8, a: __m512i) -> i64 {
    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_i64x8(), i64x8::splat(i64::MAX))) }
33585}
33586
33587/// Reduce the packed unsigned 32-bit integers in a by minimum. Returns the minimum of all elements in a.
33588///
33589/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epu32&expand=4592)
33590#[inline]
33591#[target_feature(enable = "avx512f")]
33592#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33593pub fn _mm512_reduce_min_epu32(a: __m512i) -> u32 {
33594 unsafe { simd_reduce_min(a.as_u32x16()) }
33595}
33596
/// Reduce the packed unsigned 32-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
33598///
33599/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epu32&expand=4591)
33600#[inline]
33601#[target_feature(enable = "avx512f")]
33602#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33603pub fn _mm512_mask_reduce_min_epu32(k: __mmask16, a: __m512i) -> u32 {
33604 unsafe {
33605 simd_reduce_min(simd_select_bitmask(
            k,
            a.as_u32x16(),
            u32x16::splat(u32::MAX),
33609 ))
33610 }
33611}
33612
33613/// Reduce the packed unsigned 64-bit integers in a by minimum. Returns the minimum of all elements in a.
33614///
33615/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epu64&expand=4594)
33616#[inline]
33617#[target_feature(enable = "avx512f")]
33618#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33619pub fn _mm512_reduce_min_epu64(a: __m512i) -> u64 {
33620 unsafe { simd_reduce_min(a.as_u64x8()) }
33621}
33622
/// Reduce the packed unsigned 64-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
33624///
33625/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epu64&expand=4589)
33626#[inline]
33627#[target_feature(enable = "avx512f")]
33628#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33629pub fn _mm512_mask_reduce_min_epu64(k: __mmask8, a: __m512i) -> u64 {
    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_u64x8(), u64x8::splat(u64::MAX))) }
33631}
33632
33633/// Reduce the packed single-precision (32-bit) floating-point elements in a by minimum. Returns the minimum of all elements in a.
33634///
33635/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_ps&expand=4598)
33636#[inline]
33637#[target_feature(enable = "avx512f")]
33638#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33639pub fn _mm512_reduce_min_ps(a: __m512) -> f32 {
33640 unsafe {
33641 let a: __m256 = _mm256_min_ps(
            simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
            simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
        );
        let a: __m128 = _mm_min_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a));
        let a: __m128 = _mm_min_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1]));
        _mm_cvtss_f32(_mm_min_ss(a, _mm_movehdup_ps(a)))
33648 }
33649}
33650
/// Reduce the packed single-precision (32-bit) floating-point elements in a by minimum using mask k. Returns the minimum of all active elements in a.
33652///
33653/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_ps&expand=4597)
33654#[inline]
33655#[target_feature(enable = "avx512f")]
33656#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33657pub fn _mm512_mask_reduce_min_ps(k: __mmask16, a: __m512) -> f32 {
    _mm512_reduce_min_ps(_mm512_mask_mov_ps(_mm512_set1_ps(f32::MAX), k, a))
33659}
33660
33661/// Reduce the packed double-precision (64-bit) floating-point elements in a by minimum. Returns the minimum of all elements in a.
33662///
33663/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_pd&expand=4596)
33664#[inline]
33665#[target_feature(enable = "avx512f")]
33666#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33667pub fn _mm512_reduce_min_pd(a: __m512d) -> f64 {
33668 unsafe {
33669 let a: __m256d = _mm256_min_pd(
            _mm512_extractf64x4_pd::<0>(a),
            _mm512_extractf64x4_pd::<1>(a),
        );
        let a: __m128d = _mm_min_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a));
        _mm_cvtsd_f64(_mm_min_sd(a, simd_shuffle!(a, a, [1, 0])))
33675 }
33676}
33677
/// Reduce the packed double-precision (64-bit) floating-point elements in a by minimum using mask k. Returns the minimum of all active elements in a.
33679///
33680/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_pd&expand=4595)
33681#[inline]
33682#[target_feature(enable = "avx512f")]
33683#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33684pub fn _mm512_mask_reduce_min_pd(k: __mmask8, a: __m512d) -> f64 {
    _mm512_reduce_min_pd(_mm512_mask_mov_pd(_mm512_set1_pd(f64::MAX), k, a))
33686}
33687
33688/// Reduce the packed 32-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
33689///
33690/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_and_epi32&expand=4564)
33691#[inline]
33692#[target_feature(enable = "avx512f")]
33693#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33694pub fn _mm512_reduce_and_epi32(a: __m512i) -> i32 {
33695 unsafe { simd_reduce_and(a.as_i32x16()) }
33696}
33697
33698/// Reduce the packed 32-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
33699///
33700/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_and_epi32&expand=4563)
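///
/// A minimal, illustrative sketch (not compiled here): lanes whose mask bit is clear
/// contribute `-1` (all bits set), the identity for bitwise AND.
///
/// ```ignore
/// let a = _mm512_set1_epi32(0b1010);
/// assert_eq!(_mm512_mask_reduce_and_epi32(0b0000_0000_0000_0011, a), 0b1010);
/// ```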
33701#[inline]
33702#[target_feature(enable = "avx512f")]
33703#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33704pub fn _mm512_mask_reduce_and_epi32(k: __mmask16, a: __m512i) -> i32 {
    unsafe { simd_reduce_and(simd_select_bitmask(k, a.as_i32x16(), i32x16::splat(-1))) }
33706}
33707
33708/// Reduce the packed 64-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
33709///
33710/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_and_epi64&expand=4566)
33711#[inline]
33712#[target_feature(enable = "avx512f")]
33713#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33714pub fn _mm512_reduce_and_epi64(a: __m512i) -> i64 {
33715 unsafe { simd_reduce_and(a.as_i64x8()) }
33716}
33717
/// Reduce the packed 64-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
33719///
33720/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_and_epi64&expand=4557)
33721#[inline]
33722#[target_feature(enable = "avx512f")]
33723#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33724pub fn _mm512_mask_reduce_and_epi64(k: __mmask8, a: __m512i) -> i64 {
    unsafe { simd_reduce_and(simd_select_bitmask(k, a.as_i64x8(), i64x8::splat(-1))) }
33726}
33727
33728/// Reduce the packed 32-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
33729///
33730/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_or_epi32&expand=4608)
33731#[inline]
33732#[target_feature(enable = "avx512f")]
33733#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33734pub fn _mm512_reduce_or_epi32(a: __m512i) -> i32 {
33735 unsafe { simd_reduce_or(a.as_i32x16()) }
33736}
33737
33738/// Reduce the packed 32-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
33739///
33740/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_or_epi32&expand=4607)
33741#[inline]
33742#[target_feature(enable = "avx512f")]
33743#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33744pub fn _mm512_mask_reduce_or_epi32(k: __mmask16, a: __m512i) -> i32 {
    unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i32x16(), i32x16::ZERO)) }
33746}
33747
33748/// Reduce the packed 64-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
33749///
33750/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_or_epi64&expand=4610)
33751#[inline]
33752#[target_feature(enable = "avx512f")]
33753#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33754pub fn _mm512_reduce_or_epi64(a: __m512i) -> i64 {
33755 unsafe { simd_reduce_or(a.as_i64x8()) }
33756}
33757
33758/// Reduce the packed 64-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
33759///
33760/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_or_epi64&expand=4609)
33761#[inline]
33762#[target_feature(enable = "avx512f")]
33763#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33764pub fn _mm512_mask_reduce_or_epi64(k: __mmask8, a: __m512i) -> i64 {
    unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i64x8(), i64x8::ZERO)) }
33766}
33767
33768/// Returns vector of type `__m512d` with indeterminate elements.
33769/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
33770/// picks some valid value and is not equivalent to [`mem::MaybeUninit`].
33771/// In practice, this is typically equivalent to [`mem::zeroed`].
33772///
33773/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined_pd)
33774#[inline]
33775#[target_feature(enable = "avx512f")]
33776#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33777// This intrinsic has no corresponding instruction.
33778pub fn _mm512_undefined_pd() -> __m512d {
33779 unsafe { const { mem::zeroed() } }
33780}
33781
33782/// Returns vector of type `__m512` with indeterminate elements.
33783/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
33784/// picks some valid value and is not equivalent to [`mem::MaybeUninit`].
33785/// In practice, this is typically equivalent to [`mem::zeroed`].
33786///
33787/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined_ps)
33788#[inline]
33789#[target_feature(enable = "avx512f")]
33790#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33791// This intrinsic has no corresponding instruction.
33792pub fn _mm512_undefined_ps() -> __m512 {
33793 unsafe { const { mem::zeroed() } }
33794}
33795
/// Returns vector of type `__m512i` with indeterminate elements.
33797/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
33798/// picks some valid value and is not equivalent to [`mem::MaybeUninit`].
33799/// In practice, this is typically equivalent to [`mem::zeroed`].
33800///
33801/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined_epi32&expand=5995)
33802#[inline]
33803#[target_feature(enable = "avx512f")]
33804#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33805// This intrinsic has no corresponding instruction.
33806pub fn _mm512_undefined_epi32() -> __m512i {
33807 unsafe { const { mem::zeroed() } }
33808}
33809
/// Returns vector of type `__m512` with indeterminate elements.
33811/// Despite using the word "undefined" (following Intel's naming scheme), this non-deterministically
33812/// picks some valid value and is not equivalent to [`mem::MaybeUninit`].
33813/// In practice, this is typically equivalent to [`mem::zeroed`].
33814///
33815/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined&expand=5994)
33816#[inline]
33817#[target_feature(enable = "avx512f")]
33818#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33819// This intrinsic has no corresponding instruction.
33820pub fn _mm512_undefined() -> __m512 {
33821 unsafe { const { mem::zeroed() } }
33822}
33823
33824/// Load 512-bits (composed of 16 packed 32-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
33825///
33826/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_epi32&expand=3377)
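///
/// A minimal, illustrative sketch (not compiled here): the pointer must be valid for
/// reading 64 bytes, but no particular alignment is required.
///
/// ```ignore
/// let data = [1i32; 16];
/// let v = unsafe { _mm512_loadu_epi32(data.as_ptr()) };
/// ```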
33827#[inline]
33828#[target_feature(enable = "avx512f")]
33829#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33830#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
33831pub unsafe fn _mm512_loadu_epi32(mem_addr: *const i32) -> __m512i {
    ptr::read_unaligned(mem_addr as *const __m512i)
33833}
33834
33835/// Load 256-bits (composed of 8 packed 32-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
33836///
33837/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_epi32&expand=3374)
33838#[inline]
33839#[target_feature(enable = "avx512f,avx512vl")]
33840#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33841#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
33842pub unsafe fn _mm256_loadu_epi32(mem_addr: *const i32) -> __m256i {
    ptr::read_unaligned(mem_addr as *const __m256i)
33844}
33845
33846/// Load 128-bits (composed of 4 packed 32-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
33847///
33848/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_epi32&expand=3371)
33849#[inline]
33850#[target_feature(enable = "avx512f,avx512vl")]
33851#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33852#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
33853pub unsafe fn _mm_loadu_epi32(mem_addr: *const i32) -> __m128i {
    ptr::read_unaligned(mem_addr as *const __m128i)
33855}
33856
33857/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33858///
33859/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_storeu_epi16&expand=1460)
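///
/// A minimal, illustrative sketch (not compiled here): element `j` of `a` is truncated
/// to 16 bits and written at byte offset `2 * j` only if bit `j` of `k` is set; memory
/// at positions whose mask bit is clear is left untouched.
///
/// ```ignore
/// let mut out = [0i16; 16];
/// let a = _mm512_set1_epi32(0x0001_0042);
/// // Write only the low eight lanes; each stores the truncated value 0x0042.
/// unsafe { _mm512_mask_cvtepi32_storeu_epi16(out.as_mut_ptr(), 0x00FF, a) };
/// ```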
33860#[inline]
33861#[target_feature(enable = "avx512f")]
33862#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33863#[cfg_attr(test, assert_instr(vpmovdw))]
33864pub unsafe fn _mm512_mask_cvtepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask16, a: __m512i) {
    vpmovdwmem(mem_addr.cast(), a.as_i32x16(), k);
33866}
33867
33868/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33869///
33870/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_storeu_epi16&expand=1462)
33871#[inline]
33872#[target_feature(enable = "avx512f,avx512vl")]
33873#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33874#[cfg_attr(test, assert_instr(vpmovdw))]
33875pub unsafe fn _mm256_mask_cvtepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) {
    vpmovdwmem256(mem_addr.cast(), a.as_i32x8(), k);
33877}
33878
33879/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33880///
33881/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_storeu_epi16&expand=1461)
33882#[inline]
33883#[target_feature(enable = "avx512f,avx512vl")]
33884#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33885#[cfg_attr(test, assert_instr(vpmovdw))]
33886pub unsafe fn _mm_mask_cvtepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) {
    vpmovdwmem128(mem_addr.cast(), a.as_i32x4(), k);
33888}
33889
33890/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33891///
33892/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_storeu_epi16&expand=1833)
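///
/// A minimal, illustrative sketch (not compiled here) of how saturation differs from
/// plain truncation: values outside the `i16` range clamp to `i16::MIN`/`i16::MAX`
/// instead of having their upper bits discarded.
///
/// ```ignore
/// let mut out = [0i16; 16];
/// let a = _mm512_set1_epi32(70_000);
/// unsafe { _mm512_mask_cvtsepi32_storeu_epi16(out.as_mut_ptr(), 0x0001, a) };
/// assert_eq!(out[0], i16::MAX); // plain truncation would instead give 4_464
/// ```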
33893#[inline]
33894#[target_feature(enable = "avx512f")]
33895#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33896#[cfg_attr(test, assert_instr(vpmovsdw))]
33897pub unsafe fn _mm512_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask16, a: __m512i) {
    vpmovsdwmem(mem_addr.cast(), a.as_i32x16(), k);
33899}
33900
33901/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33902///
33903/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_storeu_epi16&expand=1832)
33904#[inline]
33905#[target_feature(enable = "avx512f,avx512vl")]
33906#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33907#[cfg_attr(test, assert_instr(vpmovsdw))]
33908pub unsafe fn _mm256_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) {
    vpmovsdwmem256(mem_addr.cast(), a.as_i32x8(), k);
33910}
33911
33912/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33913///
33914/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_storeu_epi16&expand=1831)
33915#[inline]
33916#[target_feature(enable = "avx512f,avx512vl")]
33917#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33918#[cfg_attr(test, assert_instr(vpmovsdw))]
33919pub unsafe fn _mm_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) {
    vpmovsdwmem128(mem_addr.cast(), a.as_i32x4(), k);
33921}
33922
33923/// Convert packed unsigned 32-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33924///
33925/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_storeu_epi16&expand=2068)
33926#[inline]
33927#[target_feature(enable = "avx512f")]
33928#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33929#[cfg_attr(test, assert_instr(vpmovusdw))]
33930pub unsafe fn _mm512_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask16, a: __m512i) {
    vpmovusdwmem(mem_addr.cast(), a.as_i32x16(), k);
33932}
33933
33934/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33935///
33936/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_storeu_epi16&expand=2067)
33937#[inline]
33938#[target_feature(enable = "avx512f,avx512vl")]
33939#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33940#[cfg_attr(test, assert_instr(vpmovusdw))]
33941pub unsafe fn _mm256_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) {
    vpmovusdwmem256(mem_addr.cast(), a.as_i32x8(), k);
33943}
33944
33945/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33946///
33947/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_storeu_epi16&expand=2066)
33948#[inline]
33949#[target_feature(enable = "avx512f,avx512vl")]
33950#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33951#[cfg_attr(test, assert_instr(vpmovusdw))]
33952pub unsafe fn _mm_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) {
    vpmovusdwmem128(mem_addr.cast(), a.as_i32x4(), k);
33954}
33955
33956/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33957///
33958/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_storeu_epi8&expand=1463)
33959#[inline]
33960#[target_feature(enable = "avx512f")]
33961#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33962#[cfg_attr(test, assert_instr(vpmovdb))]
33963pub unsafe fn _mm512_mask_cvtepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
    vpmovdbmem(mem_addr, a.as_i32x16(), k);
33965}
33966
33967/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33968///
33969/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_storeu_epi8&expand=1462)
33970#[inline]
33971#[target_feature(enable = "avx512f,avx512vl")]
33972#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33973#[cfg_attr(test, assert_instr(vpmovdb))]
33974pub unsafe fn _mm256_mask_cvtepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovdbmem256(mem_addr, a.as_i32x8(), k);
33976}
33977
33978/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33979///
33980/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_storeu_epi8&expand=1461)
33981#[inline]
33982#[target_feature(enable = "avx512f,avx512vl")]
33983#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33984#[cfg_attr(test, assert_instr(vpmovdb))]
33985pub unsafe fn _mm_mask_cvtepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovdbmem128(mem_addr, a.as_i32x4(), k);
33987}
33988
33989/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33990///
33991/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_storeu_epi8&expand=1836)
33992#[inline]
33993#[target_feature(enable = "avx512f")]
33994#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
33995#[cfg_attr(test, assert_instr(vpmovsdb))]
33996pub unsafe fn _mm512_mask_cvtsepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
    vpmovsdbmem(mem_addr, a.as_i32x16(), k);
33998}
33999
34000/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34001///
34002/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_storeu_epi8&expand=1835)
34003#[inline]
34004#[target_feature(enable = "avx512f,avx512vl")]
34005#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34006#[cfg_attr(test, assert_instr(vpmovsdb))]
34007pub unsafe fn _mm256_mask_cvtsepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovsdbmem256(mem_addr, a.as_i32x8(), k);
34009}
34010
34011/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34012///
34013/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_storeu_epi8&expand=1834)
34014#[inline]
34015#[target_feature(enable = "avx512f,avx512vl")]
34016#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34017#[cfg_attr(test, assert_instr(vpmovsdb))]
34018pub unsafe fn _mm_mask_cvtsepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovsdbmem128(mem_addr, a.as_i32x4(), k);
34020}
34021
34022/// Convert packed unsigned 32-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34023///
34024/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_storeu_epi8&expand=2071)
34025#[inline]
34026#[target_feature(enable = "avx512f")]
34027#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34028#[cfg_attr(test, assert_instr(vpmovusdb))]
34029pub unsafe fn _mm512_mask_cvtusepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
    vpmovusdbmem(mem_addr, a.as_i32x16(), k);
34031}
34032
34033/// Convert packed unsigned 32-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34034///
34035/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_storeu_epi8&expand=2070)
34036#[inline]
34037#[target_feature(enable = "avx512f,avx512vl")]
34038#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34039#[cfg_attr(test, assert_instr(vpmovusdb))]
34040pub unsafe fn _mm256_mask_cvtusepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovusdbmem256(mem_addr, a.as_i32x8(), k);
34042}
34043
34044/// Convert packed unsigned 32-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34045///
34046/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_storeu_epi8&expand=2069)
34047#[inline]
34048#[target_feature(enable = "avx512f,avx512vl")]
34049#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34050#[cfg_attr(test, assert_instr(vpmovusdb))]
34051pub unsafe fn _mm_mask_cvtusepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovusdbmem128(mem_addr, a.as_i32x4(), k);
34053}
34054
34055/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34056///
34057/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_storeu_epi16&expand=1513)
34058#[inline]
34059#[target_feature(enable = "avx512f")]
34060#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34061#[cfg_attr(test, assert_instr(vpmovqw))]
34062pub unsafe fn _mm512_mask_cvtepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m512i) {
    vpmovqwmem(mem_addr.cast(), a.as_i64x8(), k);
34064}
34065
34066/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34067///
34068/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_storeu_epi16&expand=1512)
34069#[inline]
34070#[target_feature(enable = "avx512f,avx512vl")]
34071#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34072#[cfg_attr(test, assert_instr(vpmovqw))]
34073pub unsafe fn _mm256_mask_cvtepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) {
    vpmovqwmem256(mem_addr.cast(), a.as_i64x4(), k);
34075}
34076
34077/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34078///
34079/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_storeu_epi16&expand=1511)
34080#[inline]
34081#[target_feature(enable = "avx512f,avx512vl")]
34082#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34083#[cfg_attr(test, assert_instr(vpmovqw))]
34084pub unsafe fn _mm_mask_cvtepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) {
    vpmovqwmem128(mem_addr.cast(), a.as_i64x2(), k);
34086}
34087
34088/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34089///
34090/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_storeu_epi16&expand=1866)
34091#[inline]
34092#[target_feature(enable = "avx512f")]
34093#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34094#[cfg_attr(test, assert_instr(vpmovsqw))]
34095pub unsafe fn _mm512_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m512i) {
    vpmovsqwmem(mem_addr.cast(), a.as_i64x8(), k);
34097}
34098
34099/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34100///
34101/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_storeu_epi16&expand=1865)
34102#[inline]
34103#[target_feature(enable = "avx512f,avx512vl")]
34104#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34105#[cfg_attr(test, assert_instr(vpmovsqw))]
34106pub unsafe fn _mm256_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) {
34107 vpmovsqwmem256(mem_addr.cast(), a.as_i64x4(), mask:k);
34108}
34109
34110/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34111///
34112/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_storeu_epi16&expand=1864)
34113#[inline]
34114#[target_feature(enable = "avx512f,avx512vl")]
34115#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34116#[cfg_attr(test, assert_instr(vpmovsqw))]
34117pub unsafe fn _mm_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) {
34118 vpmovsqwmem128(mem_addr.cast(), a.as_i64x2(), mask:k);
34119}
34120
34121/// Convert packed unsigned 64-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34122///
34123/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_storeu_epi16&expand=2101)
34124#[inline]
34125#[target_feature(enable = "avx512f")]
34126#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34127#[cfg_attr(test, assert_instr(vpmovusqw))]
34128pub unsafe fn _mm512_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m512i) {
34129 vpmovusqwmem(mem_addr.cast(), a.as_i64x8(), mask:k);
34130}
34131
34132/// Convert packed unsigned 64-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34133///
34134/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_storeu_epi16&expand=2100)
34135#[inline]
34136#[target_feature(enable = "avx512f,avx512vl")]
34137#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34138#[cfg_attr(test, assert_instr(vpmovusqw))]
34139pub unsafe fn _mm256_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m256i) {
34140 vpmovusqwmem256(mem_addr.cast(), a.as_i64x4(), mask:k);
34141}
34142
34143/// Convert packed unsigned 64-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34144///
34145/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_storeu_epi16&expand=2099)
34146#[inline]
34147#[target_feature(enable = "avx512f,avx512vl")]
34148#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
34149#[cfg_attr(test, assert_instr(vpmovusqw))]
34150pub unsafe fn _mm_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i16, k: __mmask8, a: __m128i) {
34151 vpmovusqwmem128(mem_addr.cast(), a.as_i64x2(), mask:k);
34152}
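
// A minimal usage sketch (not part of the stdarch test suite): shows how the masked
// 64-to-16-bit down-converting stores above write only the lanes selected by the mask.
// The lane values and the 0b0000_1111 mask are illustrative assumptions.
#[cfg(test)]
#[allow(dead_code)]
fn example_mask_cvtusepi64_storeu_epi16() {
    unsafe {
        // Lane i holds i, except lane 3 which holds a value that saturates to u16::MAX.
        let a = _mm512_set_epi64(7, 6, 5, 4, 100_000, 2, 1, 0);
        let mut out = [0i16; 8];
        // Only lanes 0..=3 are written; elements 4..=7 of `out` keep their old contents.
        _mm512_mask_cvtusepi64_storeu_epi16(out.as_mut_ptr(), 0b0000_1111, a);
        // out == [0, 1, 2, -1 /* u16::MAX reinterpreted as i16 */, 0, 0, 0, 0]
    }
}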

/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_storeu_epi8&expand=1519)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub unsafe fn _mm512_mask_cvtepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
    vpmovqbmem(mem_addr, a.as_i64x8(), k);
}

/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_storeu_epi8&expand=1518)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub unsafe fn _mm256_mask_cvtepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovqbmem256(mem_addr, a.as_i64x4(), k);
}

/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_storeu_epi8&expand=1517)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub unsafe fn _mm_mask_cvtepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovqbmem128(mem_addr, a.as_i64x2(), k);
}

/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_storeu_epi8&expand=1872)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub unsafe fn _mm512_mask_cvtsepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
    vpmovsqbmem(mem_addr, a.as_i64x8(), k);
}

/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_storeu_epi8&expand=1871)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub unsafe fn _mm256_mask_cvtsepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovsqbmem256(mem_addr, a.as_i64x4(), k);
}

/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_storeu_epi8&expand=1870)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub unsafe fn _mm_mask_cvtsepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovsqbmem128(mem_addr, a.as_i64x2(), k);
}

/// Convert packed unsigned 64-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_storeu_epi8&expand=2107)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqb))]
pub unsafe fn _mm512_mask_cvtusepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
    vpmovusqbmem(mem_addr, a.as_i64x8(), k);
}

/// Convert packed unsigned 64-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_storeu_epi8&expand=2106)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqb))]
pub unsafe fn _mm256_mask_cvtusepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovusqbmem256(mem_addr, a.as_i64x4(), k);
}

/// Convert packed unsigned 64-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_storeu_epi8&expand=2105)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqb))]
pub unsafe fn _mm_mask_cvtusepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovusqbmem128(mem_addr, a.as_i64x2(), k);
}
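
// A minimal usage sketch (not part of the stdarch test suite): the 64-to-8-bit masked
// stores above touch at most eight bytes, one per selected lane, and leave the rest of
// the buffer alone. Buffer size, values, and mask are illustrative assumptions.
#[cfg(test)]
#[allow(dead_code)]
fn example_mask_cvtsepi64_storeu_epi8() {
    unsafe {
        // 300 does not fit in an i8, so signed saturation clamps it to i8::MAX.
        let a = _mm512_set1_epi64(300);
        let mut out = [0i8; 16];
        _mm512_mask_cvtsepi64_storeu_epi8(out.as_mut_ptr(), 0b0000_0011, a);
        // out[0] == 127, out[1] == 127; out[2..] is untouched (still 0).
    }
}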

/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_storeu_epi32&expand=1516)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqd))]
pub unsafe fn _mm512_mask_cvtepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m512i) {
    vpmovqdmem(mem_addr.cast(), a.as_i64x8(), k);
}

/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_storeu_epi32&expand=1515)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqd))]
pub unsafe fn _mm256_mask_cvtepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m256i) {
    vpmovqdmem256(mem_addr.cast(), a.as_i64x4(), k);
}

/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_storeu_epi32&expand=1514)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovqd))]
pub unsafe fn _mm_mask_cvtepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m128i) {
    vpmovqdmem128(mem_addr.cast(), a.as_i64x2(), k);
}

/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_storeu_epi32&expand=1869)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub unsafe fn _mm512_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m512i) {
    vpmovsqdmem(mem_addr.cast(), a.as_i64x8(), k);
}

/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_storeu_epi32&expand=1868)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub unsafe fn _mm256_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m256i) {
    vpmovsqdmem256(mem_addr.cast(), a.as_i64x4(), k);
}

/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_storeu_epi32&expand=1867)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub unsafe fn _mm_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m128i) {
    vpmovsqdmem128(mem_addr.cast(), a.as_i64x2(), k);
}

/// Convert packed unsigned 64-bit integers in a to packed 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_storeu_epi32&expand=2104)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqd))]
pub unsafe fn _mm512_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m512i) {
    vpmovusqdmem(mem_addr.cast(), a.as_i64x8(), k);
}

/// Convert packed unsigned 64-bit integers in a to packed 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_storeu_epi32&expand=2103)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqd))]
pub unsafe fn _mm256_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m256i) {
    vpmovusqdmem256(mem_addr.cast(), a.as_i64x4(), k);
}

/// Convert packed unsigned 64-bit integers in a to packed 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_storeu_epi32&expand=2102)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vpmovusqd))]
pub unsafe fn _mm_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i32, k: __mmask8, a: __m128i) {
    vpmovusqdmem128(mem_addr.cast(), a.as_i64x2(), k);
}
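
// A minimal usage sketch (not part of the stdarch test suite): contrasts the truncating
// and the signed-saturating 64-to-32-bit masked stores defined above. The value and the
// all-lanes mask are illustrative assumptions.
#[cfg(test)]
#[allow(dead_code)]
fn example_cvtepi64_storeu_epi32_truncate_vs_saturate() {
    unsafe {
        let a = _mm512_set1_epi64(i64::MAX);
        let mut truncated = [0i32; 8];
        let mut saturated = [0i32; 8];
        // Truncation keeps the low 32 bits: i64::MAX becomes -1.
        _mm512_mask_cvtepi64_storeu_epi32(truncated.as_mut_ptr(), 0xff, a);
        // Signed saturation clamps to the i32 range: i64::MAX becomes i32::MAX.
        _mm512_mask_cvtsepi64_storeu_epi32(saturated.as_mut_ptr(), 0xff, a);
        // truncated == [-1; 8], saturated == [i32::MAX; 8]
    }
}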

/// Store 512-bits (composed of 16 packed 32-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_epi32&expand=5628)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
pub unsafe fn _mm512_storeu_epi32(mem_addr: *mut i32, a: __m512i) {
    ptr::write_unaligned(mem_addr as *mut __m512i, a);
}

/// Store 256-bits (composed of 8 packed 32-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_epi32&expand=5626)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
pub unsafe fn _mm256_storeu_epi32(mem_addr: *mut i32, a: __m256i) {
    ptr::write_unaligned(mem_addr as *mut __m256i, a);
}

/// Store 128-bits (composed of 4 packed 32-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_epi32&expand=5624)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
pub unsafe fn _mm_storeu_epi32(mem_addr: *mut i32, a: __m128i) {
    ptr::write_unaligned(mem_addr as *mut __m128i, a);
}

/// Load 512-bits (composed of 8 packed 64-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_epi64&expand=3386)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
pub unsafe fn _mm512_loadu_epi64(mem_addr: *const i64) -> __m512i {
    ptr::read_unaligned(mem_addr as *const __m512i)
}

/// Load 256-bits (composed of 4 packed 64-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_epi64&expand=3383)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
pub unsafe fn _mm256_loadu_epi64(mem_addr: *const i64) -> __m256i {
    ptr::read_unaligned(mem_addr as *const __m256i)
}

/// Load 128-bits (composed of 2 packed 64-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_epi64&expand=3380)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
pub unsafe fn _mm_loadu_epi64(mem_addr: *const i64) -> __m128i {
    ptr::read_unaligned(mem_addr as *const __m128i)
}

/// Store 512-bits (composed of 8 packed 64-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_epi64&expand=5634)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
pub unsafe fn _mm512_storeu_epi64(mem_addr: *mut i64, a: __m512i) {
    ptr::write_unaligned(mem_addr as *mut __m512i, a);
}

/// Store 256-bits (composed of 4 packed 64-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_epi64&expand=5632)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
pub unsafe fn _mm256_storeu_epi64(mem_addr: *mut i64, a: __m256i) {
    ptr::write_unaligned(mem_addr as *mut __m256i, a);
}

/// Store 128-bits (composed of 2 packed 64-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_epi64&expand=5630)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
pub unsafe fn _mm_storeu_epi64(mem_addr: *mut i64, a: __m128i) {
    ptr::write_unaligned(mem_addr as *mut __m128i, a);
}

/// Load 512-bits of integer data from memory into dst. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_si512&expand=3420)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
pub unsafe fn _mm512_loadu_si512(mem_addr: *const __m512i) -> __m512i {
    ptr::read_unaligned(mem_addr)
}

/// Store 512-bits of integer data from a into memory. mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_si512&expand=5657)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
pub unsafe fn _mm512_storeu_si512(mem_addr: *mut __m512i, a: __m512i) {
    ptr::write_unaligned(mem_addr, a);
}
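
// A minimal usage sketch (not part of the stdarch test suite): round-trips a 512-bit
// vector through ordinary memory with the unaligned `loadu`/`storeu` pair above. The
// array contents are arbitrary illustrative values.
#[cfg(test)]
#[allow(dead_code)]
fn example_unaligned_si512_roundtrip() {
    unsafe {
        let src = [7i32; 16];
        // Only the alignment of `i32` is required; no 64-byte alignment is needed.
        let v = _mm512_loadu_si512(src.as_ptr() as *const __m512i);
        let mut dst = [0i32; 16];
        _mm512_storeu_si512(dst.as_mut_ptr() as *mut __m512i, v);
        // dst == src
    }
}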

/// Loads 512-bits (composed of 8 packed double-precision (64-bit)
/// floating-point elements) from memory into result.
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))]
pub unsafe fn _mm512_loadu_pd(mem_addr: *const f64) -> __m512d {
    ptr::read_unaligned(mem_addr as *const __m512d)
}

/// Stores 512-bits (composed of 8 packed double-precision (64-bit)
/// floating-point elements) from `a` into memory.
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))]
pub unsafe fn _mm512_storeu_pd(mem_addr: *mut f64, a: __m512d) {
    ptr::write_unaligned(mem_addr as *mut __m512d, a);
}

/// Loads 512-bits (composed of 16 packed single-precision (32-bit)
/// floating-point elements) from memory into result.
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_ps)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))]
pub unsafe fn _mm512_loadu_ps(mem_addr: *const f32) -> __m512 {
    ptr::read_unaligned(mem_addr as *const __m512)
}

/// Stores 512-bits (composed of 16 packed single-precision (32-bit)
/// floating-point elements) from `a` into memory.
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_ps)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(test, assert_instr(vmovups))]
pub unsafe fn _mm512_storeu_ps(mem_addr: *mut f32, a: __m512) {
    ptr::write_unaligned(mem_addr as *mut __m512, a);
}
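
// A minimal usage sketch (not part of the stdarch test suite): the floating-point
// `loadu`/`storeu` pair above behaves like the integer one, working through raw `f32`
// pointers with no extra alignment requirement. Values are illustrative.
#[cfg(test)]
#[allow(dead_code)]
fn example_unaligned_ps_roundtrip() {
    unsafe {
        let src = [1.5f32; 16];
        let v = _mm512_loadu_ps(src.as_ptr());
        let mut dst = [0.0f32; 16];
        _mm512_storeu_ps(dst.as_mut_ptr(), v);
        // dst == src
    }
}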

/// Load 512-bits of integer data from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_si512&expand=3345)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)] //should be vmovdqa32
pub unsafe fn _mm512_load_si512(mem_addr: *const __m512i) -> __m512i {
    ptr::read(mem_addr)
}

/// Store 512-bits of integer data from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_si512&expand=5598)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)] //should be vmovdqa32
pub unsafe fn _mm512_store_si512(mem_addr: *mut __m512i, a: __m512i) {
    ptr::write(mem_addr, a);
}

/// Load 512-bits (composed of 16 packed 32-bit integers) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_epi32&expand=3304)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)] //should be vmovdqa32
pub unsafe fn _mm512_load_epi32(mem_addr: *const i32) -> __m512i {
    ptr::read(mem_addr as *const __m512i)
}

/// Load 256-bits (composed of 8 packed 32-bit integers) from memory into dst. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_load_epi32&expand=3301)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)] //should be vmovdqa32
pub unsafe fn _mm256_load_epi32(mem_addr: *const i32) -> __m256i {
    ptr::read(mem_addr as *const __m256i)
}

/// Load 128-bits (composed of 4 packed 32-bit integers) from memory into dst. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_epi32&expand=3298)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)] //should be vmovdqa32
pub unsafe fn _mm_load_epi32(mem_addr: *const i32) -> __m128i {
    ptr::read(mem_addr as *const __m128i)
}

/// Store 512-bits (composed of 16 packed 32-bit integers) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_epi32&expand=5569)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)] //should be vmovdqa32
pub unsafe fn _mm512_store_epi32(mem_addr: *mut i32, a: __m512i) {
    ptr::write(mem_addr as *mut __m512i, a);
}

/// Store 256-bits (composed of 8 packed 32-bit integers) from a into memory. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_store_epi32&expand=5567)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)] //should be vmovdqa32
pub unsafe fn _mm256_store_epi32(mem_addr: *mut i32, a: __m256i) {
    ptr::write(mem_addr as *mut __m256i, a);
}

/// Store 128-bits (composed of 4 packed 32-bit integers) from a into memory. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_epi32&expand=5565)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)] //should be vmovdqa32
pub unsafe fn _mm_store_epi32(mem_addr: *mut i32, a: __m128i) {
    ptr::write(mem_addr as *mut __m128i, a);
}
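
// A minimal usage sketch (not part of the stdarch test suite): the aligned `load`/`store`
// intrinsics above require 64-byte (512-bit), 32-byte (256-bit) or 16-byte (128-bit)
// aligned pointers. A `#[repr(align(64))]` wrapper is one way to guarantee that; the
// wrapper type name is an illustrative assumption.
#[cfg(test)]
#[allow(dead_code)]
fn example_aligned_epi32_roundtrip() {
    #[repr(align(64))]
    struct Aligned64([i32; 16]);

    unsafe {
        let mut buf = Aligned64([0; 16]);
        let v = _mm512_set1_epi32(42);
        // Both calls rely on `buf` being 64-byte aligned.
        _mm512_store_epi32(buf.0.as_mut_ptr(), v);
        let w = _mm512_load_epi32(buf.0.as_ptr());
        let _ = w;
    }
}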

/// Load 512-bits (composed of 8 packed 64-bit integers) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_epi64&expand=3313)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)] //should be vmovdqa64
pub unsafe fn _mm512_load_epi64(mem_addr: *const i64) -> __m512i {
    ptr::read(mem_addr as *const __m512i)
}

/// Load 256-bits (composed of 4 packed 64-bit integers) from memory into dst. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_load_epi64&expand=3310)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)] //should be vmovdqa64
pub unsafe fn _mm256_load_epi64(mem_addr: *const i64) -> __m256i {
    ptr::read(mem_addr as *const __m256i)
}

/// Load 128-bits (composed of 2 packed 64-bit integers) from memory into dst. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_epi64&expand=3307)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)] //should be vmovdqa64
pub unsafe fn _mm_load_epi64(mem_addr: *const i64) -> __m128i {
    ptr::read(mem_addr as *const __m128i)
}

/// Store 512-bits (composed of 8 packed 64-bit integers) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_epi64&expand=5575)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)] //should be vmovdqa64
pub unsafe fn _mm512_store_epi64(mem_addr: *mut i64, a: __m512i) {
    ptr::write(mem_addr as *mut __m512i, a);
}

/// Store 256-bits (composed of 4 packed 64-bit integers) from a into memory. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_store_epi64&expand=5573)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)] //should be vmovdqa64
pub unsafe fn _mm256_store_epi64(mem_addr: *mut i64, a: __m256i) {
    ptr::write(mem_addr as *mut __m256i, a);
}

/// Store 128-bits (composed of 2 packed 64-bit integers) from a into memory. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_epi64&expand=5571)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)] //should be vmovdqa64
pub unsafe fn _mm_store_epi64(mem_addr: *mut i64, a: __m128i) {
    ptr::write(mem_addr as *mut __m128i, a);
}

/// Load 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_ps&expand=3336)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)]
pub unsafe fn _mm512_load_ps(mem_addr: *const f32) -> __m512 {
    ptr::read(mem_addr as *const __m512)
}

/// Store 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_ps&expand=5592)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)]
pub unsafe fn _mm512_store_ps(mem_addr: *mut f32, a: __m512) {
    ptr::write(mem_addr as *mut __m512, a);
}

/// Load 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_pd&expand=3326)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)] //should be vmovapd
pub unsafe fn _mm512_load_pd(mem_addr: *const f64) -> __m512d {
    ptr::read(mem_addr as *const __m512d)
}

/// Store 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_pd&expand=5585)
#[inline]
#[target_feature(enable = "avx512f")]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
#[cfg_attr(
    all(test, not(all(target_arch = "x86", target_env = "msvc"))),
    assert_instr(vmovaps)
)] //should be vmovapd
pub unsafe fn _mm512_store_pd(mem_addr: *mut f64, a: __m512d) {
    ptr::write(mem_addr as *mut __m512d, a);
}

/// Load packed 32-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_epi32)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovdqu32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_loadu_epi32(src: __m512i, k: __mmask16, mem_addr: *const i32) -> __m512i {
    transmute(loaddqu32_512(mem_addr, src.as_i32x16(), k))
}

/// Load packed 32-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_epi32)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovdqu32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_maskz_loadu_epi32(k: __mmask16, mem_addr: *const i32) -> __m512i {
    _mm512_mask_loadu_epi32(_mm512_setzero_si512(), k, mem_addr)
}

/// Load packed 64-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_epi64)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovdqu64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_loadu_epi64(src: __m512i, k: __mmask8, mem_addr: *const i64) -> __m512i {
    transmute(loaddqu64_512(mem_addr, src.as_i64x8(), k))
}

/// Load packed 64-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_epi64)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovdqu64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m512i {
    _mm512_mask_loadu_epi64(_mm512_setzero_si512(), k, mem_addr)
}

/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_ps)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovups))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_loadu_ps(src: __m512, k: __mmask16, mem_addr: *const f32) -> __m512 {
    transmute(loadups_512(mem_addr, src.as_f32x16(), k))
}

/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_ps)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovups))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_maskz_loadu_ps(k: __mmask16, mem_addr: *const f32) -> __m512 {
    _mm512_mask_loadu_ps(_mm512_setzero_ps(), k, mem_addr)
}

/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovupd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_mask_loadu_pd(src: __m512d, k: __mmask8, mem_addr: *const f64) -> __m512d {
    transmute(loadupd_512(mem_addr, src.as_f64x8(), k))
}

/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovupd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm512_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m512d {
    _mm512_mask_loadu_pd(_mm512_setzero_pd(), k, mem_addr)
}
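
// A minimal usage sketch (not part of the stdarch test suite): contrasts the writemask
// and zeromask forms of the unaligned masked loads above. The data and the 0b0000_1111
// mask are illustrative assumptions.
#[cfg(test)]
#[allow(dead_code)]
fn example_mask_vs_maskz_loadu_pd() {
    unsafe {
        let mem = [1.0f64; 8];
        let src = _mm512_set1_pd(-1.0);
        // Inactive lanes are copied from `src` ...
        let a = _mm512_mask_loadu_pd(src, 0b0000_1111, mem.as_ptr());
        // ... or zeroed, depending on which form is used.
        let b = _mm512_maskz_loadu_pd(0b0000_1111, mem.as_ptr());
        // a == [1.0, 1.0, 1.0, 1.0, -1.0, -1.0, -1.0, -1.0]
        // b == [1.0, 1.0, 1.0, 1.0,  0.0,  0.0,  0.0,  0.0]
        let _ = (a, b);
    }
}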

/// Load packed 32-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_loadu_epi32(src: __m256i, k: __mmask8, mem_addr: *const i32) -> __m256i {
    transmute(loaddqu32_256(mem_addr, src.as_i32x8(), k))
}

/// Load packed 32-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_maskz_loadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m256i {
    _mm256_mask_loadu_epi32(_mm256_setzero_si256(), k, mem_addr)
}

/// Load packed 64-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_loadu_epi64(src: __m256i, k: __mmask8, mem_addr: *const i64) -> __m256i {
    transmute(loaddqu64_256(mem_addr, src.as_i64x4(), k))
}

/// Load packed 64-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m256i {
    _mm256_mask_loadu_epi64(_mm256_setzero_si256(), k, mem_addr)
}

/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovups))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_loadu_ps(src: __m256, k: __mmask8, mem_addr: *const f32) -> __m256 {
    transmute(loadups_256(mem_addr, src.as_f32x8(), k))
}

/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovups))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_maskz_loadu_ps(k: __mmask8, mem_addr: *const f32) -> __m256 {
    _mm256_mask_loadu_ps(_mm256_setzero_ps(), k, mem_addr)
}

/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovupd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_mask_loadu_pd(src: __m256d, k: __mmask8, mem_addr: *const f64) -> __m256d {
    transmute(loadupd_256(mem_addr, src.as_f64x4(), k))
}

/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovupd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm256_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m256d {
    _mm256_mask_loadu_pd(_mm256_setzero_pd(), k, mem_addr)
}

/// Load packed 32-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_loadu_epi32(src: __m128i, k: __mmask8, mem_addr: *const i32) -> __m128i {
    transmute(loaddqu32_128(mem_addr, src.as_i32x4(), k))
}

/// Load packed 32-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu32))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_maskz_loadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m128i {
    _mm_mask_loadu_epi32(_mm_setzero_si128(), k, mem_addr)
}

/// Load packed 64-bit integers from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_loadu_epi64(src: __m128i, k: __mmask8, mem_addr: *const i64) -> __m128i {
    transmute(loaddqu64_128(mem_addr, src.as_i64x2(), k))
}

/// Load packed 64-bit integers from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqu64))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m128i {
    _mm_mask_loadu_epi64(_mm_setzero_si128(), k, mem_addr)
}

/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovups))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_loadu_ps(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
    transmute(loadups_128(mem_addr, src.as_f32x4(), k))
}

/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovups))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_maskz_loadu_ps(k: __mmask8, mem_addr: *const f32) -> __m128 {
    _mm_mask_loadu_ps(_mm_setzero_ps(), k, mem_addr)
}

/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovupd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_mask_loadu_pd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
    transmute(loadupd_128(mem_addr, src.as_f64x2(), k))
}

/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
/// (elements are zeroed out when the corresponding mask bit is not set).
/// mem_addr does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovupd))]
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub unsafe fn _mm_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m128d {
    _mm_mask_loadu_pd(_mm_setzero_pd(), k, mem_addr)
}
35088
35089/// Load packed 32-bit integers from memory into dst using writemask k
35090/// (elements are copied from src when the corresponding mask bit is not set).
35091/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35092///
35093/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_epi32)
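///
/// # Examples
///
/// A usage sketch (illustrative only); the `Aligned` wrapper is a hypothetical
/// helper used here just to satisfy the 64-byte alignment requirement, and the
/// code assumes `avx512f` support has been verified at runtime.
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # #[target_feature(enable = "avx512f")]
/// # fn example() {
/// # use core::arch::x86_64::*;
/// // Guarantee the 64-byte alignment that the aligned load requires.
/// #[repr(align(64))]
/// struct Aligned([i32; 16]);
///
/// let data = Aligned([7; 16]);
/// let src = _mm512_set1_epi32(-1);
/// // Mask 0x00FF: the low 8 lanes are loaded from memory, the high 8 lanes keep `src`.
/// let v = unsafe { _mm512_mask_load_epi32(src, 0x00FF, data.0.as_ptr()) };
/// # let _ = v;
/// # }
/// ```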
35094#[inline]
35095#[target_feature(enable = "avx512f")]
35096#[cfg_attr(test, assert_instr(vmovdqa32))]
35097#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35098pub unsafe fn _mm512_mask_load_epi32(src: __m512i, k: __mmask16, mem_addr: *const i32) -> __m512i {
    transmute(loaddqa32_512(mem_addr, src.as_i32x16(), k))
35100}
35101
35102/// Load packed 32-bit integers from memory into dst using zeromask k
35103/// (elements are zeroed out when the corresponding mask bit is not set).
35104/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35105///
35106/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_epi32)
35107#[inline]
35108#[target_feature(enable = "avx512f")]
35109#[cfg_attr(test, assert_instr(vmovdqa32))]
35110#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35111pub unsafe fn _mm512_maskz_load_epi32(k: __mmask16, mem_addr: *const i32) -> __m512i {
    _mm512_mask_load_epi32(_mm512_setzero_si512(), k, mem_addr)
35113}
35114
35115/// Load packed 64-bit integers from memory into dst using writemask k
35116/// (elements are copied from src when the corresponding mask bit is not set).
35117/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35118///
35119/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_epi64)
35120#[inline]
35121#[target_feature(enable = "avx512f")]
35122#[cfg_attr(test, assert_instr(vmovdqa64))]
35123#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35124pub unsafe fn _mm512_mask_load_epi64(src: __m512i, k: __mmask8, mem_addr: *const i64) -> __m512i {
    transmute(loaddqa64_512(mem_addr, src.as_i64x8(), k))
35126}
35127
35128/// Load packed 64-bit integers from memory into dst using zeromask k
35129/// (elements are zeroed out when the corresponding mask bit is not set).
35130/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35131///
35132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_epi64)
35133#[inline]
35134#[target_feature(enable = "avx512f")]
35135#[cfg_attr(test, assert_instr(vmovdqa64))]
35136#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35137pub unsafe fn _mm512_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m512i {
    _mm512_mask_load_epi64(_mm512_setzero_si512(), k, mem_addr)
35139}
35140
35141/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
35142/// (elements are copied from src when the corresponding mask bit is not set).
35143/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35144///
35145/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_ps)
35146#[inline]
35147#[target_feature(enable = "avx512f")]
35148#[cfg_attr(test, assert_instr(vmovaps))]
35149#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35150pub unsafe fn _mm512_mask_load_ps(src: __m512, k: __mmask16, mem_addr: *const f32) -> __m512 {
    transmute(loadaps_512(mem_addr, src.as_f32x16(), k))
35152}
35153
35154/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
35155/// (elements are zeroed out when the corresponding mask bit is not set).
35156/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35157///
35158/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_ps)
35159#[inline]
35160#[target_feature(enable = "avx512f")]
35161#[cfg_attr(test, assert_instr(vmovaps))]
35162#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35163pub unsafe fn _mm512_maskz_load_ps(k: __mmask16, mem_addr: *const f32) -> __m512 {
    _mm512_mask_load_ps(_mm512_setzero_ps(), k, mem_addr)
35165}
35166
35167/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
35168/// (elements are copied from src when the corresponding mask bit is not set).
35169/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35170///
35171/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_pd)
35172#[inline]
35173#[target_feature(enable = "avx512f")]
35174#[cfg_attr(test, assert_instr(vmovapd))]
35175#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35176pub unsafe fn _mm512_mask_load_pd(src: __m512d, k: __mmask8, mem_addr: *const f64) -> __m512d {
    transmute(loadapd_512(mem_addr, src.as_f64x8(), k))
35178}
35179
35180/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
35181/// (elements are zeroed out when the corresponding mask bit is not set).
35182/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35183///
35184/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_pd)
35185#[inline]
35186#[target_feature(enable = "avx512f")]
35187#[cfg_attr(test, assert_instr(vmovapd))]
35188#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35189pub unsafe fn _mm512_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m512d {
    _mm512_mask_load_pd(_mm512_setzero_pd(), k, mem_addr)
35191}
35192
35193/// Load packed 32-bit integers from memory into dst using writemask k
35194/// (elements are copied from src when the corresponding mask bit is not set).
35195/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35196///
35197/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_epi32)
35198#[inline]
35199#[target_feature(enable = "avx512f,avx512vl")]
35200#[cfg_attr(test, assert_instr(vmovdqa32))]
35201#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35202pub unsafe fn _mm256_mask_load_epi32(src: __m256i, k: __mmask8, mem_addr: *const i32) -> __m256i {
    transmute(loaddqa32_256(mem_addr, src.as_i32x8(), k))
35204}
35205
35206/// Load packed 32-bit integers from memory into dst using zeromask k
35207/// (elements are zeroed out when the corresponding mask bit is not set).
35208/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35209///
35210/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_epi32)
35211#[inline]
35212#[target_feature(enable = "avx512f,avx512vl")]
35213#[cfg_attr(test, assert_instr(vmovdqa32))]
35214#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35215pub unsafe fn _mm256_maskz_load_epi32(k: __mmask8, mem_addr: *const i32) -> __m256i {
    _mm256_mask_load_epi32(_mm256_setzero_si256(), k, mem_addr)
35217}
35218
35219/// Load packed 64-bit integers from memory into dst using writemask k
35220/// (elements are copied from src when the corresponding mask bit is not set).
35221/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35222///
35223/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_epi64)
35224#[inline]
35225#[target_feature(enable = "avx512f,avx512vl")]
35226#[cfg_attr(test, assert_instr(vmovdqa64))]
35227#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35228pub unsafe fn _mm256_mask_load_epi64(src: __m256i, k: __mmask8, mem_addr: *const i64) -> __m256i {
    transmute(loaddqa64_256(mem_addr, src.as_i64x4(), k))
35230}
35231
35232/// Load packed 64-bit integers from memory into dst using zeromask k
35233/// (elements are zeroed out when the corresponding mask bit is not set).
35234/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35235///
35236/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_epi64)
35237#[inline]
35238#[target_feature(enable = "avx512f,avx512vl")]
35239#[cfg_attr(test, assert_instr(vmovdqa64))]
35240#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35241pub unsafe fn _mm256_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m256i {
    _mm256_mask_load_epi64(_mm256_setzero_si256(), k, mem_addr)
35243}
35244
35245/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
35246/// (elements are copied from src when the corresponding mask bit is not set).
35247/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35248///
35249/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_ps)
35250#[inline]
35251#[target_feature(enable = "avx512f,avx512vl")]
35252#[cfg_attr(test, assert_instr(vmovaps))]
35253#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35254pub unsafe fn _mm256_mask_load_ps(src: __m256, k: __mmask8, mem_addr: *const f32) -> __m256 {
    transmute(loadaps_256(mem_addr, src.as_f32x8(), k))
35256}
35257
35258/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
35259/// (elements are zeroed out when the corresponding mask bit is not set).
35260/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35261///
35262/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_ps)
35263#[inline]
35264#[target_feature(enable = "avx512f,avx512vl")]
35265#[cfg_attr(test, assert_instr(vmovaps))]
35266#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35267pub unsafe fn _mm256_maskz_load_ps(k: __mmask8, mem_addr: *const f32) -> __m256 {
    _mm256_mask_load_ps(_mm256_setzero_ps(), k, mem_addr)
35269}
35270
35271/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
35272/// (elements are copied from src when the corresponding mask bit is not set).
35273/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35274///
35275/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_pd)
35276#[inline]
35277#[target_feature(enable = "avx512f,avx512vl")]
35278#[cfg_attr(test, assert_instr(vmovapd))]
35279#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35280pub unsafe fn _mm256_mask_load_pd(src: __m256d, k: __mmask8, mem_addr: *const f64) -> __m256d {
    transmute(loadapd_256(mem_addr, src.as_f64x4(), k))
35282}
35283
35284/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
35285/// (elements are zeroed out when the corresponding mask bit is not set).
35286/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35287///
35288/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_pd)
35289#[inline]
35290#[target_feature(enable = "avx512f,avx512vl")]
35291#[cfg_attr(test, assert_instr(vmovapd))]
35292#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35293pub unsafe fn _mm256_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m256d {
    _mm256_mask_load_pd(_mm256_setzero_pd(), k, mem_addr)
35295}
35296
35297/// Load packed 32-bit integers from memory into dst using writemask k
35298/// (elements are copied from src when the corresponding mask bit is not set).
35299/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35300///
35301/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_epi32)
35302#[inline]
35303#[target_feature(enable = "avx512f,avx512vl")]
35304#[cfg_attr(test, assert_instr(vmovdqa32))]
35305#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35306pub unsafe fn _mm_mask_load_epi32(src: __m128i, k: __mmask8, mem_addr: *const i32) -> __m128i {
    transmute(loaddqa32_128(mem_addr, src.as_i32x4(), k))
35308}
35309
35310/// Load packed 32-bit integers from memory into dst using zeromask k
35311/// (elements are zeroed out when the corresponding mask bit is not set).
35312/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35313///
35314/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_epi32)
35315#[inline]
35316#[target_feature(enable = "avx512f,avx512vl")]
35317#[cfg_attr(test, assert_instr(vmovdqa32))]
35318#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35319pub unsafe fn _mm_maskz_load_epi32(k: __mmask8, mem_addr: *const i32) -> __m128i {
    _mm_mask_load_epi32(_mm_setzero_si128(), k, mem_addr)
35321}
35322
35323/// Load packed 64-bit integers from memory into dst using writemask k
35324/// (elements are copied from src when the corresponding mask bit is not set).
35325/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35326///
35327/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_epi64)
35328#[inline]
35329#[target_feature(enable = "avx512f,avx512vl")]
35330#[cfg_attr(test, assert_instr(vmovdqa64))]
35331#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35332pub unsafe fn _mm_mask_load_epi64(src: __m128i, k: __mmask8, mem_addr: *const i64) -> __m128i {
    transmute(loaddqa64_128(mem_addr, src.as_i64x2(), k))
35334}
35335
35336/// Load packed 64-bit integers from memory into dst using zeromask k
35337/// (elements are zeroed out when the corresponding mask bit is not set).
35338/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35339///
35340/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_epi64)
35341#[inline]
35342#[target_feature(enable = "avx512f,avx512vl")]
35343#[cfg_attr(test, assert_instr(vmovdqa64))]
35344#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35345pub unsafe fn _mm_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m128i {
    _mm_mask_load_epi64(_mm_setzero_si128(), k, mem_addr)
35347}
35348
35349/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
35350/// (elements are copied from src when the corresponding mask bit is not set).
35351/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35352///
35353/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_ps)
35354#[inline]
35355#[target_feature(enable = "avx512f,avx512vl")]
35356#[cfg_attr(test, assert_instr(vmovaps))]
35357#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35358pub unsafe fn _mm_mask_load_ps(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
    transmute(loadaps_128(mem_addr, src.as_f32x4(), k))
35360}
35361
35362/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
35363/// (elements are zeroed out when the corresponding mask bit is not set).
35364/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35365///
35366/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_ps)
35367#[inline]
35368#[target_feature(enable = "avx512f,avx512vl")]
35369#[cfg_attr(test, assert_instr(vmovaps))]
35370#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35371pub unsafe fn _mm_maskz_load_ps(k: __mmask8, mem_addr: *const f32) -> __m128 {
    _mm_mask_load_ps(_mm_setzero_ps(), k, mem_addr)
35373}
35374
35375/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
35376/// (elements are copied from src when the corresponding mask bit is not set).
35377/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35378///
35379/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_pd)
35380#[inline]
35381#[target_feature(enable = "avx512f,avx512vl")]
35382#[cfg_attr(test, assert_instr(vmovapd))]
35383#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35384pub unsafe fn _mm_mask_load_pd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
    transmute(loadapd_128(mem_addr, src.as_f64x2(), k))
35386}
35387
35388/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
35389/// (elements are zeroed out when the corresponding mask bit is not set).
35390/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35391///
35392/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_pd)
35393#[inline]
35394#[target_feature(enable = "avx512f,avx512vl")]
35395#[cfg_attr(test, assert_instr(vmovapd))]
35396#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35397pub unsafe fn _mm_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m128d {
    _mm_mask_load_pd(_mm_setzero_pd(), k, mem_addr)
35399}
35400
35401/// Load a single-precision (32-bit) floating-point element from memory into the lower element of dst
35402/// using writemask k (the element is copied from src when mask bit 0 is not set), and set the upper
35403/// 3 packed elements of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection
35404/// exception may be generated.
35405///
35406/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_load_ss)
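///
/// # Examples
///
/// An illustrative sketch (not part of the original documentation). The
/// `Aligned` wrapper is a hypothetical helper that provides the documented
/// 16-byte alignment, and `avx512f` support is assumed to have been checked.
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # #[target_feature(enable = "avx512f")]
/// # fn example() {
/// # use core::arch::x86_64::*;
/// #[repr(align(16))]
/// struct Aligned(f32);
///
/// let data = Aligned(3.5);
/// let src = _mm_set1_ps(1.0);
/// // Mask bit 0 is set, so the low lane becomes 3.5; the upper three lanes are zeroed.
/// let v = unsafe { _mm_mask_load_ss(src, 0b1, &data.0) };
/// # let _ = v;
/// # }
/// ```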
35407#[inline]
35408#[cfg_attr(test, assert_instr(vmovss))]
35409#[target_feature(enable = "avx512f")]
35410#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35411pub unsafe fn _mm_mask_load_ss(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
35412 let mut dst: __m128 = src;
35413 asm!(
35414 vpl!("vmovss {dst}{{{k}}}"),
35415 p = in(reg) mem_addr,
35416 k = in(kreg) k,
35417 dst = inout(xmm_reg) dst,
35418 options(pure, readonly, nostack, preserves_flags),
35419 );
35420 dst
35421}
35422
35423/// Load a single-precision (32-bit) floating-point element from memory into the lower element of dst
35424/// using zeromask k (the element is zeroed out when mask bit 0 is not set), and set the upper 3 packed
35425/// elements of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection
35426/// exception may be generated.
35427///
35428/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_load_ss)
35429#[inline]
35430#[cfg_attr(test, assert_instr(vmovss))]
35431#[target_feature(enable = "avx512f")]
35432#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35433pub unsafe fn _mm_maskz_load_ss(k: __mmask8, mem_addr: *const f32) -> __m128 {
35434 let mut dst: __m128;
35435 asm!(
35436 vpl!("vmovss {dst}{{{k}}} {{z}}"),
35437 p = in(reg) mem_addr,
35438 k = in(kreg) k,
35439 dst = out(xmm_reg) dst,
35440 options(pure, readonly, nostack, preserves_flags),
35441 );
35442 dst
35443}
35444
35445/// Load a double-precision (64-bit) floating-point element from memory into the lower element of dst
35446/// using writemask k (the element is copied from src when mask bit 0 is not set), and set the upper
35447/// element of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection
35448/// exception may be generated.
35449///
35450/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_load_sd)
35451#[inline]
35452#[cfg_attr(test, assert_instr(vmovsd))]
35453#[target_feature(enable = "avx512f")]
35454#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35455pub unsafe fn _mm_mask_load_sd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
35456 let mut dst: __m128d = src;
35457 asm!(
35458 vpl!("vmovsd {dst}{{{k}}}"),
35459 p = in(reg) mem_addr,
35460 k = in(kreg) k,
35461 dst = inout(xmm_reg) dst,
35462 options(pure, readonly, nostack, preserves_flags),
35463 );
35464 dst
35465}
35466
35467/// Load a double-precision (64-bit) floating-point element from memory into the lower element of dst
35468/// using zeromask k (the element is zeroed out when mask bit 0 is not set), and set the upper element
35469/// of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection exception
35470/// may be generated.
35471///
35472/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_load_sd)
35473#[inline]
35474#[cfg_attr(test, assert_instr(vmovsd))]
35475#[target_feature(enable = "avx512f")]
35476#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35477pub unsafe fn _mm_maskz_load_sd(k: __mmask8, mem_addr: *const f64) -> __m128d {
35478 let mut dst: __m128d;
35479 asm!(
35480 vpl!("vmovsd {dst}{{{k}}} {{z}}"),
35481 p = in(reg) mem_addr,
35482 k = in(kreg) k,
35483 dst = out(xmm_reg) dst,
35484 options(pure, readonly, nostack, preserves_flags),
35485 );
35486 dst
35487}
35488
35489/// Store packed 32-bit integers from a into memory using writemask k.
35490/// mem_addr does not need to be aligned on any particular boundary.
35491///
35492/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_epi32)
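///
/// # Examples
///
/// A minimal sketch (illustrative only), assuming `avx512f` support has been
/// verified at runtime.
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # #[target_feature(enable = "avx512f")]
/// # fn example() {
/// # use core::arch::x86_64::*;
/// let mut out = [0_i32; 16];
/// let a = _mm512_set1_epi32(5);
/// // Mask 0x000F: only the four lowest lanes are written; the rest of `out` is untouched.
/// unsafe { _mm512_mask_storeu_epi32(out.as_mut_ptr(), 0x000F, a) };
/// assert_eq!(&out[..4], &[5, 5, 5, 5]);
/// # }
/// ```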
35493#[inline]
35494#[target_feature(enable = "avx512f")]
35495#[cfg_attr(test, assert_instr(vmovdqu32))]
35496#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35497pub unsafe fn _mm512_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask16, a: __m512i) {
35498 storedqu32_512(mem_addr, a.as_i32x16(), mask)
35499}
35500
35501/// Store packed 64-bit integers from a into memory using writemask k.
35502/// mem_addr does not need to be aligned on any particular boundary.
35503///
35504/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_epi64)
35505#[inline]
35506#[target_feature(enable = "avx512f")]
35507#[cfg_attr(test, assert_instr(vmovdqu64))]
35508#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35509pub unsafe fn _mm512_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m512i) {
35510 storedqu64_512(mem_addr, a.as_i64x8(), mask)
35511}
35512
35513/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
35514/// mem_addr does not need to be aligned on any particular boundary.
35515///
35516/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_ps)
35517#[inline]
35518#[target_feature(enable = "avx512f")]
35519#[cfg_attr(test, assert_instr(vmovups))]
35520#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35521pub unsafe fn _mm512_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask16, a: __m512) {
35522 storeups_512(mem_addr, a.as_f32x16(), mask)
35523}
35524
35525/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
35526/// mem_addr does not need to be aligned on any particular boundary.
35527///
35528/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_pd)
35529#[inline]
35530#[target_feature(enable = "avx512f")]
35531#[cfg_attr(test, assert_instr(vmovupd))]
35532#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35533pub unsafe fn _mm512_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m512d) {
35534 storeupd_512(mem_addr, a.as_f64x8(), mask)
35535}
35536
35537/// Store packed 32-bit integers from a into memory using writemask k.
35538/// mem_addr does not need to be aligned on any particular boundary.
35539///
35540/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_epi32)
35541#[inline]
35542#[target_feature(enable = "avx512f,avx512vl")]
35543#[cfg_attr(test, assert_instr(vmovdqu32))]
35544#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35545pub unsafe fn _mm256_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m256i) {
35546 storedqu32_256(mem_addr, a.as_i32x8(), mask)
35547}
35548
35549/// Store packed 64-bit integers from a into memory using writemask k.
35550/// mem_addr does not need to be aligned on any particular boundary.
35551///
35552/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_epi64)
35553#[inline]
35554#[target_feature(enable = "avx512f,avx512vl")]
35555#[cfg_attr(test, assert_instr(vmovdqu64))]
35556#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35557pub unsafe fn _mm256_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m256i) {
35558 storedqu64_256(mem_addr, a.as_i64x4(), mask)
35559}
35560
35561/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
35562/// mem_addr does not need to be aligned on any particular boundary.
35563///
35564/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_ps)
35565#[inline]
35566#[target_feature(enable = "avx512f,avx512vl")]
35567#[cfg_attr(test, assert_instr(vmovups))]
35568#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35569pub unsafe fn _mm256_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask8, a: __m256) {
35570 storeups_256(mem_addr, a.as_f32x8(), mask)
35571}
35572
35573/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
35574/// mem_addr does not need to be aligned on any particular boundary.
35575///
35576/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_pd)
35577#[inline]
35578#[target_feature(enable = "avx512f,avx512vl")]
35579#[cfg_attr(test, assert_instr(vmovupd))]
35580#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35581pub unsafe fn _mm256_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m256d) {
35582 storeupd_256(mem_addr, a.as_f64x4(), mask)
35583}
35584
35585/// Store packed 32-bit integers from a into memory using writemask k.
35586/// mem_addr does not need to be aligned on any particular boundary.
35587///
35588/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_epi32)
35589#[inline]
35590#[target_feature(enable = "avx512f,avx512vl")]
35591#[cfg_attr(test, assert_instr(vmovdqu32))]
35592#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35593pub unsafe fn _mm_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m128i) {
35594 storedqu32_128(mem_addr, a.as_i32x4(), mask)
35595}
35596
35597/// Store packed 64-bit integers from a into memory using writemask k.
35598/// mem_addr does not need to be aligned on any particular boundary.
35599///
35600/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_epi64)
35601#[inline]
35602#[target_feature(enable = "avx512f,avx512vl")]
35603#[cfg_attr(test, assert_instr(vmovdqu64))]
35604#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35605pub unsafe fn _mm_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m128i) {
35606 storedqu64_128(mem_addr, a.as_i64x2(), mask)
35607}
35608
35609/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
35610/// mem_addr does not need to be aligned on any particular boundary.
35611///
35612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_ps)
35613#[inline]
35614#[target_feature(enable = "avx512f,avx512vl")]
35615#[cfg_attr(test, assert_instr(vmovups))]
35616#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35617pub unsafe fn _mm_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask8, a: __m128) {
35618 storeups_128(mem_addr, a.as_f32x4(), mask)
35619}
35620
35621/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
35622/// mem_addr does not need to be aligned on any particular boundary.
35623///
35624/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_pd)
35625#[inline]
35626#[target_feature(enable = "avx512f,avx512vl")]
35627#[cfg_attr(test, assert_instr(vmovupd))]
35628#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35629pub unsafe fn _mm_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m128d) {
35630 storeupd_128(mem_addr, a.as_f64x2(), mask)
35631}
35632
35633/// Store packed 32-bit integers from a into memory using writemask k.
35634/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35635///
35636/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_epi32)
35637#[inline]
35638#[target_feature(enable = "avx512f")]
35639#[cfg_attr(test, assert_instr(vmovdqa32))]
35640#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35641pub unsafe fn _mm512_mask_store_epi32(mem_addr: *mut i32, mask: __mmask16, a: __m512i) {
35642 storedqa32_512(mem_addr, a.as_i32x16(), mask)
35643}
35644
35645/// Store packed 64-bit integers from a into memory using writemask k.
35646/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35647///
35648/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_epi64)
35649#[inline]
35650#[target_feature(enable = "avx512f")]
35651#[cfg_attr(test, assert_instr(vmovdqa64))]
35652#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35653pub unsafe fn _mm512_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m512i) {
35654 storedqa64_512(mem_addr, a.as_i64x8(), mask)
35655}
35656
35657/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
35658/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35659///
35660/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_ps)
35661#[inline]
35662#[target_feature(enable = "avx512f")]
35663#[cfg_attr(test, assert_instr(vmovaps))]
35664#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35665pub unsafe fn _mm512_mask_store_ps(mem_addr: *mut f32, mask: __mmask16, a: __m512) {
35666 storeaps_512(mem_addr, a.as_f32x16(), mask)
35667}
35668
35669/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
35670/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35671///
35672/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_pd)
35673#[inline]
35674#[target_feature(enable = "avx512f")]
35675#[cfg_attr(test, assert_instr(vmovapd))]
35676#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35677pub unsafe fn _mm512_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m512d) {
35678 storeapd_512(mem_addr, a.as_f64x8(), mask)
35679}
35680
35681/// Store packed 32-bit integers from a into memory using writemask k.
35682/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35683///
35684/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_epi32)
35685#[inline]
35686#[target_feature(enable = "avx512f,avx512vl")]
35687#[cfg_attr(test, assert_instr(vmovdqa32))]
35688#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35689pub unsafe fn _mm256_mask_store_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m256i) {
35690 storedqa32_256(mem_addr, a.as_i32x8(), mask)
35691}
35692
35693/// Store packed 64-bit integers from a into memory using writemask k.
35694/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35695///
35696/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_epi64)
35697#[inline]
35698#[target_feature(enable = "avx512f,avx512vl")]
35699#[cfg_attr(test, assert_instr(vmovdqa64))]
35700#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35701pub unsafe fn _mm256_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m256i) {
35702 storedqa64_256(mem_addr, a.as_i64x4(), mask)
35703}
35704
35705/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
35706/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35707///
35708/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_ps)
35709#[inline]
35710#[target_feature(enable = "avx512f,avx512vl")]
35711#[cfg_attr(test, assert_instr(vmovaps))]
35712#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35713pub unsafe fn _mm256_mask_store_ps(mem_addr: *mut f32, mask: __mmask8, a: __m256) {
35714 storeaps_256(mem_addr, a.as_f32x8(), mask)
35715}
35716
35717/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
35718/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35719///
35720/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_pd)
35721#[inline]
35722#[target_feature(enable = "avx512f,avx512vl")]
35723#[cfg_attr(test, assert_instr(vmovapd))]
35724#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35725pub unsafe fn _mm256_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m256d) {
35726 storeapd_256(mem_addr, a.as_f64x4(), mask)
35727}
35728
35729/// Store packed 32-bit integers from a into memory using writemask k.
35730/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35731///
35732/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_epi32)
35733#[inline]
35734#[target_feature(enable = "avx512f,avx512vl")]
35735#[cfg_attr(test, assert_instr(vmovdqa32))]
35736#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35737pub unsafe fn _mm_mask_store_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m128i) {
35738 storedqa32_128(mem_addr, a.as_i32x4(), mask)
35739}
35740
35741/// Store packed 64-bit integers from a into memory using writemask k.
35742/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35743///
35744/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_epi64)
35745#[inline]
35746#[target_feature(enable = "avx512f,avx512vl")]
35747#[cfg_attr(test, assert_instr(vmovdqa64))]
35748#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35749pub unsafe fn _mm_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m128i) {
35750 storedqa64_128(mem_addr, a.as_i64x2(), mask)
35751}
35752
35753/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
35754/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35755///
35756/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_ps)
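///
/// # Examples
///
/// A usage sketch (illustrative only); the `Aligned` wrapper is a hypothetical
/// helper providing the required 16-byte alignment, and `avx512f`/`avx512vl`
/// support is assumed to have been checked at runtime.
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # #[target_feature(enable = "avx512f,avx512vl")]
/// # fn example() {
/// # use core::arch::x86_64::*;
/// #[repr(align(16))]
/// struct Aligned([f32; 4]);
///
/// let mut out = Aligned([0.0; 4]);
/// let a = _mm_set_ps(4.0, 3.0, 2.0, 1.0);
/// // Mask 0b0101: lanes 0 and 2 are written (1.0 and 3.0); lanes 1 and 3 stay 0.0.
/// unsafe { _mm_mask_store_ps(out.0.as_mut_ptr(), 0b0101, a) };
/// assert_eq!(out.0, [1.0, 0.0, 3.0, 0.0]);
/// # }
/// ```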
35757#[inline]
35758#[target_feature(enable = "avx512f,avx512vl")]
35759#[cfg_attr(test, assert_instr(vmovaps))]
35760#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35761pub unsafe fn _mm_mask_store_ps(mem_addr: *mut f32, mask: __mmask8, a: __m128) {
35762 storeaps_128(mem_addr, a.as_f32x4(), mask)
35763}
35764
35765/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
35766/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35767///
35768/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_pd)
35769#[inline]
35770#[target_feature(enable = "avx512f,avx512vl")]
35771#[cfg_attr(test, assert_instr(vmovapd))]
35772#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35773pub unsafe fn _mm_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m128d) {
35774 storeapd_128(mem_addr, a.as_f64x2(), mask)
35775}
35776
35777/// Store a single-precision (32-bit) floating-point element from a into memory using writemask k. mem_addr
35778/// must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35779///
35780/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_store_ss)
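///
/// # Examples
///
/// An illustrative sketch (not part of the original documentation); the
/// `Aligned` wrapper is a hypothetical helper for the documented 16-byte
/// alignment, and `avx512f` support is assumed.
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # #[target_feature(enable = "avx512f")]
/// # fn example() {
/// # use core::arch::x86_64::*;
/// #[repr(align(16))]
/// struct Aligned(f32);
///
/// let mut out = Aligned(0.0);
/// let a = _mm_set1_ps(2.5);
/// // Mask bit 0 is set, so the lowest lane of `a` is written to memory.
/// unsafe { _mm_mask_store_ss(&mut out.0, 0b1, a) };
/// assert_eq!(out.0, 2.5);
/// # }
/// ```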
35781#[inline]
35782#[cfg_attr(test, assert_instr(vmovss))]
35783#[target_feature(enable = "avx512f")]
35784#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35785pub unsafe fn _mm_mask_store_ss(mem_addr: *mut f32, k: __mmask8, a: __m128) {
35786 asm!(
35787 vps!("vmovss", "{{{k}}}, {a}"),
35788 p = in(reg) mem_addr,
35789 k = in(kreg) k,
35790 a = in(xmm_reg) a,
35791 options(nostack, preserves_flags),
35792 );
35793}
35794
35795/// Store a double-precision (64-bit) floating-point element from a into memory using writemask k. mem_addr
35796/// must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35797///
35798/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_store_sd)
35799#[inline]
35800#[cfg_attr(test, assert_instr(vmovsd))]
35801#[target_feature(enable = "avx512f")]
35802#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35803pub unsafe fn _mm_mask_store_sd(mem_addr: *mut f64, k: __mmask8, a: __m128d) {
35804 asm!(
35805 vps!("vmovsd", "{{{k}}}, {a}"),
35806 p = in(reg) mem_addr,
35807 k = in(kreg) k,
35808 a = in(xmm_reg) a,
35809 options(nostack, preserves_flags),
35810 );
35811}
35812
35813/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35814///
35815/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_epi32)
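///
/// # Examples
///
/// A sketch of how the expansion works (illustrative only, assuming `avx512f`
/// support has been checked at runtime): memory is read contiguously, and the
/// loaded values are placed only into the lanes whose mask bit is set.
///
/// ```
/// # #[cfg(target_arch = "x86_64")]
/// # #[target_feature(enable = "avx512f")]
/// # fn example() {
/// # use core::arch::x86_64::*;
/// let data = [10, 20, 30, 40];
/// let src = _mm512_set1_epi32(-1);
/// // With mask 0b1010_1010, bits 1, 3, 5 and 7 are set, so the four contiguous
/// // values 10, 20, 30, 40 land in lanes 1, 3, 5 and 7; every other lane keeps `src`.
/// let v = unsafe { _mm512_mask_expandloadu_epi32(src, 0b1010_1010, data.as_ptr()) };
/// # let _ = v;
/// # }
/// ```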
35816#[inline]
35817#[target_feature(enable = "avx512f")]
35818#[cfg_attr(test, assert_instr(vpexpandd))]
35819#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35820pub unsafe fn _mm512_mask_expandloadu_epi32(
35821 src: __m512i,
35822 k: __mmask16,
35823 mem_addr: *const i32,
35824) -> __m512i {
    transmute(expandloadd_512(mem_addr, src.as_i32x16(), k))
35826}
35827
35828/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35829///
35830/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_epi32)
35831#[inline]
35832#[target_feature(enable = "avx512f")]
35833#[cfg_attr(test, assert_instr(vpexpandd))]
35834#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35835pub unsafe fn _mm512_maskz_expandloadu_epi32(k: __mmask16, mem_addr: *const i32) -> __m512i {
    _mm512_mask_expandloadu_epi32(_mm512_setzero_si512(), k, mem_addr)
35837}
35838
35839/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35840///
35841/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_epi32)
35842#[inline]
35843#[target_feature(enable = "avx512f,avx512vl")]
35844#[cfg_attr(test, assert_instr(vpexpandd))]
35845#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35846pub unsafe fn _mm256_mask_expandloadu_epi32(
35847 src: __m256i,
35848 k: __mmask8,
35849 mem_addr: *const i32,
35850) -> __m256i {
    transmute(expandloadd_256(mem_addr, src.as_i32x8(), k))
35852}
35853
35854/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35855///
35856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_epi32)
35857#[inline]
35858#[target_feature(enable = "avx512f,avx512vl")]
35859#[cfg_attr(test, assert_instr(vpexpandd))]
35860#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35861pub unsafe fn _mm256_maskz_expandloadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m256i {
    _mm256_mask_expandloadu_epi32(_mm256_setzero_si256(), k, mem_addr)
35863}
35864
35865/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35866///
35867/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_epi32)
35868#[inline]
35869#[target_feature(enable = "avx512f,avx512vl")]
35870#[cfg_attr(test, assert_instr(vpexpandd))]
35871#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35872pub unsafe fn _mm_mask_expandloadu_epi32(
35873 src: __m128i,
35874 k: __mmask8,
35875 mem_addr: *const i32,
35876) -> __m128i {
    transmute(expandloadd_128(mem_addr, src.as_i32x4(), k))
35878}
35879
35880/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35881///
35882/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_epi32)
35883#[inline]
35884#[target_feature(enable = "avx512f,avx512vl")]
35885#[cfg_attr(test, assert_instr(vpexpandd))]
35886#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35887pub unsafe fn _mm_maskz_expandloadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m128i {
    _mm_mask_expandloadu_epi32(_mm_setzero_si128(), k, mem_addr)
35889}
35890
35891/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35892///
35893/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_epi64)
35894#[inline]
35895#[target_feature(enable = "avx512f")]
35896#[cfg_attr(test, assert_instr(vpexpandq))]
35897#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35898pub unsafe fn _mm512_mask_expandloadu_epi64(
35899 src: __m512i,
35900 k: __mmask8,
35901 mem_addr: *const i64,
35902) -> __m512i {
    transmute(expandloadq_512(mem_addr, src.as_i64x8(), k))
35904}
35905
35906/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35907///
35908/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_epi64)
35909#[inline]
35910#[target_feature(enable = "avx512f")]
35911#[cfg_attr(test, assert_instr(vpexpandq))]
35912#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35913pub unsafe fn _mm512_maskz_expandloadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m512i {
    _mm512_mask_expandloadu_epi64(_mm512_setzero_si512(), k, mem_addr)
35915}
35916
35917/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35918///
35919/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_epi64)
35920#[inline]
35921#[target_feature(enable = "avx512f,avx512vl")]
35922#[cfg_attr(test, assert_instr(vpexpandq))]
35923#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35924pub unsafe fn _mm256_mask_expandloadu_epi64(
35925 src: __m256i,
35926 k: __mmask8,
35927 mem_addr: *const i64,
35928) -> __m256i {
    transmute(expandloadq_256(mem_addr, src.as_i64x4(), k))
35930}
35931
35932/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35933///
35934/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_epi64)
35935#[inline]
35936#[target_feature(enable = "avx512f,avx512vl")]
35937#[cfg_attr(test, assert_instr(vpexpandq))]
35938#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35939pub unsafe fn _mm256_maskz_expandloadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m256i {
    _mm256_mask_expandloadu_epi64(_mm256_setzero_si256(), k, mem_addr)
35941}
35942
35943/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35944///
35945/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_epi64)
35946#[inline]
35947#[target_feature(enable = "avx512f,avx512vl")]
35948#[cfg_attr(test, assert_instr(vpexpandq))]
35949#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35950pub unsafe fn _mm_mask_expandloadu_epi64(
35951 src: __m128i,
35952 k: __mmask8,
35953 mem_addr: *const i64,
35954) -> __m128i {
    transmute(expandloadq_128(mem_addr, src.as_i64x2(), k))
35956}
35957
35958/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35959///
35960/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_epi64)
35961#[inline]
35962#[target_feature(enable = "avx512f,avx512vl")]
35963#[cfg_attr(test, assert_instr(vpexpandq))]
35964#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35965pub unsafe fn _mm_maskz_expandloadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m128i {
    _mm_mask_expandloadu_epi64(_mm_setzero_si128(), k, mem_addr)
35967}
35968
35969/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35970///
35971/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_ps)
35972#[inline]
35973#[target_feature(enable = "avx512f")]
35974#[cfg_attr(test, assert_instr(vexpandps))]
35975#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35976pub unsafe fn _mm512_mask_expandloadu_ps(
35977 src: __m512,
35978 k: __mmask16,
35979 mem_addr: *const f32,
35980) -> __m512 {
    transmute(expandloadps_512(mem_addr, src.as_f32x16(), k))
35982}
35983
35984/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35985///
35986/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_ps)
35987#[inline]
35988#[target_feature(enable = "avx512f")]
35989#[cfg_attr(test, assert_instr(vexpandps))]
35990#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
35991pub unsafe fn _mm512_maskz_expandloadu_ps(k: __mmask16, mem_addr: *const f32) -> __m512 {
    _mm512_mask_expandloadu_ps(_mm512_setzero_ps(), k, mem_addr)
35993}
35994
35995/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35996///
35997/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_ps)
35998#[inline]
35999#[target_feature(enable = "avx512f,avx512vl")]
36000#[cfg_attr(test, assert_instr(vexpandps))]
36001#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36002pub unsafe fn _mm256_mask_expandloadu_ps(src: __m256, k: __mmask8, mem_addr: *const f32) -> __m256 {
    transmute(expandloadps_256(mem_addr, src.as_f32x8(), k))
36004}
36005
36006/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
36007///
36008/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_ps)
36009#[inline]
36010#[target_feature(enable = "avx512f,avx512vl")]
36011#[cfg_attr(test, assert_instr(vexpandps))]
36012#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36013pub unsafe fn _mm256_maskz_expandloadu_ps(k: __mmask8, mem_addr: *const f32) -> __m256 {
    _mm256_mask_expandloadu_ps(_mm256_setzero_ps(), k, mem_addr)
36015}
36016
36017/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
36018///
36019/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_ps)
36020#[inline]
36021#[target_feature(enable = "avx512f,avx512vl")]
36022#[cfg_attr(test, assert_instr(vexpandps))]
36023#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36024pub unsafe fn _mm_mask_expandloadu_ps(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
    transmute(expandloadps_128(mem_addr, src.as_f32x4(), k))
36026}
36027
36028/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
36029///
36030/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_ps)
36031#[inline]
36032#[target_feature(enable = "avx512f,avx512vl")]
36033#[cfg_attr(test, assert_instr(vexpandps))]
36034#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36035pub unsafe fn _mm_maskz_expandloadu_ps(k: __mmask8, mem_addr: *const f32) -> __m128 {
    _mm_mask_expandloadu_ps(_mm_setzero_ps(), k, mem_addr)
36037}
36038
36039/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
36040///
36041/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_pd)
36042#[inline]
36043#[target_feature(enable = "avx512f")]
36044#[cfg_attr(test, assert_instr(vexpandpd))]
36045#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36046pub unsafe fn _mm512_mask_expandloadu_pd(
36047 src: __m512d,
36048 k: __mmask8,
36049 mem_addr: *const f64,
36050) -> __m512d {
    transmute(expandloadpd_512(mem_addr, src.as_f64x8(), k))
36052}
36053
36054/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
36055///
36056/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_pd)
36057#[inline]
36058#[target_feature(enable = "avx512f")]
36059#[cfg_attr(test, assert_instr(vexpandpd))]
36060#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36061pub unsafe fn _mm512_maskz_expandloadu_pd(k: __mmask8, mem_addr: *const f64) -> __m512d {
    _mm512_mask_expandloadu_pd(_mm512_setzero_pd(), k, mem_addr)
36063}
36064
36065/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
36066///
36067/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_pd)
36068#[inline]
36069#[target_feature(enable = "avx512f,avx512vl")]
36070#[cfg_attr(test, assert_instr(vexpandpd))]
36071#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36072pub unsafe fn _mm256_mask_expandloadu_pd(
36073 src: __m256d,
36074 k: __mmask8,
36075 mem_addr: *const f64,
36076) -> __m256d {
    transmute(expandloadpd_256(mem_addr, src.as_f64x4(), k))
36078}
36079
36080/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
36081///
36082/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_pd)
36083#[inline]
36084#[target_feature(enable = "avx512f,avx512vl")]
36085#[cfg_attr(test, assert_instr(vexpandpd))]
36086#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36087pub unsafe fn _mm256_maskz_expandloadu_pd(k: __mmask8, mem_addr: *const f64) -> __m256d {
36088    _mm256_mask_expandloadu_pd(_mm256_setzero_pd(), k, mem_addr)
36089}
36090
36091/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
36092///
36093/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_pd)
36094#[inline]
36095#[target_feature(enable = "avx512f,avx512vl")]
36096#[cfg_attr(test, assert_instr(vexpandpd))]
36097#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36098pub unsafe fn _mm_mask_expandloadu_pd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
36099    transmute(expandloadpd_128(mem_addr, src.as_f64x2(), k))
36100}
36101
36102/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
36103///
36104/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_pd)
36105#[inline]
36106#[target_feature(enable = "avx512f,avx512vl")]
36107#[cfg_attr(test, assert_instr(vexpandpd))]
36108#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36109pub unsafe fn _mm_maskz_expandloadu_pd(k: __mmask8, mem_addr: *const f64) -> __m128d {
36110    _mm_mask_expandloadu_pd(_mm_setzero_pd(), k, mem_addr)
36111}
36112
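// A minimal test-only sketch (not part of the intrinsic surface) of how the
// masked expand-loads above behave: active lanes consume consecutive elements
// from memory, in lane order, while inactive lanes keep `src`. The helper name
// is illustrative only.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn expandloadu_pd_sketch() {
    let mem = [10.0f64, 20.0];
    let src = _mm_set_pd(2.0, 1.0); // lanes: [1.0, 2.0]
    // Only lane 1 is active, so it receives mem[0]; lane 0 is copied from `src`.
    let r = _mm_mask_expandloadu_pd(src, 0b10, mem.as_ptr());
    let mut out = [0.0f64; 2];
    _mm_storeu_pd(out.as_mut_ptr(), r);
    assert_eq!(out, [1.0, 10.0]);
}
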
36113/// Set packed double-precision (64-bit) floating-point elements in dst with the supplied values in reverse order.
36114///
36115/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_pd&expand=5002)
36116#[inline]
36117#[target_feature(enable = "avx512f")]
36118#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36119pub fn _mm512_setr_pd(
36120 e0: f64,
36121 e1: f64,
36122 e2: f64,
36123 e3: f64,
36124 e4: f64,
36125 e5: f64,
36126 e6: f64,
36127 e7: f64,
36128) -> __m512d {
36129 unsafe {
36130        let r: f64x8 = f64x8::new(e0, e1, e2, e3, e4, e5, e6, e7);
36131        transmute(r)
36132 }
36133}
36134
36135/// Set packed double-precision (64-bit) floating-point elements in dst with the supplied values.
36136///
36137/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_pd&expand=4924)
36138#[inline]
36139#[target_feature(enable = "avx512f")]
36140#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36141pub fn _mm512_set_pd(
36142 e0: f64,
36143 e1: f64,
36144 e2: f64,
36145 e3: f64,
36146 e4: f64,
36147 e5: f64,
36148 e6: f64,
36149 e7: f64,
36150) -> __m512d {
36151    _mm512_setr_pd(e7, e6, e5, e4, e3, e2, e1, e0)
36152}
36153
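// A small test-only sketch of the argument ordering: `_mm512_setr_pd` places its
// first argument in the lowest lane, while `_mm512_set_pd` takes the same values
// in reverse, so these two calls build the same vector. The helper name is ours.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn set_pd_ordering_sketch() {
    let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
    let b = _mm512_set_pd(7., 6., 5., 4., 3., 2., 1., 0.);
    let (mut out_a, mut out_b) = ([0.0f64; 8], [0.0f64; 8]);
    _mm512_storeu_pd(out_a.as_mut_ptr(), a);
    _mm512_storeu_pd(out_b.as_mut_ptr(), b);
    assert_eq!(out_a, out_b);
    assert_eq!(out_a[0], 0.0); // lowest lane
}
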
36154/// Move the lower single-precision (32-bit) floating-point element from b to the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36155///
36156/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_move_ss&expand=3832)
36157#[inline]
36158#[target_feature(enable = "avx512f")]
36159#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36160#[cfg_attr(test, assert_instr(vmovss))]
36161pub fn _mm_mask_move_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36162 unsafe {
36163 let extractsrc: f32 = simd_extract!(src, 0);
36164 let mut mov: f32 = extractsrc;
36165 if (k & 0b00000001) != 0 {
36166 mov = simd_extract!(b, 0);
36167 }
36168 simd_insert!(a, 0, mov)
36169 }
36170}
36171
36172/// Move the lower single-precision (32-bit) floating-point element from b to the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36173///
36174/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_move_ss&expand=3833)
36175#[inline]
36176#[target_feature(enable = "avx512f")]
36177#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36178#[cfg_attr(test, assert_instr(vmovss))]
36179pub fn _mm_maskz_move_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36180 unsafe {
36181 let mut mov: f32 = 0.;
36182 if (k & 0b00000001) != 0 {
36183 mov = simd_extract!(b, 0);
36184 }
36185 simd_insert!(a, 0, mov)
36186 }
36187}
36188
36189/// Move the lower double-precision (64-bit) floating-point element from b to the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36190///
36191/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_move_sd&expand=3829)
36192#[inline]
36193#[target_feature(enable = "avx512f")]
36194#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36195#[cfg_attr(test, assert_instr(vmovsd))]
36196pub fn _mm_mask_move_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36197 unsafe {
36198 let extractsrc: f64 = simd_extract!(src, 0);
36199 let mut mov: f64 = extractsrc;
36200 if (k & 0b00000001) != 0 {
36201 mov = simd_extract!(b, 0);
36202 }
36203 simd_insert!(a, 0, mov)
36204 }
36205}
36206
36207/// Move the lower double-precision (64-bit) floating-point element from b to the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36208///
36209/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_move_sd&expand=3830)
36210#[inline]
36211#[target_feature(enable = "avx512f")]
36212#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36213#[cfg_attr(test, assert_instr(vmovsd))]
36214pub fn _mm_maskz_move_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36215 unsafe {
36216 let mut mov: f64 = 0.;
36217 if (k & 0b00000001) != 0 {
36218 mov = simd_extract!(b, 0);
36219 }
36220 simd_insert!(a, 0, mov)
36221 }
36222}
36223
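// Test-only sketch of the masked scalar moves above: the low lane comes from `b`
// when mask bit 0 is set, otherwise from `src` (writemask) or zero (zeromask);
// the upper lanes always come from `a`. Helper name is illustrative only.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn mask_move_ss_sketch() {
    let src = _mm_set_ps(0., 0., 0., 99.0);
    let a = _mm_set_ps(4., 3., 2., 1.0);
    let b = _mm_set_ps(40., 30., 20., 10.0);
    assert_eq!(_mm_cvtss_f32(_mm_mask_move_ss(src, 0b1, a, b)), 10.0);
    assert_eq!(_mm_cvtss_f32(_mm_mask_move_ss(src, 0b0, a, b)), 99.0);
    assert_eq!(_mm_cvtss_f32(_mm_maskz_move_ss(0b0, a, b)), 0.0);
}
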
36224/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36225///
36226/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_add_ss&expand=159)
36227#[inline]
36228#[target_feature(enable = "avx512f")]
36229#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36230#[cfg_attr(test, assert_instr(vaddss))]
36231pub fn _mm_mask_add_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36232 unsafe {
36233 let extractsrc: f32 = simd_extract!(src, 0);
36234 let mut add: f32 = extractsrc;
36235 if (k & 0b00000001) != 0 {
36236 let extracta: f32 = simd_extract!(a, 0);
36237 let extractb: f32 = simd_extract!(b, 0);
36238 add = extracta + extractb;
36239 }
36240 simd_insert!(a, 0, add)
36241 }
36242}
36243
36244/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36245///
36246/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_add_ss&expand=160)
36247#[inline]
36248#[target_feature(enable = "avx512f")]
36249#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36250#[cfg_attr(test, assert_instr(vaddss))]
36251pub fn _mm_maskz_add_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36252 unsafe {
36253 let mut add: f32 = 0.;
36254 if (k & 0b00000001) != 0 {
36255 let extracta: f32 = simd_extract!(a, 0);
36256 let extractb: f32 = simd_extract!(b, 0);
36257 add = extracta + extractb;
36258 }
36259 simd_insert!(a, 0, add)
36260 }
36261}
36262
36263/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36264///
36265/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_add_sd&expand=155)
36266#[inline]
36267#[target_feature(enable = "avx512f")]
36268#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36269#[cfg_attr(test, assert_instr(vaddsd))]
36270pub fn _mm_mask_add_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36271 unsafe {
36272 let extractsrc: f64 = simd_extract!(src, 0);
36273 let mut add: f64 = extractsrc;
36274 if (k & 0b00000001) != 0 {
36275 let extracta: f64 = simd_extract!(a, 0);
36276 let extractb: f64 = simd_extract!(b, 0);
36277 add = extracta + extractb;
36278 }
36279 simd_insert!(a, 0, add)
36280 }
36281}
36282
36283/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36284///
36285/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_add_sd&expand=156)
36286#[inline]
36287#[target_feature(enable = "avx512f")]
36288#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36289#[cfg_attr(test, assert_instr(vaddsd))]
36290pub fn _mm_maskz_add_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36291 unsafe {
36292 let mut add: f64 = 0.;
36293 if (k & 0b00000001) != 0 {
36294 let extracta: f64 = simd_extract!(a, 0);
36295 let extractb: f64 = simd_extract!(b, 0);
36296 add = extracta + extractb;
36297 }
36298 simd_insert!(a, 0, add)
36299 }
36300}
36301
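// Test-only sketch of the masked scalar add: lane 0 holds a[0] + b[0] when mask
// bit 0 is set, otherwise src[0] (writemask) or 0.0 (zeromask). Helper name is
// illustrative only.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn mask_add_ss_sketch() {
    let src = _mm_set_ps(0., 0., 0., 100.0);
    let a = _mm_set_ps(0., 0., 0., 1.0);
    let b = _mm_set_ps(0., 0., 0., 10.0);
    assert_eq!(_mm_cvtss_f32(_mm_mask_add_ss(src, 0b1, a, b)), 11.0);
    assert_eq!(_mm_cvtss_f32(_mm_mask_add_ss(src, 0b0, a, b)), 100.0);
    assert_eq!(_mm_cvtss_f32(_mm_maskz_add_ss(0b0, a, b)), 0.0);
}
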
36302/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36303///
36304/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sub_ss&expand=5750)
36305#[inline]
36306#[target_feature(enable = "avx512f")]
36307#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36308#[cfg_attr(test, assert_instr(vsubss))]
36309pub fn _mm_mask_sub_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36310 unsafe {
36311 let extractsrc: f32 = simd_extract!(src, 0);
36312 let mut add: f32 = extractsrc;
36313 if (k & 0b00000001) != 0 {
36314 let extracta: f32 = simd_extract!(a, 0);
36315 let extractb: f32 = simd_extract!(b, 0);
36316 add = extracta - extractb;
36317 }
36318 simd_insert!(a, 0, add)
36319 }
36320}
36321
36322/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36323///
36324/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sub_ss&expand=5751)
36325#[inline]
36326#[target_feature(enable = "avx512f")]
36327#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36328#[cfg_attr(test, assert_instr(vsubss))]
36329pub fn _mm_maskz_sub_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36330 unsafe {
36331 let mut add: f32 = 0.;
36332 if (k & 0b00000001) != 0 {
36333 let extracta: f32 = simd_extract!(a, 0);
36334 let extractb: f32 = simd_extract!(b, 0);
36335 add = extracta - extractb;
36336 }
36337 simd_insert!(a, 0, add)
36338 }
36339}
36340
36341/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36342///
36343/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sub_sd&expand=5746)
36344#[inline]
36345#[target_feature(enable = "avx512f")]
36346#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36347#[cfg_attr(test, assert_instr(vsubsd))]
36348pub fn _mm_mask_sub_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36349 unsafe {
36350 let extractsrc: f64 = simd_extract!(src, 0);
36351 let mut add: f64 = extractsrc;
36352 if (k & 0b00000001) != 0 {
36353 let extracta: f64 = simd_extract!(a, 0);
36354 let extractb: f64 = simd_extract!(b, 0);
36355 add = extracta - extractb;
36356 }
36357 simd_insert!(a, 0, add)
36358 }
36359}
36360
36361/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36362///
36363/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sub_sd&expand=5747)
36364#[inline]
36365#[target_feature(enable = "avx512f")]
36366#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36367#[cfg_attr(test, assert_instr(vsubsd))]
36368pub fn _mm_maskz_sub_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36369 unsafe {
36370 let mut add: f64 = 0.;
36371 if (k & 0b00000001) != 0 {
36372 let extracta: f64 = simd_extract!(a, 0);
36373 let extractb: f64 = simd_extract!(b, 0);
36374 add = extracta - extractb;
36375 }
36376 simd_insert!(a, 0, add)
36377 }
36378}
36379
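// Test-only sketch of the masked scalar subtract (double-precision variant):
// lane 0 holds a[0] - b[0] when mask bit 0 is set, otherwise src[0] or 0.0.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn mask_sub_sd_sketch() {
    let src = _mm_set_pd(0., 100.0);
    let a = _mm_set_pd(2., 7.0);
    let b = _mm_set_pd(20., 3.0);
    assert_eq!(_mm_cvtsd_f64(_mm_mask_sub_sd(src, 0b1, a, b)), 4.0);
    assert_eq!(_mm_cvtsd_f64(_mm_mask_sub_sd(src, 0b0, a, b)), 100.0);
    assert_eq!(_mm_cvtsd_f64(_mm_maskz_sub_sd(0b0, a, b)), 0.0);
}
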
36380/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36381///
36382/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_mul_ss&expand=3950)
36383#[inline]
36384#[target_feature(enable = "avx512f")]
36385#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36386#[cfg_attr(test, assert_instr(vmulss))]
36387pub fn _mm_mask_mul_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36388 unsafe {
36389 let extractsrc: f32 = simd_extract!(src, 0);
36390 let mut add: f32 = extractsrc;
36391 if (k & 0b00000001) != 0 {
36392 let extracta: f32 = simd_extract!(a, 0);
36393 let extractb: f32 = simd_extract!(b, 0);
36394 add = extracta * extractb;
36395 }
36396 simd_insert!(a, 0, add)
36397 }
36398}
36399
36400/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36401///
36402/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_mul_ss&expand=3951)
36403#[inline]
36404#[target_feature(enable = "avx512f")]
36405#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36406#[cfg_attr(test, assert_instr(vmulss))]
36407pub fn _mm_maskz_mul_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36408 unsafe {
36409 let mut add: f32 = 0.;
36410 if (k & 0b00000001) != 0 {
36411 let extracta: f32 = simd_extract!(a, 0);
36412 let extractb: f32 = simd_extract!(b, 0);
36413 add = extracta * extractb;
36414 }
36415 simd_insert!(a, 0, add)
36416 }
36417}
36418
36419/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36420///
36421/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_mul_sd&expand=3947)
36422#[inline]
36423#[target_feature(enable = "avx512f")]
36424#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36425#[cfg_attr(test, assert_instr(vmulsd))]
36426pub fn _mm_mask_mul_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36427 unsafe {
36428 let extractsrc: f64 = simd_extract!(src, 0);
36429 let mut add: f64 = extractsrc;
36430 if (k & 0b00000001) != 0 {
36431 let extracta: f64 = simd_extract!(a, 0);
36432 let extractb: f64 = simd_extract!(b, 0);
36433 add = extracta * extractb;
36434 }
36435 simd_insert!(a, 0, add)
36436 }
36437}
36438
36439/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36440///
36441/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_mul_sd&expand=3948)
36442#[inline]
36443#[target_feature(enable = "avx512f")]
36444#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36445#[cfg_attr(test, assert_instr(vmulsd))]
36446pub fn _mm_maskz_mul_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36447 unsafe {
36448 let mut add: f64 = 0.;
36449 if (k & 0b00000001) != 0 {
36450 let extracta: f64 = simd_extract!(a, 0);
36451 let extractb: f64 = simd_extract!(b, 0);
36452 add = extracta * extractb;
36453 }
36454 simd_insert!(a, 0, add)
36455 }
36456}
36457
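// Test-only sketch of the masked scalar multiply: lane 0 holds a[0] * b[0] when
// mask bit 0 is set, otherwise src[0] (writemask) or 0.0 (zeromask).
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn mask_mul_ss_sketch() {
    let src = _mm_set_ps(0., 0., 0., 5.0);
    let a = _mm_set_ps(0., 0., 0., 3.0);
    let b = _mm_set_ps(0., 0., 0., 4.0);
    assert_eq!(_mm_cvtss_f32(_mm_mask_mul_ss(src, 0b1, a, b)), 12.0);
    assert_eq!(_mm_cvtss_f32(_mm_mask_mul_ss(src, 0b0, a, b)), 5.0);
    assert_eq!(_mm_cvtss_f32(_mm_maskz_mul_ss(0b0, a, b)), 0.0);
}
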
36458/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36459///
36460/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_div_ss&expand=2181)
36461#[inline]
36462#[target_feature(enable = "avx512f")]
36463#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36464#[cfg_attr(test, assert_instr(vdivss))]
36465pub fn _mm_mask_div_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36466 unsafe {
36467 let extractsrc: f32 = simd_extract!(src, 0);
36468 let mut add: f32 = extractsrc;
36469 if (k & 0b00000001) != 0 {
36470 let extracta: f32 = simd_extract!(a, 0);
36471 let extractb: f32 = simd_extract!(b, 0);
36472 add = extracta / extractb;
36473 }
36474 simd_insert!(a, 0, add)
36475 }
36476}
36477
36478/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36479///
36480/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_div_ss&expand=2182)
36481#[inline]
36482#[target_feature(enable = "avx512f")]
36483#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36484#[cfg_attr(test, assert_instr(vdivss))]
36485pub fn _mm_maskz_div_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36486 unsafe {
36487 let mut add: f32 = 0.;
36488 if (k & 0b00000001) != 0 {
36489 let extracta: f32 = simd_extract!(a, 0);
36490 let extractb: f32 = simd_extract!(b, 0);
36491 add = extracta / extractb;
36492 }
36493 simd_insert!(a, 0, add)
36494 }
36495}
36496
36497/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36498///
36499/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_div_sd&expand=2178)
36500#[inline]
36501#[target_feature(enable = "avx512f")]
36502#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36503#[cfg_attr(test, assert_instr(vdivsd))]
36504pub fn _mm_mask_div_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36505 unsafe {
36506 let extractsrc: f64 = simd_extract!(src, 0);
36507 let mut add: f64 = extractsrc;
36508 if (k & 0b00000001) != 0 {
36509 let extracta: f64 = simd_extract!(a, 0);
36510 let extractb: f64 = simd_extract!(b, 0);
36511 add = extracta / extractb;
36512 }
36513 simd_insert!(a, 0, add)
36514 }
36515}
36516
36517/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36518///
36519/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_div_sd&expand=2179)
36520#[inline]
36521#[target_feature(enable = "avx512f")]
36522#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36523#[cfg_attr(test, assert_instr(vdivsd))]
36524pub fn _mm_maskz_div_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36525 unsafe {
36526 let mut add: f64 = 0.;
36527 if (k & 0b00000001) != 0 {
36528 let extracta: f64 = simd_extract!(a, 0);
36529 let extractb: f64 = simd_extract!(b, 0);
36530 add = extracta / extractb;
36531 }
36532 simd_insert!(a, 0, add)
36533 }
36534}
36535
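// Test-only sketch of the masked scalar divide (double-precision variant):
// lane 0 holds a[0] / b[0] when mask bit 0 is set, otherwise src[0] or 0.0.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn mask_div_sd_sketch() {
    let src = _mm_set_pd(0., 9.0);
    let a = _mm_set_pd(0., 8.0);
    let b = _mm_set_pd(1., 2.0);
    assert_eq!(_mm_cvtsd_f64(_mm_mask_div_sd(src, 0b1, a, b)), 4.0);
    assert_eq!(_mm_cvtsd_f64(_mm_mask_div_sd(src, 0b0, a, b)), 9.0);
    assert_eq!(_mm_cvtsd_f64(_mm_maskz_div_sd(0b0, a, b)), 0.0);
}
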
36536/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36537///
36538/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_max_ss&expand=3672)
36539#[inline]
36540#[target_feature(enable = "avx512f")]
36541#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36542#[cfg_attr(test, assert_instr(vmaxss))]
36543pub fn _mm_mask_max_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36544 unsafe {
36545        transmute(vmaxss(
36546 a.as_f32x4(),
36547 b.as_f32x4(),
36548 src.as_f32x4(),
36549            k,
36550 _MM_FROUND_CUR_DIRECTION,
36551 ))
36552 }
36553}
36554
36555/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36556///
36557/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_max_ss&expand=3673)
36558#[inline]
36559#[target_feature(enable = "avx512f")]
36560#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36561#[cfg_attr(test, assert_instr(vmaxss))]
36562pub fn _mm_maskz_max_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36563 unsafe {
36564        transmute(vmaxss(
36565 a.as_f32x4(),
36566 b.as_f32x4(),
36567            f32x4::ZERO,
36568            k,
36569 _MM_FROUND_CUR_DIRECTION,
36570 ))
36571 }
36572}
36573
36574/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36575///
36576/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_max_sd&expand=3669)
36577#[inline]
36578#[target_feature(enable = "avx512f")]
36579#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36580#[cfg_attr(test, assert_instr(vmaxsd))]
36581pub fn _mm_mask_max_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36582 unsafe {
36583        transmute(vmaxsd(
36584 a.as_f64x2(),
36585 b.as_f64x2(),
36586 src.as_f64x2(),
36587            k,
36588 _MM_FROUND_CUR_DIRECTION,
36589 ))
36590 }
36591}
36592
36593/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36594///
36595/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_max_sd&expand=3670)
36596#[inline]
36597#[target_feature(enable = "avx512f")]
36598#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36599#[cfg_attr(test, assert_instr(vmaxsd))]
36600pub fn _mm_maskz_max_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36601 unsafe {
36602        transmute(vmaxsd(
36603 a.as_f64x2(),
36604 b.as_f64x2(),
36605            f64x2::ZERO,
36606            k,
36607 _MM_FROUND_CUR_DIRECTION,
36608 ))
36609 }
36610}
36611
36612/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36613///
36614/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_min_ss&expand=3786)
36615#[inline]
36616#[target_feature(enable = "avx512f")]
36617#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36618#[cfg_attr(test, assert_instr(vminss))]
36619pub fn _mm_mask_min_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36620 unsafe {
36621        transmute(vminss(
36622 a.as_f32x4(),
36623 b.as_f32x4(),
36624 src.as_f32x4(),
36625            k,
36626 _MM_FROUND_CUR_DIRECTION,
36627 ))
36628 }
36629}
36630
36631/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36632///
36633/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_min_ss&expand=3787)
36634#[inline]
36635#[target_feature(enable = "avx512f")]
36636#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36637#[cfg_attr(test, assert_instr(vminss))]
36638pub fn _mm_maskz_min_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36639 unsafe {
36640        transmute(vminss(
36641 a.as_f32x4(),
36642 b.as_f32x4(),
36643            f32x4::ZERO,
36644            k,
36645 _MM_FROUND_CUR_DIRECTION,
36646 ))
36647 }
36648}
36649
36650/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36651///
36652/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_min_sd&expand=3783)
36653#[inline]
36654#[target_feature(enable = "avx512f")]
36655#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36656#[cfg_attr(test, assert_instr(vminsd))]
36657pub fn _mm_mask_min_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36658 unsafe {
36659        transmute(vminsd(
36660 a.as_f64x2(),
36661 b.as_f64x2(),
36662 src.as_f64x2(),
36663            k,
36664 _MM_FROUND_CUR_DIRECTION,
36665 ))
36666 }
36667}
36668
36669/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36670///
36671/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_min_sd&expand=3784)
36672#[inline]
36673#[target_feature(enable = "avx512f")]
36674#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36675#[cfg_attr(test, assert_instr(vminsd))]
36676pub fn _mm_maskz_min_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36677 unsafe {
36678        transmute(vminsd(
36679 a.as_f64x2(),
36680 b.as_f64x2(),
36681            f64x2::ZERO,
36682            k,
36683 _MM_FROUND_CUR_DIRECTION,
36684 ))
36685 }
36686}
36687
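// Test-only sketch of the masked scalar max/min: the comparison always uses the
// low lanes of `a` and `b`; the mask only decides whether that result is kept.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn mask_max_min_ss_sketch() {
    let src = _mm_set_ps(0., 0., 0., -1.0);
    let a = _mm_set_ps(0., 0., 0., 2.0);
    let b = _mm_set_ps(0., 0., 0., 5.0);
    assert_eq!(_mm_cvtss_f32(_mm_mask_max_ss(src, 0b1, a, b)), 5.0);
    assert_eq!(_mm_cvtss_f32(_mm_mask_min_ss(src, 0b1, a, b)), 2.0);
    assert_eq!(_mm_cvtss_f32(_mm_mask_max_ss(src, 0b0, a, b)), -1.0);
    assert_eq!(_mm_cvtss_f32(_mm_maskz_min_ss(0b0, a, b)), 0.0);
}
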
36688/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36689///
36690/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sqrt_ss&expand=5387)
36691#[inline]
36692#[target_feature(enable = "avx512f")]
36693#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36694#[cfg_attr(test, assert_instr(vsqrtss))]
36695pub fn _mm_mask_sqrt_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36696    unsafe { vsqrtss(a, b, src, k, _MM_FROUND_CUR_DIRECTION) }
36697}
36698
36699/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36700///
36701/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sqrt_ss&expand=5388)
36702#[inline]
36703#[target_feature(enable = "avx512f")]
36704#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36705#[cfg_attr(test, assert_instr(vsqrtss))]
36706pub fn _mm_maskz_sqrt_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36707    unsafe { vsqrtss(a, b, _mm_setzero_ps(), k, _MM_FROUND_CUR_DIRECTION) }
36708}
36709
36710/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36711///
36712/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sqrt_sd&expand=5384)
36713#[inline]
36714#[target_feature(enable = "avx512f")]
36715#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36716#[cfg_attr(test, assert_instr(vsqrtsd))]
36717pub fn _mm_mask_sqrt_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36718    unsafe { vsqrtsd(a, b, src, k, _MM_FROUND_CUR_DIRECTION) }
36719}
36720
36721/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36722///
36723/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sqrt_sd&expand=5385)
36724#[inline]
36725#[target_feature(enable = "avx512f")]
36726#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36727#[cfg_attr(test, assert_instr(vsqrtsd))]
36728pub fn _mm_maskz_sqrt_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36729    unsafe { vsqrtsd(a, b, _mm_setzero_pd(), k, _MM_FROUND_CUR_DIRECTION) }
36730}
36731
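// Test-only sketch: the square root is taken from the low lane of `b` (not `a`),
// and the mask gates whether it is written to lane 0 of the result.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn mask_sqrt_ss_sketch() {
    let src = _mm_set_ps(0., 0., 0., 7.0);
    let a = _mm_set_ps(0., 0., 0., 1.0);
    let b = _mm_set_ps(0., 0., 0., 9.0);
    assert_eq!(_mm_cvtss_f32(_mm_mask_sqrt_ss(src, 0b1, a, b)), 3.0);
    assert_eq!(_mm_cvtss_f32(_mm_mask_sqrt_ss(src, 0b0, a, b)), 7.0);
    assert_eq!(_mm_cvtss_f32(_mm_maskz_sqrt_ss(0b0, a, b)), 0.0);
}
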
36732/// Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
36733///
36734/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rsqrt14_ss&expand=4825)
36735#[inline]
36736#[target_feature(enable = "avx512f")]
36737#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36738#[cfg_attr(test, assert_instr(vrsqrt14ss))]
36739pub fn _mm_rsqrt14_ss(a: __m128, b: __m128) -> __m128 {
36740    unsafe { transmute(vrsqrt14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, 0b1)) }
36741}
36742
36743/// Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
36744///
36745/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rsqrt14_ss&expand=4823)
36746#[inline]
36747#[target_feature(enable = "avx512f")]
36748#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36749#[cfg_attr(test, assert_instr(vrsqrt14ss))]
36750pub fn _mm_mask_rsqrt14_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36751    unsafe { transmute(vrsqrt14ss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k)) }
36752}
36753
36754/// Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
36755///
36756/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rsqrt14_ss&expand=4824)
36757#[inline]
36758#[target_feature(enable = "avx512f")]
36759#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36760#[cfg_attr(test, assert_instr(vrsqrt14ss))]
36761pub fn _mm_maskz_rsqrt14_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36762    unsafe { transmute(vrsqrt14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, k)) }
36763}
36764
36765/// Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
36766///
36767/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rsqrt14_sd&expand=4822)
36768#[inline]
36769#[target_feature(enable = "avx512f")]
36770#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36771#[cfg_attr(test, assert_instr(vrsqrt14sd))]
36772pub fn _mm_rsqrt14_sd(a: __m128d, b: __m128d) -> __m128d {
36773    unsafe { transmute(vrsqrt14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, 0b1)) }
36774}
36775
36776/// Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
36777///
36778/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rsqrt14_sd&expand=4820)
36779#[inline]
36780#[target_feature(enable = "avx512f")]
36781#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36782#[cfg_attr(test, assert_instr(vrsqrt14sd))]
36783pub fn _mm_mask_rsqrt14_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36784    unsafe { transmute(vrsqrt14sd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k)) }
36785}
36786
36787/// Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
36788///
36789/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rsqrt14_sd&expand=4821)
36790#[inline]
36791#[target_feature(enable = "avx512f")]
36792#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36793#[cfg_attr(test, assert_instr(vrsqrt14sd))]
36794pub fn _mm_maskz_rsqrt14_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36795    unsafe { transmute(vrsqrt14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, k)) }
36796}
36797
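// Test-only sketch: `_mm_rsqrt14_ss` approximates 1/sqrt(b[0]) to within a
// relative error of 2^-14, so only a loose bound is checked here.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn rsqrt14_ss_sketch() {
    let a = _mm_set_ps(0., 0., 0., 1.0);
    let b = _mm_set_ps(0., 0., 0., 4.0);
    let r = _mm_cvtss_f32(_mm_rsqrt14_ss(a, b)); // ~= 1/sqrt(4.0) = 0.5
    assert!(r > 0.4999 && r < 0.5001);
}
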
36798/// Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
36799///
36800/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rcp14_ss&expand=4508)
36801#[inline]
36802#[target_feature(enable = "avx512f")]
36803#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36804#[cfg_attr(test, assert_instr(vrcp14ss))]
36805pub fn _mm_rcp14_ss(a: __m128, b: __m128) -> __m128 {
36806    unsafe { transmute(vrcp14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, 0b1)) }
36807}
36808
36809/// Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
36810///
36811/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rcp14_ss&expand=4506)
36812#[inline]
36813#[target_feature(enable = "avx512f")]
36814#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36815#[cfg_attr(test, assert_instr(vrcp14ss))]
36816pub fn _mm_mask_rcp14_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36817    unsafe { transmute(vrcp14ss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k)) }
36818}
36819
36820/// Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
36821///
36822/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rcp14_ss&expand=4507)
36823#[inline]
36824#[target_feature(enable = "avx512f")]
36825#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36826#[cfg_attr(test, assert_instr(vrcp14ss))]
36827pub fn _mm_maskz_rcp14_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36828    unsafe { transmute(vrcp14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, k)) }
36829}
36830
36831/// Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
36832///
36833/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rcp14_sd&expand=4505)
36834#[inline]
36835#[target_feature(enable = "avx512f")]
36836#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36837#[cfg_attr(test, assert_instr(vrcp14sd))]
36838pub fn _mm_rcp14_sd(a: __m128d, b: __m128d) -> __m128d {
36839    unsafe { transmute(vrcp14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, 0b1)) }
36840}
36841
36842/// Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
36843///
36844/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rcp14_sd&expand=4503)
36845#[inline]
36846#[target_feature(enable = "avx512f")]
36847#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36848#[cfg_attr(test, assert_instr(vrcp14sd))]
36849pub fn _mm_mask_rcp14_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36850    unsafe { transmute(vrcp14sd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k)) }
36851}
36852
36853/// Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
36854///
36855/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rcp14_sd&expand=4504)
36856#[inline]
36857#[target_feature(enable = "avx512f")]
36858#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36859#[cfg_attr(test, assert_instr(vrcp14sd))]
36860pub fn _mm_maskz_rcp14_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36861    unsafe { transmute(vrcp14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, k)) }
36862}
36863
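// Test-only sketch: `_mm_rcp14_sd` approximates 1/b[0] to within 2^-14 relative
// error, while the upper lane is carried over from `a`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn rcp14_sd_sketch() {
    let a = _mm_set_pd(33.0, 1.0);
    let b = _mm_set_pd(0.0, 4.0);
    let r = _mm_rcp14_sd(a, b);
    let lo = _mm_cvtsd_f64(r); // ~= 1/4.0 = 0.25
    assert!(lo > 0.2499 && lo < 0.2501);
    let mut out = [0.0f64; 2];
    _mm_storeu_pd(out.as_mut_ptr(), r);
    assert_eq!(out[1], 33.0); // upper lane copied from `a`
}
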
36864/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
36865///
36866/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getexp_ss&expand=2862)
36867#[inline]
36868#[target_feature(enable = "avx512f")]
36869#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36870#[cfg_attr(test, assert_instr(vgetexpss))]
36871pub fn _mm_getexp_ss(a: __m128, b: __m128) -> __m128 {
36872 unsafe {
36873        transmute(vgetexpss(
36874 a.as_f32x4(),
36875 b.as_f32x4(),
36876            f32x4::ZERO,
36877            0b1,
36878 _MM_FROUND_NO_EXC,
36879 ))
36880 }
36881}
36882
36883/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
36884///
36885/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getexp_ss&expand=2863)
36886#[inline]
36887#[target_feature(enable = "avx512f")]
36888#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36889#[cfg_attr(test, assert_instr(vgetexpss))]
36890pub fn _mm_mask_getexp_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36891 unsafe {
36892        transmute(vgetexpss(
36893 a.as_f32x4(),
36894 b.as_f32x4(),
36895 src.as_f32x4(),
36896            k,
36897 _MM_FROUND_NO_EXC,
36898 ))
36899 }
36900}
36901
36902/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
36903///
36904/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getexp_ss&expand=2864)
36905#[inline]
36906#[target_feature(enable = "avx512f")]
36907#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36908#[cfg_attr(test, assert_instr(vgetexpss))]
36909pub fn _mm_maskz_getexp_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36910 unsafe {
36911        transmute(vgetexpss(
36912 a.as_f32x4(),
36913 b.as_f32x4(),
36914            f32x4::ZERO,
36915            k,
36916 _MM_FROUND_NO_EXC,
36917 ))
36918 }
36919}
36920
36921/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
36922///
36923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getexp_sd&expand=2859)
36924#[inline]
36925#[target_feature(enable = "avx512f")]
36926#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36927#[cfg_attr(test, assert_instr(vgetexpsd))]
36928pub fn _mm_getexp_sd(a: __m128d, b: __m128d) -> __m128d {
36929 unsafe {
36930        transmute(vgetexpsd(
36931 a.as_f64x2(),
36932 b.as_f64x2(),
36933            f64x2::ZERO,
36934            0b1,
36935 _MM_FROUND_NO_EXC,
36936 ))
36937 }
36938}
36939
36940/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
36941///
36942/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getexp_sd&expand=2860)
36943#[inline]
36944#[target_feature(enable = "avx512f")]
36945#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36946#[cfg_attr(test, assert_instr(vgetexpsd))]
36947pub fn _mm_mask_getexp_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36948 unsafe {
36949        transmute(vgetexpsd(
36950 a.as_f64x2(),
36951 b.as_f64x2(),
36952 src.as_f64x2(),
36953            k,
36954 _MM_FROUND_NO_EXC,
36955 ))
36956 }
36957}
36958
36959/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
36960///
36961/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getexp_sd&expand=2861)
36962#[inline]
36963#[target_feature(enable = "avx512f")]
36964#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36965#[cfg_attr(test, assert_instr(vgetexpsd))]
36966pub fn _mm_maskz_getexp_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36967 unsafe {
36968        transmute(vgetexpsd(
36969 a.as_f64x2(),
36970 b.as_f64x2(),
36971            f64x2::ZERO,
36972            k,
36973 _MM_FROUND_NO_EXC,
36974 ))
36975 }
36976}
36977
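// Test-only sketch: getexp returns floor(log2(|b[0]|)) as a float in the low lane.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn getexp_ss_sketch() {
    let a = _mm_set_ps(0., 0., 0., 1.0);
    let b = _mm_set_ps(0., 0., 0., 8.0);
    // floor(log2(8.0)) = 3
    assert_eq!(_mm_cvtss_f32(_mm_getexp_ss(a, b)), 3.0);
    let b = _mm_set_ps(0., 0., 0., 0.75);
    // floor(log2(0.75)) = -1
    assert_eq!(_mm_cvtss_f32(_mm_getexp_ss(a, b)), -1.0);
}
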
36978/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
36979/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
36980/// _MM_MANT_NORM_1_2 // interval [1, 2)\
36981/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
36982/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
36983/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
36984/// The sign is determined by sc which can take the following values:\
36985/// _MM_MANT_SIGN_src // sign = sign(src)\
36986/// _MM_MANT_SIGN_zero // sign = 0\
36987/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
36988/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
36989///
36990/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getmant_ss&expand=2898)
36991#[inline]
36992#[target_feature(enable = "avx512f")]
36993#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
36994#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0))]
36995#[rustc_legacy_const_generics(2, 3)]
36996pub fn _mm_getmant_ss<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
36997 a: __m128,
36998 b: __m128,
36999) -> __m128 {
37000 unsafe {
37001 static_assert_uimm_bits!(NORM, 4);
37002 static_assert_uimm_bits!(SIGN, 2);
37003 let a: f32x4 = a.as_f32x4();
37004 let b: f32x4 = b.as_f32x4();
37005 let r: f32x4 = vgetmantss(
37006 a,
37007 b,
37008 SIGN << 2 | NORM,
37009            f32x4::ZERO,
37010            0b1,
37011 _MM_FROUND_CUR_DIRECTION,
37012 );
37013        transmute(r)
37014 }
37015}
37016
37017/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
37018/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
37019/// _MM_MANT_NORM_1_2 // interval [1, 2)\
37020/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
37021/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
37022/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
37023/// The sign is determined by sc which can take the following values:\
37024/// _MM_MANT_SIGN_src // sign = sign(src)\
37025/// _MM_MANT_SIGN_zero // sign = 0\
37026/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
37027/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37028///
37029/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getmant_ss&expand=2899)
37030#[inline]
37031#[target_feature(enable = "avx512f")]
37032#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37033#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0))]
37034#[rustc_legacy_const_generics(4, 5)]
37035pub fn _mm_mask_getmant_ss<
37036 const NORM: _MM_MANTISSA_NORM_ENUM,
37037 const SIGN: _MM_MANTISSA_SIGN_ENUM,
37038>(
37039 src: __m128,
37040 k: __mmask8,
37041 a: __m128,
37042 b: __m128,
37043) -> __m128 {
37044 unsafe {
37045 static_assert_uimm_bits!(NORM, 4);
37046 static_assert_uimm_bits!(SIGN, 2);
37047 let a: f32x4 = a.as_f32x4();
37048 let b: f32x4 = b.as_f32x4();
37049 let src: f32x4 = src.as_f32x4();
37050        let r: f32x4 = vgetmantss(a, b, SIGN << 2 | NORM, src, k, _MM_FROUND_CUR_DIRECTION);
37051        transmute(r)
37052 }
37053}
37054
37055/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
37056/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
37057/// _MM_MANT_NORM_1_2 // interval [1, 2)\
37058/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
37059/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
37060/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
37061/// The sign is determined by sc which can take the following values:\
37062/// _MM_MANT_SIGN_src // sign = sign(src)\
37063/// _MM_MANT_SIGN_zero // sign = 0\
37064/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
37065/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37066///
37067/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getmant_ss&expand=2900)
37068#[inline]
37069#[target_feature(enable = "avx512f")]
37070#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37071#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0))]
37072#[rustc_legacy_const_generics(3, 4)]
37073pub fn _mm_maskz_getmant_ss<
37074 const NORM: _MM_MANTISSA_NORM_ENUM,
37075 const SIGN: _MM_MANTISSA_SIGN_ENUM,
37076>(
37077 k: __mmask8,
37078 a: __m128,
37079 b: __m128,
37080) -> __m128 {
37081 unsafe {
37082 static_assert_uimm_bits!(NORM, 4);
37083 static_assert_uimm_bits!(SIGN, 2);
37084 let a: f32x4 = a.as_f32x4();
37085 let b: f32x4 = b.as_f32x4();
37086 let r: f32x4 = vgetmantss(
37087 a,
37088 b,
37089 SIGN << 2 | NORM,
37090            f32x4::ZERO,
37091            k,
37092 _MM_FROUND_CUR_DIRECTION,
37093 );
37094        transmute(r)
37095 }
37096}
37097
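// Test-only sketch of the mantissa normalization described above: with
// _MM_MANT_NORM_1_2 the low lane of 12.0 (= 1.5 * 2^3) normalizes to 1.5, and
// with _MM_MANT_SIGN_src the sign follows the source operand. Helper name is
// illustrative only.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn getmant_ss_sketch() {
    let a = _mm_set_ps(0., 0., 0., 1.0);
    let b = _mm_set_ps(0., 0., 0., 12.0);
    let r = _mm_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_src>(a, b);
    assert_eq!(_mm_cvtss_f32(r), 1.5);
}
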
37098/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
37099/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
37100/// _MM_MANT_NORM_1_2 // interval [1, 2)\
37101/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
37102/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
37103/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
37104/// The sign is determined by sc which can take the following values:\
37105/// _MM_MANT_SIGN_src // sign = sign(src)\
37106/// _MM_MANT_SIGN_zero // sign = 0\
37107/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
37108/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37109///
37110/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getmant_sd&expand=2895)
37111#[inline]
37112#[target_feature(enable = "avx512f")]
37113#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37114#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0))]
37115#[rustc_legacy_const_generics(2, 3)]
37116pub fn _mm_getmant_sd<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
37117 a: __m128d,
37118 b: __m128d,
37119) -> __m128d {
37120 unsafe {
37121 static_assert_uimm_bits!(NORM, 4);
37122 static_assert_uimm_bits!(SIGN, 2);
37123 let a: f64x2 = a.as_f64x2();
37124 let b: f64x2 = b.as_f64x2();
37125 let r: f64x2 = vgetmantsd(
37126 a,
37127 b,
37128 SIGN << 2 | NORM,
37129            f64x2::ZERO,
37130            0b1,
37131 _MM_FROUND_CUR_DIRECTION,
37132 );
37133        transmute(r)
37134 }
37135}
37136
37137/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
37138/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
37139/// _MM_MANT_NORM_1_2 // interval [1, 2)\
37140/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
37141/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
37142/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
37143/// The sign is determined by sc which can take the following values:\
37144/// _MM_MANT_SIGN_src // sign = sign(src)\
37145/// _MM_MANT_SIGN_zero // sign = 0\
37146/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
37147/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37148///
37149/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getmant_sd&expand=2896)
37150#[inline]
37151#[target_feature(enable = "avx512f")]
37152#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37153#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0))]
37154#[rustc_legacy_const_generics(4, 5)]
37155pub fn _mm_mask_getmant_sd<
37156 const NORM: _MM_MANTISSA_NORM_ENUM,
37157 const SIGN: _MM_MANTISSA_SIGN_ENUM,
37158>(
37159 src: __m128d,
37160 k: __mmask8,
37161 a: __m128d,
37162 b: __m128d,
37163) -> __m128d {
37164 unsafe {
37165 static_assert_uimm_bits!(NORM, 4);
37166 static_assert_uimm_bits!(SIGN, 2);
37167 let a: f64x2 = a.as_f64x2();
37168 let b: f64x2 = b.as_f64x2();
37169 let src: f64x2 = src.as_f64x2();
        let r: f64x2 = vgetmantsd(a, b, SIGN << 2 | NORM, src, k, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
37172 }
37173}
37174
37175/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
37176/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
37177/// _MM_MANT_NORM_1_2 // interval [1, 2)\
37178/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
37179/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
37180/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
37181/// The sign is determined by sc which can take the following values:\
37182/// _MM_MANT_SIGN_src // sign = sign(src)\
37183/// _MM_MANT_SIGN_zero // sign = 0\
37184/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
37185/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37186///
37187/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getmant_sd&expand=2897)
37188#[inline]
37189#[target_feature(enable = "avx512f")]
37190#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37191#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0))]
37192#[rustc_legacy_const_generics(3, 4)]
37193pub fn _mm_maskz_getmant_sd<
37194 const NORM: _MM_MANTISSA_NORM_ENUM,
37195 const SIGN: _MM_MANTISSA_SIGN_ENUM,
37196>(
37197 k: __mmask8,
37198 a: __m128d,
37199 b: __m128d,
37200) -> __m128d {
37201 unsafe {
37202 static_assert_uimm_bits!(NORM, 4);
37203 static_assert_uimm_bits!(SIGN, 2);
37204 let a: f64x2 = a.as_f64x2();
37205 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vgetmantsd(
            a,
            b,
            SIGN << 2 | NORM,
            f64x2::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        );
        transmute(r)
37215 }
37216}
37217
37218/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
37219/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
37220/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
37221/// * [`_MM_FROUND_TO_NEG_INF`] : round down
37222/// * [`_MM_FROUND_TO_POS_INF`] : round up
37223/// * [`_MM_FROUND_TO_ZERO`] : truncate
37224/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37225///
37226/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_roundscale_ss&expand=4802)
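///
/// # Examples
///
/// A minimal usage sketch, not part of Intel's description: it checks for `avx512f`
/// at runtime; the `IMM8` values assume the "number of fraction bits" field sits in
/// bits 7:4 of the immediate, and the inputs are purely illustrative.
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///
///     if is_x86_feature_detected!("avx512f") {
///         // `unsafe` is required because this context does not itself enable `avx512f`.
///         unsafe {
///             let a = _mm_set_ss(0.0);
///             let b = _mm_set_ss(2.7);
///             // IMM8 = 0: keep 0 fraction bits, i.e. round to the nearest integer.
///             let r = _mm_roundscale_ss::<0>(a, b);
///             assert_eq!(_mm_cvtss_f32(r), 3.0);
///             // IMM8 = 0x10: keep 1 fraction bit, i.e. round to the nearest multiple of 0.5.
///             let r = _mm_roundscale_ss::<0x10>(a, b);
///             assert_eq!(_mm_cvtss_f32(r), 2.5);
///         }
///     }
/// }
/// ```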
37227#[inline]
37228#[target_feature(enable = "avx512f")]
37229#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37230#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 255))]
37231#[rustc_legacy_const_generics(2)]
37232pub fn _mm_roundscale_ss<const IMM8: i32>(a: __m128, b: __m128) -> __m128 {
37233 unsafe {
37234 static_assert_uimm_bits!(IMM8, 8);
37235 let a: f32x4 = a.as_f32x4();
37236 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vrndscaless(
            a,
            b,
            f32x4::ZERO,
            0b11111111,
            IMM8,
            _MM_FROUND_CUR_DIRECTION,
        );
        transmute(r)
37246 }
37247}
37248
37249/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
37250/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
37251/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
37252/// * [`_MM_FROUND_TO_NEG_INF`] : round down
37253/// * [`_MM_FROUND_TO_POS_INF`] : round up
37254/// * [`_MM_FROUND_TO_ZERO`] : truncate
37255/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37256///
37257/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_roundscale_ss&expand=4800)
37258#[inline]
37259#[target_feature(enable = "avx512f")]
37260#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37261#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0))]
37262#[rustc_legacy_const_generics(4)]
37263pub fn _mm_mask_roundscale_ss<const IMM8: i32>(
37264 src: __m128,
37265 k: __mmask8,
37266 a: __m128,
37267 b: __m128,
37268) -> __m128 {
37269 unsafe {
37270 static_assert_uimm_bits!(IMM8, 8);
37271 let a: f32x4 = a.as_f32x4();
37272 let b: f32x4 = b.as_f32x4();
37273 let src: f32x4 = src.as_f32x4();
        let r: f32x4 = vrndscaless(a, b, src, k, IMM8, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
37276 }
37277}
37278
37279/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
37280/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
37281/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
37282/// * [`_MM_FROUND_TO_NEG_INF`] : round down
37283/// * [`_MM_FROUND_TO_POS_INF`] : round up
37284/// * [`_MM_FROUND_TO_ZERO`] : truncate
37285/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37286///
37287/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_roundscale_ss&expand=4801)
37288#[inline]
37289#[target_feature(enable = "avx512f")]
37290#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37291#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0))]
37292#[rustc_legacy_const_generics(3)]
37293pub fn _mm_maskz_roundscale_ss<const IMM8: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
37294 unsafe {
37295 static_assert_uimm_bits!(IMM8, 8);
37296 let a: f32x4 = a.as_f32x4();
37297 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vrndscaless(a, b, f32x4::ZERO, k, IMM8, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
37300 }
37301}
37302
37303/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
37304/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
37305/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
37306/// * [`_MM_FROUND_TO_NEG_INF`] : round down
37307/// * [`_MM_FROUND_TO_POS_INF`] : round up
37308/// * [`_MM_FROUND_TO_ZERO`] : truncate
37309/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37310///
37311/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_roundscale_sd&expand=4799)
37312#[inline]
37313#[target_feature(enable = "avx512f")]
37314#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37315#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 255))]
37316#[rustc_legacy_const_generics(2)]
37317pub fn _mm_roundscale_sd<const IMM8: i32>(a: __m128d, b: __m128d) -> __m128d {
37318 unsafe {
37319 static_assert_uimm_bits!(IMM8, 8);
37320 let a: f64x2 = a.as_f64x2();
37321 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vrndscalesd(
            a,
            b,
            f64x2::ZERO,
            0b11111111,
            IMM8,
            _MM_FROUND_CUR_DIRECTION,
        );
        transmute(r)
37331 }
37332}
37333
37334/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
37335/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
37336/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
37337/// * [`_MM_FROUND_TO_NEG_INF`] : round down
37338/// * [`_MM_FROUND_TO_POS_INF`] : round up
37339/// * [`_MM_FROUND_TO_ZERO`] : truncate
37340/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37341///
37342/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_roundscale_sd&expand=4797)
37343#[inline]
37344#[target_feature(enable = "avx512f")]
37345#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37346#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0))]
37347#[rustc_legacy_const_generics(4)]
37348pub fn _mm_mask_roundscale_sd<const IMM8: i32>(
37349 src: __m128d,
37350 k: __mmask8,
37351 a: __m128d,
37352 b: __m128d,
37353) -> __m128d {
37354 unsafe {
37355 static_assert_uimm_bits!(IMM8, 8);
37356 let a: f64x2 = a.as_f64x2();
37357 let b: f64x2 = b.as_f64x2();
37358 let src: f64x2 = src.as_f64x2();
        let r: f64x2 = vrndscalesd(a, b, src, k, IMM8, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
37361 }
37362}
37363
37364/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
37365/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
37366/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
37367/// * [`_MM_FROUND_TO_NEG_INF`] : round down
37368/// * [`_MM_FROUND_TO_POS_INF`] : round up
37369/// * [`_MM_FROUND_TO_ZERO`] : truncate
37370/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37371///
37372/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_roundscale_sd&expand=4798)
37373#[inline]
37374#[target_feature(enable = "avx512f")]
37375#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37376#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0))]
37377#[rustc_legacy_const_generics(3)]
37378pub fn _mm_maskz_roundscale_sd<const IMM8: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
37379 unsafe {
37380 static_assert_uimm_bits!(IMM8, 8);
37381 let a: f64x2 = a.as_f64x2();
37382 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vrndscalesd(a, b, f64x2::ZERO, k, IMM8, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
37385 }
37386}
37387
37388/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
37389///
37390/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_scalef_ss&expand=4901)
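///
/// # Examples
///
/// A minimal usage sketch, not part of Intel's description: the lower lane of the
/// result is `a * 2^floor(b)`; the inputs are illustrative and `avx512f` support is
/// checked at runtime.
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///
///     if is_x86_feature_detected!("avx512f") {
///         // `unsafe` is required because this context does not itself enable `avx512f`.
///         unsafe {
///             let a = _mm_set_ss(3.0);
///             let b = _mm_set_ss(4.0);
///             // Lower lane: 3.0 * 2^4 = 48.0; the upper lanes come from `a`.
///             let r = _mm_scalef_ss(a, b);
///             assert_eq!(_mm_cvtss_f32(r), 48.0);
///         }
///     }
/// }
/// ```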
37391#[inline]
37392#[target_feature(enable = "avx512f")]
37393#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37394#[cfg_attr(test, assert_instr(vscalefss))]
37395pub fn _mm_scalef_ss(a: __m128, b: __m128) -> __m128 {
37396 unsafe {
37397 let a: f32x4 = a.as_f32x4();
37398 let b: f32x4 = b.as_f32x4();
        transmute(vscalefss(
            a,
            b,
            f32x4::ZERO,
            0b11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
37406 }
37407}
37408
37409/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37410///
37411/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_scalef_ss&expand=4899)
37412#[inline]
37413#[target_feature(enable = "avx512f")]
37414#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37415#[cfg_attr(test, assert_instr(vscalefss))]
37416pub fn _mm_mask_scalef_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
37417 unsafe {
37418 let a: f32x4 = a.as_f32x4();
37419 let b: f32x4 = b.as_f32x4();
37420 let src: f32x4 = src.as_f32x4();
        transmute(vscalefss(a, b, src, k, _MM_FROUND_CUR_DIRECTION))
37422 }
37423}
37424
37425/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37426///
37427/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_scalef_ss&expand=4900)
37428#[inline]
37429#[target_feature(enable = "avx512f")]
37430#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37431#[cfg_attr(test, assert_instr(vscalefss))]
37432pub fn _mm_maskz_scalef_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
37433 unsafe {
        transmute(vscalefss(
            a.as_f32x4(),
            b.as_f32x4(),
            f32x4::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
37441 }
37442}
37443
37444/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
37445///
37446/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_scalef_sd&expand=4898)
37447#[inline]
37448#[target_feature(enable = "avx512f")]
37449#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37450#[cfg_attr(test, assert_instr(vscalefsd))]
37451pub fn _mm_scalef_sd(a: __m128d, b: __m128d) -> __m128d {
37452 unsafe {
        transmute(vscalefsd(
            a.as_f64x2(),
            b.as_f64x2(),
            f64x2::ZERO,
            0b11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
37460 }
37461}
37462
37463/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37464///
37465/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_scalef_sd&expand=4896)
37466#[inline]
37467#[target_feature(enable = "avx512f")]
37468#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37469#[cfg_attr(test, assert_instr(vscalefsd))]
37470pub fn _mm_mask_scalef_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
37471 unsafe {
        transmute(vscalefsd(
            a.as_f64x2(),
            b.as_f64x2(),
            src.as_f64x2(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
37479 }
37480}
37481
37482/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37483///
37484/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_scalef_sd&expand=4897)
37485#[inline]
37486#[target_feature(enable = "avx512f")]
37487#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37488#[cfg_attr(test, assert_instr(vscalefsd))]
37489pub fn _mm_maskz_scalef_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
37490 unsafe {
        transmute(vscalefsd(
            a.as_f64x2(),
            b.as_f64x2(),
            f64x2::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
37498 }
37499}
37500
37501/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37502///
37503/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmadd_ss&expand=2582)
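///
/// # Examples
///
/// A minimal usage sketch, not part of Intel's description: it exercises both mask
/// states with illustrative values, assuming `avx512f` is available at runtime.
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///
///     if is_x86_feature_detected!("avx512f") {
///         // `unsafe` is required because this context does not itself enable `avx512f`.
///         unsafe {
///             let a = _mm_set_ss(2.0);
///             let b = _mm_set_ss(3.0);
///             let c = _mm_set_ss(4.0);
///             // Mask bit 0 set: lower lane = 2.0 * 3.0 + 4.0 = 10.0.
///             let r = _mm_mask_fmadd_ss(a, 0b1, b, c);
///             assert_eq!(_mm_cvtss_f32(r), 10.0);
///             // Mask bit 0 clear: the lower lane of `a` passes through unchanged.
///             let r = _mm_mask_fmadd_ss(a, 0b0, b, c);
///             assert_eq!(_mm_cvtss_f32(r), 2.0);
///         }
///     }
/// }
/// ```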
37504#[inline]
37505#[target_feature(enable = "avx512f")]
37506#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37507#[cfg_attr(test, assert_instr(vfmadd))]
37508pub fn _mm_mask_fmadd_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
37509 unsafe {
37510 let mut fmadd: f32 = simd_extract!(a, 0);
37511 if (k & 0b00000001) != 0 {
37512 let extractb: f32 = simd_extract!(b, 0);
37513 let extractc: f32 = simd_extract!(c, 0);
            fmadd = fmaf32(fmadd, extractb, extractc);
37515 }
37516 simd_insert!(a, 0, fmadd)
37517 }
37518}
37519
37520/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37521///
37522/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_ss&expand=2584)
37523#[inline]
37524#[target_feature(enable = "avx512f")]
37525#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37526#[cfg_attr(test, assert_instr(vfmadd))]
37527pub fn _mm_maskz_fmadd_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
37528 unsafe {
37529 let mut fmadd: f32 = 0.;
37530 if (k & 0b00000001) != 0 {
37531 let extracta: f32 = simd_extract!(a, 0);
37532 let extractb: f32 = simd_extract!(b, 0);
37533 let extractc: f32 = simd_extract!(c, 0);
            fmadd = fmaf32(extracta, extractb, extractc);
37535 }
37536 simd_insert!(a, 0, fmadd)
37537 }
37538}
37539
37540/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
37541///
37542/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_ss&expand=2583)
37543#[inline]
37544#[target_feature(enable = "avx512f")]
37545#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37546#[cfg_attr(test, assert_instr(vfmadd))]
37547pub fn _mm_mask3_fmadd_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
37548 unsafe {
37549 let mut fmadd: f32 = simd_extract!(c, 0);
37550 if (k & 0b00000001) != 0 {
37551 let extracta: f32 = simd_extract!(a, 0);
37552 let extractb: f32 = simd_extract!(b, 0);
            fmadd = fmaf32(extracta, extractb, fmadd);
37554 }
37555 simd_insert!(c, 0, fmadd)
37556 }
37557}
37558
37559/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37560///
37561/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmadd_sd&expand=2578)
37562#[inline]
37563#[target_feature(enable = "avx512f")]
37564#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37565#[cfg_attr(test, assert_instr(vfmadd))]
37566pub fn _mm_mask_fmadd_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
37567 unsafe {
37568 let mut fmadd: f64 = simd_extract!(a, 0);
37569 if (k & 0b00000001) != 0 {
37570 let extractb: f64 = simd_extract!(b, 0);
37571 let extractc: f64 = simd_extract!(c, 0);
            fmadd = fmaf64(fmadd, extractb, extractc);
37573 }
37574 simd_insert!(a, 0, fmadd)
37575 }
37576}
37577
37578/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37579///
37580/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_sd&expand=2580)
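///
/// # Examples
///
/// A minimal usage sketch, not part of Intel's description: it shows the zeroing
/// behaviour of the mask with illustrative values, assuming `avx512f` at runtime.
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///
///     if is_x86_feature_detected!("avx512f") {
///         // `unsafe` is required because this context does not itself enable `avx512f`.
///         unsafe {
///             let a = _mm_set_sd(2.0);
///             let b = _mm_set_sd(3.0);
///             let c = _mm_set_sd(4.0);
///             // Mask bit 0 set: lower lane = 2.0 * 3.0 + 4.0 = 10.0.
///             let r = _mm_maskz_fmadd_sd(0b1, a, b, c);
///             assert_eq!(_mm_cvtsd_f64(r), 10.0);
///             // Mask bit 0 clear: the lower lane is zeroed.
///             let r = _mm_maskz_fmadd_sd(0b0, a, b, c);
///             assert_eq!(_mm_cvtsd_f64(r), 0.0);
///         }
///     }
/// }
/// ```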
37581#[inline]
37582#[target_feature(enable = "avx512f")]
37583#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37584#[cfg_attr(test, assert_instr(vfmadd))]
37585pub fn _mm_maskz_fmadd_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
37586 unsafe {
37587 let mut fmadd: f64 = 0.;
37588 if (k & 0b00000001) != 0 {
37589 let extracta: f64 = simd_extract!(a, 0);
37590 let extractb: f64 = simd_extract!(b, 0);
37591 let extractc: f64 = simd_extract!(c, 0);
            fmadd = fmaf64(extracta, extractb, extractc);
37593 }
37594 simd_insert!(a, 0, fmadd)
37595 }
37596}
37597
37598/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
37599///
37600/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_sd&expand=2579)
37601#[inline]
37602#[target_feature(enable = "avx512f")]
37603#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37604#[cfg_attr(test, assert_instr(vfmadd))]
37605pub fn _mm_mask3_fmadd_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
37606 unsafe {
37607 let mut fmadd: f64 = simd_extract!(c, 0);
37608 if (k & 0b00000001) != 0 {
37609 let extracta: f64 = simd_extract!(a, 0);
37610 let extractb: f64 = simd_extract!(b, 0);
            fmadd = fmaf64(extracta, extractb, fmadd);
37612 }
37613 simd_insert!(c, 0, fmadd)
37614 }
37615}
37616
/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37618///
37619/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_ss&expand=2668)
37620#[inline]
37621#[target_feature(enable = "avx512f")]
37622#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37623#[cfg_attr(test, assert_instr(vfmsub))]
37624pub fn _mm_mask_fmsub_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
37625 unsafe {
37626 let mut fmsub: f32 = simd_extract!(a, 0);
37627 if (k & 0b00000001) != 0 {
37628 let extractb: f32 = simd_extract!(b, 0);
37629 let extractc: f32 = simd_extract!(c, 0);
37630 let extractc: f32 = -extractc;
            fmsub = fmaf32(fmsub, extractb, extractc);
37632 }
37633 simd_insert!(a, 0, fmsub)
37634 }
37635}
37636
37637/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37638///
37639/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_ss&expand=2670)
37640#[inline]
37641#[target_feature(enable = "avx512f")]
37642#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37643#[cfg_attr(test, assert_instr(vfmsub))]
37644pub fn _mm_maskz_fmsub_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
37645 unsafe {
37646 let mut fmsub: f32 = 0.;
37647 if (k & 0b00000001) != 0 {
37648 let extracta: f32 = simd_extract!(a, 0);
37649 let extractb: f32 = simd_extract!(b, 0);
37650 let extractc: f32 = simd_extract!(c, 0);
37651 let extractc: f32 = -extractc;
            fmsub = fmaf32(extracta, extractb, extractc);
37653 }
37654 simd_insert!(a, 0, fmsub)
37655 }
37656}
37657
37658/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
37659///
37660/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_ss&expand=2669)
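///
/// # Examples
///
/// A minimal usage sketch, not part of Intel's description: it shows that the
/// `mask3` form builds its result on `c`, with illustrative values and a runtime
/// `avx512f` check.
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///
///     if is_x86_feature_detected!("avx512f") {
///         // `unsafe` is required because this context does not itself enable `avx512f`.
///         unsafe {
///             let a = _mm_set_ss(2.0);
///             let b = _mm_set_ss(3.0);
///             let c = _mm_set_ss(4.0);
///             // Mask bit 0 set: lower lane = 2.0 * 3.0 - 4.0 = 2.0.
///             let r = _mm_mask3_fmsub_ss(a, b, c, 0b1);
///             assert_eq!(_mm_cvtss_f32(r), 2.0);
///             // Mask bit 0 clear: the lower lane of `c` passes through unchanged.
///             let r = _mm_mask3_fmsub_ss(a, b, c, 0b0);
///             assert_eq!(_mm_cvtss_f32(r), 4.0);
///         }
///     }
/// }
/// ```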
37661#[inline]
37662#[target_feature(enable = "avx512f")]
37663#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37664#[cfg_attr(test, assert_instr(vfmsub))]
37665pub fn _mm_mask3_fmsub_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
37666 unsafe {
37667 let mut fmsub: f32 = simd_extract!(c, 0);
37668 if (k & 0b00000001) != 0 {
37669 let extracta: f32 = simd_extract!(a, 0);
37670 let extractb: f32 = simd_extract!(b, 0);
37671 let extractc: f32 = -fmsub;
            fmsub = fmaf32(extracta, extractb, extractc);
37673 }
37674 simd_insert!(c, 0, fmsub)
37675 }
37676}
37677
37678/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37679///
37680/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_sd&expand=2664)
37681#[inline]
37682#[target_feature(enable = "avx512f")]
37683#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37684#[cfg_attr(test, assert_instr(vfmsub))]
37685pub fn _mm_mask_fmsub_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
37686 unsafe {
37687 let mut fmsub: f64 = simd_extract!(a, 0);
37688 if (k & 0b00000001) != 0 {
37689 let extractb: f64 = simd_extract!(b, 0);
37690 let extractc: f64 = simd_extract!(c, 0);
37691 let extractc: f64 = -extractc;
            fmsub = fmaf64(fmsub, extractb, extractc);
37693 }
37694 simd_insert!(a, 0, fmsub)
37695 }
37696}
37697
37698/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37699///
37700/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_sd&expand=2666)
37701#[inline]
37702#[target_feature(enable = "avx512f")]
37703#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37704#[cfg_attr(test, assert_instr(vfmsub))]
37705pub fn _mm_maskz_fmsub_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
37706 unsafe {
37707 let mut fmsub: f64 = 0.;
37708 if (k & 0b00000001) != 0 {
37709 let extracta: f64 = simd_extract!(a, 0);
37710 let extractb: f64 = simd_extract!(b, 0);
37711 let extractc: f64 = simd_extract!(c, 0);
37712 let extractc: f64 = -extractc;
            fmsub = fmaf64(extracta, extractb, extractc);
37714 }
37715 simd_insert!(a, 0, fmsub)
37716 }
37717}
37718
37719/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
37720///
37721/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_sd&expand=2665)
37722#[inline]
37723#[target_feature(enable = "avx512f")]
37724#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37725#[cfg_attr(test, assert_instr(vfmsub))]
37726pub fn _mm_mask3_fmsub_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
37727 unsafe {
37728 let mut fmsub: f64 = simd_extract!(c, 0);
37729 if (k & 0b00000001) != 0 {
37730 let extracta: f64 = simd_extract!(a, 0);
37731 let extractb: f64 = simd_extract!(b, 0);
37732 let extractc: f64 = -fmsub;
            fmsub = fmaf64(extracta, extractb, extractc);
37734 }
37735 simd_insert!(c, 0, fmsub)
37736 }
37737}
37738
37739/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37740///
37741/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_ss&expand=2748)
37742#[inline]
37743#[target_feature(enable = "avx512f")]
37744#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37745#[cfg_attr(test, assert_instr(vfnmadd))]
37746pub fn _mm_mask_fnmadd_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
37747 unsafe {
37748 let mut fnmadd: f32 = simd_extract!(a, 0);
37749 if (k & 0b00000001) != 0 {
37750 let extracta: f32 = -fnmadd;
37751 let extractb: f32 = simd_extract!(b, 0);
37752 let extractc: f32 = simd_extract!(c, 0);
            fnmadd = fmaf32(extracta, extractb, extractc);
37754 }
37755 simd_insert!(a, 0, fnmadd)
37756 }
37757}
37758
37759/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37760///
37761/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_ss&expand=2750)
37762#[inline]
37763#[target_feature(enable = "avx512f")]
37764#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37765#[cfg_attr(test, assert_instr(vfnmadd))]
37766pub fn _mm_maskz_fnmadd_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
37767 unsafe {
37768 let mut fnmadd: f32 = 0.;
37769 if (k & 0b00000001) != 0 {
37770 let extracta: f32 = simd_extract!(a, 0);
37771 let extracta: f32 = -extracta;
37772 let extractb: f32 = simd_extract!(b, 0);
37773 let extractc: f32 = simd_extract!(c, 0);
            fnmadd = fmaf32(extracta, extractb, extractc);
37775 }
37776 simd_insert!(a, 0, fnmadd)
37777 }
37778}
37779
37780/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
37781///
37782/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_ss&expand=2749)
37783#[inline]
37784#[target_feature(enable = "avx512f")]
37785#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37786#[cfg_attr(test, assert_instr(vfnmadd))]
37787pub fn _mm_mask3_fnmadd_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
37788 unsafe {
37789 let mut fnmadd: f32 = simd_extract!(c, 0);
37790 if (k & 0b00000001) != 0 {
37791 let extracta: f32 = simd_extract!(a, 0);
37792 let extracta: f32 = -extracta;
37793 let extractb: f32 = simd_extract!(b, 0);
            fnmadd = fmaf32(extracta, extractb, fnmadd);
37795 }
37796 simd_insert!(c, 0, fnmadd)
37797 }
37798}
37799
37800/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37801///
37802/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_sd&expand=2744)
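///
/// # Examples
///
/// A minimal usage sketch, not part of Intel's description: it shows the negated
/// multiply-add on the lower lane with illustrative values, assuming `avx512f` at
/// runtime.
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///
///     if is_x86_feature_detected!("avx512f") {
///         // `unsafe` is required because this context does not itself enable `avx512f`.
///         unsafe {
///             let a = _mm_set_sd(2.0);
///             let b = _mm_set_sd(3.0);
///             let c = _mm_set_sd(10.0);
///             // Mask bit 0 set: lower lane = -(2.0 * 3.0) + 10.0 = 4.0.
///             let r = _mm_mask_fnmadd_sd(a, 0b1, b, c);
///             assert_eq!(_mm_cvtsd_f64(r), 4.0);
///             // Mask bit 0 clear: the lower lane of `a` passes through unchanged.
///             let r = _mm_mask_fnmadd_sd(a, 0b0, b, c);
///             assert_eq!(_mm_cvtsd_f64(r), 2.0);
///         }
///     }
/// }
/// ```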
37803#[inline]
37804#[target_feature(enable = "avx512f")]
37805#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37806#[cfg_attr(test, assert_instr(vfnmadd))]
37807pub fn _mm_mask_fnmadd_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
37808 unsafe {
37809 let mut fnmadd: f64 = simd_extract!(a, 0);
37810 if (k & 0b00000001) != 0 {
37811 let extracta: f64 = -fnmadd;
37812 let extractb: f64 = simd_extract!(b, 0);
37813 let extractc: f64 = simd_extract!(c, 0);
            fnmadd = fmaf64(extracta, extractb, extractc);
37815 }
37816 simd_insert!(a, 0, fnmadd)
37817 }
37818}
37819
37820/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37821///
37822/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_sd&expand=2746)
37823#[inline]
37824#[target_feature(enable = "avx512f")]
37825#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37826#[cfg_attr(test, assert_instr(vfnmadd))]
37827pub fn _mm_maskz_fnmadd_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
37828 unsafe {
37829 let mut fnmadd: f64 = 0.;
37830 if (k & 0b00000001) != 0 {
37831 let extracta: f64 = simd_extract!(a, 0);
37832 let extracta: f64 = -extracta;
37833 let extractb: f64 = simd_extract!(b, 0);
37834 let extractc: f64 = simd_extract!(c, 0);
            fnmadd = fmaf64(extracta, extractb, extractc);
37836 }
37837 simd_insert!(a, 0, fnmadd)
37838 }
37839}
37840
37841/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
37842///
37843/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_sd&expand=2745)
37844#[inline]
37845#[target_feature(enable = "avx512f")]
37846#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37847#[cfg_attr(test, assert_instr(vfnmadd))]
37848pub fn _mm_mask3_fnmadd_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
37849 unsafe {
37850 let mut fnmadd: f64 = simd_extract!(c, 0);
37851 if (k & 0b00000001) != 0 {
37852 let extracta: f64 = simd_extract!(a, 0);
37853 let extracta: f64 = -extracta;
37854 let extractb: f64 = simd_extract!(b, 0);
            fnmadd = fmaf64(extracta, extractb, fnmadd);
37856 }
37857 simd_insert!(c, 0, fnmadd)
37858 }
37859}
37860
/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37862///
37863/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_ss&expand=2796)
37864#[inline]
37865#[target_feature(enable = "avx512f")]
37866#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37867#[cfg_attr(test, assert_instr(vfnmsub))]
37868pub fn _mm_mask_fnmsub_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
37869 unsafe {
37870 let mut fnmsub: f32 = simd_extract!(a, 0);
37871 if (k & 0b00000001) != 0 {
37872 let extracta: f32 = -fnmsub;
37873 let extractb: f32 = simd_extract!(b, 0);
37874 let extractc: f32 = simd_extract!(c, 0);
37875 let extractc: f32 = -extractc;
            fnmsub = fmaf32(extracta, extractb, extractc);
37877 }
37878 simd_insert!(a, 0, fnmsub)
37879 }
37880}
37881
37882/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37883///
37884/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_ss&expand=2798)
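///
/// # Examples
///
/// A minimal usage sketch, not part of Intel's description: it shows the negated
/// multiply-subtract combined with the zeroing mask, with illustrative values and a
/// runtime `avx512f` check.
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///
///     if is_x86_feature_detected!("avx512f") {
///         // `unsafe` is required because this context does not itself enable `avx512f`.
///         unsafe {
///             let a = _mm_set_ss(2.0);
///             let b = _mm_set_ss(3.0);
///             let c = _mm_set_ss(4.0);
///             // Mask bit 0 set: lower lane = -(2.0 * 3.0) - 4.0 = -10.0.
///             let r = _mm_maskz_fnmsub_ss(0b1, a, b, c);
///             assert_eq!(_mm_cvtss_f32(r), -10.0);
///             // Mask bit 0 clear: the lower lane is zeroed.
///             let r = _mm_maskz_fnmsub_ss(0b0, a, b, c);
///             assert_eq!(_mm_cvtss_f32(r), 0.0);
///         }
///     }
/// }
/// ```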
37885#[inline]
37886#[target_feature(enable = "avx512f")]
37887#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37888#[cfg_attr(test, assert_instr(vfnmsub))]
37889pub fn _mm_maskz_fnmsub_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
37890 unsafe {
37891 let mut fnmsub: f32 = 0.;
37892 if (k & 0b00000001) != 0 {
37893 let extracta: f32 = simd_extract!(a, 0);
37894 let extracta: f32 = -extracta;
37895 let extractb: f32 = simd_extract!(b, 0);
37896 let extractc: f32 = simd_extract!(c, 0);
37897 let extractc: f32 = -extractc;
            fnmsub = fmaf32(extracta, extractb, extractc);
37899 }
37900 simd_insert!(a, 0, fnmsub)
37901 }
37902}
37903
37904/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
37905///
37906/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_ss&expand=2797)
37907#[inline]
37908#[target_feature(enable = "avx512f")]
37909#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37910#[cfg_attr(test, assert_instr(vfnmsub))]
37911pub fn _mm_mask3_fnmsub_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
37912 unsafe {
37913 let mut fnmsub: f32 = simd_extract!(c, 0);
37914 if (k & 0b00000001) != 0 {
37915 let extracta: f32 = simd_extract!(a, 0);
37916 let extracta: f32 = -extracta;
37917 let extractb: f32 = simd_extract!(b, 0);
37918 let extractc: f32 = -fnmsub;
            fnmsub = fmaf32(extracta, extractb, extractc);
37920 }
37921 simd_insert!(c, 0, fnmsub)
37922 }
37923}
37924
/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37926///
37927/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_sd&expand=2792)
37928#[inline]
37929#[target_feature(enable = "avx512f")]
37930#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37931#[cfg_attr(test, assert_instr(vfnmsub))]
37932pub fn _mm_mask_fnmsub_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
37933 unsafe {
37934 let mut fnmsub: f64 = simd_extract!(a, 0);
37935 if (k & 0b00000001) != 0 {
37936 let extracta: f64 = -fnmsub;
37937 let extractb: f64 = simd_extract!(b, 0);
37938 let extractc: f64 = simd_extract!(c, 0);
37939 let extractc: f64 = -extractc;
            fnmsub = fmaf64(extracta, extractb, extractc);
37941 }
37942 simd_insert!(a, 0, fnmsub)
37943 }
37944}
37945
/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37947///
37948/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_sd&expand=2794)
37949#[inline]
37950#[target_feature(enable = "avx512f")]
37951#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37952#[cfg_attr(test, assert_instr(vfnmsub))]
37953pub fn _mm_maskz_fnmsub_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
37954 unsafe {
37955 let mut fnmsub: f64 = 0.;
37956 if (k & 0b00000001) != 0 {
37957 let extracta: f64 = simd_extract!(a, 0);
37958 let extracta: f64 = -extracta;
37959 let extractb: f64 = simd_extract!(b, 0);
37960 let extractc: f64 = simd_extract!(c, 0);
37961 let extractc: f64 = -extractc;
            fnmsub = fmaf64(extracta, extractb, extractc);
37963 }
37964 simd_insert!(a, 0, fnmsub)
37965 }
37966}
37967
37968/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
37969///
37970/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_sd&expand=2793)
37971#[inline]
37972#[target_feature(enable = "avx512f")]
37973#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
37974#[cfg_attr(test, assert_instr(vfnmsub))]
37975pub fn _mm_mask3_fnmsub_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
37976 unsafe {
37977 let mut fnmsub: f64 = simd_extract!(c, 0);
37978 if (k & 0b00000001) != 0 {
37979 let extracta: f64 = simd_extract!(a, 0);
37980 let extracta: f64 = -extracta;
37981 let extractb: f64 = simd_extract!(b, 0);
37982 let extractc: f64 = -fnmsub;
            fnmsub = fmaf64(extracta, extractb, extractc);
37984 }
37985 simd_insert!(c, 0, fnmsub)
37986 }
37987}
37988
37989/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
37990///
37991/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
37992/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
37993/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
37994/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
37995/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
37996/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37997///
37998/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_round_ss&expand=151)
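///
/// # Examples
///
/// A minimal usage sketch, not part of Intel's description: it uses round-to-nearest
/// with exception suppression and illustrative values, assuming `avx512f` at runtime.
///
/// ```
/// #[cfg(target_arch = "x86_64")]
/// {
///     use std::arch::x86_64::*;
///
///     if is_x86_feature_detected!("avx512f") {
///         // `unsafe` is required because this context does not itself enable `avx512f`.
///         unsafe {
///             const R: i32 = _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
///             let a = _mm_set_ss(1.5);
///             let b = _mm_set_ss(2.25);
///             // Lower lane: 1.5 + 2.25 = 3.75; the upper lanes come from `a`.
///             let r = _mm_add_round_ss::<R>(a, b);
///             assert_eq!(_mm_cvtss_f32(r), 3.75);
///         }
///     }
/// }
/// ```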
37999#[inline]
38000#[target_feature(enable = "avx512f")]
38001#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38002#[cfg_attr(test, assert_instr(vaddss, ROUNDING = 8))]
38003#[rustc_legacy_const_generics(2)]
38004pub fn _mm_add_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
38005 unsafe {
38006 static_assert_rounding!(ROUNDING);
38007 let a: f32x4 = a.as_f32x4();
38008 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vaddss(a, b, f32x4::ZERO, 0b1, ROUNDING);
        transmute(r)
38011 }
38012}
38013
38014/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38015///
38016/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38017/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38018/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38019/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38020/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38021/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38022///
38023/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_add_round_ss&expand=152)
38024#[inline]
38025#[target_feature(enable = "avx512f")]
38026#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38027#[cfg_attr(test, assert_instr(vaddss, ROUNDING = 8))]
38028#[rustc_legacy_const_generics(4)]
38029pub fn _mm_mask_add_round_ss<const ROUNDING: i32>(
38030 src: __m128,
38031 k: __mmask8,
38032 a: __m128,
38033 b: __m128,
38034) -> __m128 {
38035 unsafe {
38036 static_assert_rounding!(ROUNDING);
38037 let a: f32x4 = a.as_f32x4();
38038 let b: f32x4 = b.as_f32x4();
38039 let src: f32x4 = src.as_f32x4();
        let r: f32x4 = vaddss(a, b, src, k, ROUNDING);
        transmute(r)
38042 }
38043}
38044
38045/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38046///
38047/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38048/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38049/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38050/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38051/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38052/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38053///
38054/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_round_ss&expand=153)
38055#[inline]
38056#[target_feature(enable = "avx512f")]
38057#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38058#[cfg_attr(test, assert_instr(vaddss, ROUNDING = 8))]
38059#[rustc_legacy_const_generics(3)]
38060pub fn _mm_maskz_add_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38061 unsafe {
38062 static_assert_rounding!(ROUNDING);
38063 let a: f32x4 = a.as_f32x4();
38064 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vaddss(a, b, f32x4::ZERO, k, ROUNDING);
        transmute(r)
38067 }
38068}
38069
38070/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
38071///
38072/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38073/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38074/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38075/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38076/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38077/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38078///
38079/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_round_sd&expand=148)
38080#[inline]
38081#[target_feature(enable = "avx512f")]
38082#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38083#[cfg_attr(test, assert_instr(vaddsd, ROUNDING = 8))]
38084#[rustc_legacy_const_generics(2)]
38085pub fn _mm_add_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
38086 unsafe {
38087 static_assert_rounding!(ROUNDING);
38088 let a: f64x2 = a.as_f64x2();
38089 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vaddsd(a, b, f64x2::ZERO, 0b1, ROUNDING);
        transmute(r)
38092 }
38093}
38094
38095/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38096///
38097/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38098/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38099/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38100/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38101/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38102/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38103///
38104/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_add_round_sd&expand=149)
38105#[inline]
38106#[target_feature(enable = "avx512f")]
38107#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38108#[cfg_attr(test, assert_instr(vaddsd, ROUNDING = 8))]
38109#[rustc_legacy_const_generics(4)]
38110pub fn _mm_mask_add_round_sd<const ROUNDING: i32>(
38111 src: __m128d,
38112 k: __mmask8,
38113 a: __m128d,
38114 b: __m128d,
38115) -> __m128d {
38116 unsafe {
38117 static_assert_rounding!(ROUNDING);
38118 let a: f64x2 = a.as_f64x2();
38119 let b: f64x2 = b.as_f64x2();
38120 let src: f64x2 = src.as_f64x2();
        let r: f64x2 = vaddsd(a, b, src, k, ROUNDING);
        transmute(r)
38123 }
38124}
38125
38126/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38127///
38128/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38129/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38130/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38131/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38132/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38133/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38134///
38135/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_round_sd&expand=150)
38136#[inline]
38137#[target_feature(enable = "avx512f")]
38138#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38139#[cfg_attr(test, assert_instr(vaddsd, ROUNDING = 8))]
38140#[rustc_legacy_const_generics(3)]
38141pub fn _mm_maskz_add_round_sd<const ROUNDING: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38142 unsafe {
38143 static_assert_rounding!(ROUNDING);
38144 let a: f64x2 = a.as_f64x2();
38145 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vaddsd(a, b, f64x2::ZERO, k, ROUNDING);
        transmute(r)
38148 }
38149}
38150
38151/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
38152///
38153/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38154/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38155/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38156/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38157/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38158/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38159///
38160/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_sub_round_ss&expand=5745)
38161#[inline]
38162#[target_feature(enable = "avx512f")]
38163#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38164#[cfg_attr(test, assert_instr(vsubss, ROUNDING = 8))]
38165#[rustc_legacy_const_generics(2)]
38166pub fn _mm_sub_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
38167 unsafe {
38168 static_assert_rounding!(ROUNDING);
38169 let a: f32x4 = a.as_f32x4();
38170 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vsubss(a, b, f32x4::ZERO, 0b1, ROUNDING);
        transmute(r)
38173 }
38174}
38175
38176/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38177///
38178/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38179/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38180/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38181/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38182/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38183/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38184///
38185/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sub_round_ss&expand=5743)
38186#[inline]
38187#[target_feature(enable = "avx512f")]
38188#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38189#[cfg_attr(test, assert_instr(vsubss, ROUNDING = 8))]
38190#[rustc_legacy_const_generics(4)]
38191pub fn _mm_mask_sub_round_ss<const ROUNDING: i32>(
38192 src: __m128,
38193 k: __mmask8,
38194 a: __m128,
38195 b: __m128,
38196) -> __m128 {
38197 unsafe {
38198 static_assert_rounding!(ROUNDING);
38199 let a: f32x4 = a.as_f32x4();
38200 let b: f32x4 = b.as_f32x4();
38201 let src: f32x4 = src.as_f32x4();
        let r: f32x4 = vsubss(a, b, src, k, ROUNDING);
        transmute(r)
38204 }
38205}
38206
38207/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38208///
38209/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38210/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38211/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38212/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38213/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38214/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38215///
38216/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sub_round_ss&expand=5744)
38217#[inline]
38218#[target_feature(enable = "avx512f")]
38219#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38220#[cfg_attr(test, assert_instr(vsubss, ROUNDING = 8))]
38221#[rustc_legacy_const_generics(3)]
38222pub fn _mm_maskz_sub_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38223 unsafe {
38224 static_assert_rounding!(ROUNDING);
38225 let a: f32x4 = a.as_f32x4();
38226 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vsubss(a, b, f32x4::ZERO, k, ROUNDING);
        transmute(r)
38229 }
38230}
38231
38232/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
38233///
38234/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38235/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38236/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38237/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38238/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38239/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38240///
38241/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_sub_round_sd&expand=5742)
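///
/// A minimal usage sketch for the double-precision variant (illustrative only;
/// assumes `avx512f` support has been confirmed beforehand):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() -> f64 {
///     let a = _mm_set_sd(1.0);
///     let b = _mm_set_sd(3.0);
///     // Low lane: 1.0 - 3.0 = -2.0; the upper lane is copied from `a`.
///     let r = _mm_sub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
///     _mm_cvtsd_f64(r) // -2.0
/// }
/// ```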
38242#[inline]
38243#[target_feature(enable = "avx512f")]
38244#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38245#[cfg_attr(test, assert_instr(vsubsd, ROUNDING = 8))]
38246#[rustc_legacy_const_generics(2)]
38247pub fn _mm_sub_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
38248 unsafe {
38249 static_assert_rounding!(ROUNDING);
38250 let a: f64x2 = a.as_f64x2();
38251 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vsubsd(a, b, f64x2::ZERO, 0b1, ROUNDING);
        transmute(r)
38254 }
38255}
38256
38257/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38258///
38259/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38260/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38261/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38262/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38263/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38264/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38265///
38266/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sub_round_sd&expand=5740)
38267#[inline]
38268#[target_feature(enable = "avx512f")]
38269#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38270#[cfg_attr(test, assert_instr(vsubsd, ROUNDING = 8))]
38271#[rustc_legacy_const_generics(4)]
38272pub fn _mm_mask_sub_round_sd<const ROUNDING: i32>(
38273 src: __m128d,
38274 k: __mmask8,
38275 a: __m128d,
38276 b: __m128d,
38277) -> __m128d {
38278 unsafe {
38279 static_assert_rounding!(ROUNDING);
38280 let a: f64x2 = a.as_f64x2();
38281 let b: f64x2 = b.as_f64x2();
38282 let src: f64x2 = src.as_f64x2();
        let r: f64x2 = vsubsd(a, b, src, k, ROUNDING);
        transmute(r)
38285 }
38286}
38287
38288/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38289///
38290/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38291/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38292/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38293/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38294/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38295/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38296///
38297/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sub_round_sd&expand=5741)
38298#[inline]
38299#[target_feature(enable = "avx512f")]
38300#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38301#[cfg_attr(test, assert_instr(vsubsd, ROUNDING = 8))]
38302#[rustc_legacy_const_generics(3)]
38303pub fn _mm_maskz_sub_round_sd<const ROUNDING: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38304 unsafe {
38305 static_assert_rounding!(ROUNDING);
38306 let a: f64x2 = a.as_f64x2();
38307 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vsubsd(a, b, f64x2::ZERO, k, ROUNDING);
        transmute(r)
38310 }
38311}
38312
38313/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
38314///
38315/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38316/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38317/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38318/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38319/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38320/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38321///
38322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mul_round_ss&expand=3946)
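///
/// A minimal usage sketch (illustrative only; assumes `avx512f` support has
/// been confirmed beforehand):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() -> f32 {
///     let a = _mm_set_ss(3.0);
///     let b = _mm_set_ss(0.5);
///     // Low lane: 3.0 * 0.5 = 1.5; upper lanes are copied from `a`.
///     let r = _mm_mul_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
///     _mm_cvtss_f32(r) // 1.5
/// }
/// ```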
38323#[inline]
38324#[target_feature(enable = "avx512f")]
38325#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38326#[cfg_attr(test, assert_instr(vmulss, ROUNDING = 8))]
38327#[rustc_legacy_const_generics(2)]
38328pub fn _mm_mul_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
38329 unsafe {
38330 static_assert_rounding!(ROUNDING);
38331 let a: f32x4 = a.as_f32x4();
38332 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vmulss(a, b, f32x4::ZERO, 0b1, ROUNDING);
        transmute(r)
38335 }
38336}
38337
38338/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38339///
38340/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38341/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38342/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38343/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38344/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38345/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38346///
38347/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_mul_round_ss&expand=3944)
38348#[inline]
38349#[target_feature(enable = "avx512f")]
38350#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38351#[cfg_attr(test, assert_instr(vmulss, ROUNDING = 8))]
38352#[rustc_legacy_const_generics(4)]
38353pub fn _mm_mask_mul_round_ss<const ROUNDING: i32>(
38354 src: __m128,
38355 k: __mmask8,
38356 a: __m128,
38357 b: __m128,
38358) -> __m128 {
38359 unsafe {
38360 static_assert_rounding!(ROUNDING);
38361 let a: f32x4 = a.as_f32x4();
38362 let b: f32x4 = b.as_f32x4();
38363 let src: f32x4 = src.as_f32x4();
        let r: f32x4 = vmulss(a, b, src, k, ROUNDING);
        transmute(r)
38366 }
38367}
38368
38369/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38370///
38371/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38372/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38373/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38374/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38375/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38376/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38377///
38378/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_mul_round_ss&expand=3945)
38379#[inline]
38380#[target_feature(enable = "avx512f")]
38381#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38382#[cfg_attr(test, assert_instr(vmulss, ROUNDING = 8))]
38383#[rustc_legacy_const_generics(3)]
38384pub fn _mm_maskz_mul_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38385 unsafe {
38386 static_assert_rounding!(ROUNDING);
38387 let a: f32x4 = a.as_f32x4();
38388 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vmulss(a, b, f32x4::ZERO, k, ROUNDING);
        transmute(r)
38391 }
38392}
38393
38394/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
38395///
38396/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38397/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38398/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38399/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38400/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38401/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38402///
38403/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mul_round_sd&expand=3943)
38404#[inline]
38405#[target_feature(enable = "avx512f")]
38406#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38407#[cfg_attr(test, assert_instr(vmulsd, ROUNDING = 8))]
38408#[rustc_legacy_const_generics(2)]
38409pub fn _mm_mul_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
38410 unsafe {
38411 static_assert_rounding!(ROUNDING);
38412 let a: f64x2 = a.as_f64x2();
38413 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vmulsd(a, b, f64x2::ZERO, 0b1, ROUNDING);
        transmute(r)
38416 }
38417}
38418
38419/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38420///
38421/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38422/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38423/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38424/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38425/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38426/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38427///
38428/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_mul_round_sd&expand=3941)
38429#[inline]
38430#[target_feature(enable = "avx512f")]
38431#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38432#[cfg_attr(test, assert_instr(vmulsd, ROUNDING = 8))]
38433#[rustc_legacy_const_generics(4)]
38434pub fn _mm_mask_mul_round_sd<const ROUNDING: i32>(
38435 src: __m128d,
38436 k: __mmask8,
38437 a: __m128d,
38438 b: __m128d,
38439) -> __m128d {
38440 unsafe {
38441 static_assert_rounding!(ROUNDING);
38442 let a: f64x2 = a.as_f64x2();
38443 let b: f64x2 = b.as_f64x2();
38444 let src: f64x2 = src.as_f64x2();
        let r: f64x2 = vmulsd(a, b, src, k, ROUNDING);
        transmute(r)
38447 }
38448}
38449
38450/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38451///
38452/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38453/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38454/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38455/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38456/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38457/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38458///
38459/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_mul_round_sd&expand=3942)
38460#[inline]
38461#[target_feature(enable = "avx512f")]
38462#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38463#[cfg_attr(test, assert_instr(vmulsd, ROUNDING = 8))]
38464#[rustc_legacy_const_generics(3)]
38465pub fn _mm_maskz_mul_round_sd<const ROUNDING: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38466 unsafe {
38467 static_assert_rounding!(ROUNDING);
38468 let a: f64x2 = a.as_f64x2();
38469 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vmulsd(a, b, f64x2::ZERO, k, ROUNDING);
        transmute(r)
38472 }
38473}
38474
38475/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
38476///
38477/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38478/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38479/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38480/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38481/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38482/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38483///
38484/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_div_round_ss&expand=2174)
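///
/// A minimal usage sketch showing how the rounding mode affects an inexact
/// quotient (illustrative only; assumes `avx512f` support has been confirmed
/// beforehand):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() -> bool {
///     let a = _mm_set_ss(1.0);
///     let b = _mm_set_ss(3.0);
///     // 1.0 / 3.0 is inexact, so rounding up and rounding down differ by one ULP.
///     let up = _mm_div_round_ss::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a, b);
///     let down = _mm_div_round_ss::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a, b);
///     _mm_cvtss_f32(up) > _mm_cvtss_f32(down) // true
/// }
/// ```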
38485#[inline]
38486#[target_feature(enable = "avx512f")]
38487#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38488#[cfg_attr(test, assert_instr(vdivss, ROUNDING = 8))]
38489#[rustc_legacy_const_generics(2)]
38490pub fn _mm_div_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
38491 unsafe {
38492 static_assert_rounding!(ROUNDING);
38493 let a: f32x4 = a.as_f32x4();
38494 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vdivss(a, b, f32x4::ZERO, 0b1, ROUNDING);
        transmute(r)
38497 }
38498}
38499
38500/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38501///
38502/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38503/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38504/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38505/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38506/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38507/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38508///
38509/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_div_round_ss&expand=2175)
38510#[inline]
38511#[target_feature(enable = "avx512f")]
38512#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38513#[cfg_attr(test, assert_instr(vdivss, ROUNDING = 8))]
38514#[rustc_legacy_const_generics(4)]
38515pub fn _mm_mask_div_round_ss<const ROUNDING: i32>(
38516 src: __m128,
38517 k: __mmask8,
38518 a: __m128,
38519 b: __m128,
38520) -> __m128 {
38521 unsafe {
38522 static_assert_rounding!(ROUNDING);
38523 let a: f32x4 = a.as_f32x4();
38524 let b: f32x4 = b.as_f32x4();
38525 let src: f32x4 = src.as_f32x4();
        let r: f32x4 = vdivss(a, b, src, k, ROUNDING);
        transmute(r)
38528 }
38529}
38530
38531/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38532///
38533/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38534/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38535/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38536/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38537/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38538/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38539///
38540/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_div_round_ss&expand=2176)
38541#[inline]
38542#[target_feature(enable = "avx512f")]
38543#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38544#[cfg_attr(test, assert_instr(vdivss, ROUNDING = 8))]
38545#[rustc_legacy_const_generics(3)]
38546pub fn _mm_maskz_div_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38547 unsafe {
38548 static_assert_rounding!(ROUNDING);
38549 let a: f32x4 = a.as_f32x4();
38550 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vdivss(a, b, f32x4::ZERO, k, ROUNDING);
        transmute(r)
38553 }
38554}
38555
38556/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
38557///
38558/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38559/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38560/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38561/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38562/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38563/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38564///
38565/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_div_round_sd&expand=2171)
38566#[inline]
38567#[target_feature(enable = "avx512f")]
38568#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38569#[cfg_attr(test, assert_instr(vdivsd, ROUNDING = 8))]
38570#[rustc_legacy_const_generics(2)]
38571pub fn _mm_div_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
38572 unsafe {
38573 static_assert_rounding!(ROUNDING);
38574 let a: f64x2 = a.as_f64x2();
38575 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vdivsd(a, b, f64x2::ZERO, 0b1, ROUNDING);
        transmute(r)
38578 }
38579}
38580
38581/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38582///
38583/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38584/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38585/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38586/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38587/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38588/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38589///
38590/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_div_round_sd&expand=2172)
38591#[inline]
38592#[target_feature(enable = "avx512f")]
38593#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38594#[cfg_attr(test, assert_instr(vdivsd, ROUNDING = 8))]
38595#[rustc_legacy_const_generics(4)]
38596pub fn _mm_mask_div_round_sd<const ROUNDING: i32>(
38597 src: __m128d,
38598 k: __mmask8,
38599 a: __m128d,
38600 b: __m128d,
38601) -> __m128d {
38602 unsafe {
38603 static_assert_rounding!(ROUNDING);
38604 let a: f64x2 = a.as_f64x2();
38605 let b: f64x2 = b.as_f64x2();
38606 let src: f64x2 = src.as_f64x2();
        let r: f64x2 = vdivsd(a, b, src, k, ROUNDING);
        transmute(r)
38609 }
38610}
38611
38612/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38613///
38614/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38615/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38616/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38617/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38618/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38619/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38620///
38621/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_div_round_sd&expand=2173)
38622#[inline]
38623#[target_feature(enable = "avx512f")]
38624#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38625#[cfg_attr(test, assert_instr(vdivsd, ROUNDING = 8))]
38626#[rustc_legacy_const_generics(3)]
38627pub fn _mm_maskz_div_round_sd<const ROUNDING: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38628 unsafe {
38629 static_assert_rounding!(ROUNDING);
38630 let a: f64x2 = a.as_f64x2();
38631 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vdivsd(a, b, f64x2::ZERO, k, ROUNDING);
        transmute(r)
38634 }
38635}
38636
38637/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
38638/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38639///
38640/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_max_round_ss&expand=3668)
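///
/// A minimal usage sketch (illustrative only; assumes `avx512f` support has
/// been confirmed beforehand):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() -> f32 {
///     let a = _mm_set_ss(-1.0);
///     let b = _mm_set_ss(2.0);
///     // Low lane: max(-1.0, 2.0) = 2.0 with exceptions suppressed; upper lanes from `a`.
///     let r = _mm_max_round_ss::<_MM_FROUND_NO_EXC>(a, b);
///     _mm_cvtss_f32(r) // 2.0
/// }
/// ```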
38641#[inline]
38642#[target_feature(enable = "avx512f")]
38643#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38644#[cfg_attr(test, assert_instr(vmaxss, SAE = 8))]
38645#[rustc_legacy_const_generics(2)]
38646pub fn _mm_max_round_ss<const SAE: i32>(a: __m128, b: __m128) -> __m128 {
38647 unsafe {
38648 static_assert_sae!(SAE);
38649 let a: f32x4 = a.as_f32x4();
38650 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vmaxss(a, b, f32x4::ZERO, 0b1, SAE);
        transmute(r)
38653 }
38654}
38655
38656/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38657/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38658///
38659/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_round_ss&expand=3672)
38660#[inline]
38661#[target_feature(enable = "avx512f")]
38662#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38663#[cfg_attr(test, assert_instr(vmaxss, SAE = 8))]
38664#[rustc_legacy_const_generics(4)]
38665pub fn _mm_mask_max_round_ss<const SAE: i32>(
38666 src: __m128,
38667 k: __mmask8,
38668 a: __m128,
38669 b: __m128,
38670) -> __m128 {
38671 unsafe {
38672 static_assert_sae!(SAE);
38673 let a: f32x4 = a.as_f32x4();
38674 let b: f32x4 = b.as_f32x4();
38675 let src: f32x4 = src.as_f32x4();
        let r: f32x4 = vmaxss(a, b, src, k, SAE);
        transmute(r)
38678 }
38679}
38680
38681/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38682/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38683///
38684/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_max_round_ss&expand=3667)
38685#[inline]
38686#[target_feature(enable = "avx512f")]
38687#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38688#[cfg_attr(test, assert_instr(vmaxss, SAE = 8))]
38689#[rustc_legacy_const_generics(3)]
38690pub fn _mm_maskz_max_round_ss<const SAE: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38691 unsafe {
38692 static_assert_sae!(SAE);
38693 let a: f32x4 = a.as_f32x4();
38694 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vmaxss(a, b, f32x4::ZERO, k, SAE);
        transmute(r)
38697 }
38698}
38699
38700/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst, and copy the upper element from a to the upper element of dst.\
38701/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38702///
38703/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_max_round_sd&expand=3665)
38704#[inline]
38705#[target_feature(enable = "avx512f")]
38706#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38707#[cfg_attr(test, assert_instr(vmaxsd, SAE = 8))]
38708#[rustc_legacy_const_generics(2)]
38709pub fn _mm_max_round_sd<const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
38710 unsafe {
38711 static_assert_sae!(SAE);
38712 let a: f64x2 = a.as_f64x2();
38713 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vmaxsd(a, b, f64x2::ZERO, 0b1, SAE);
        transmute(r)
38716 }
38717}
38718
38719/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38720/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38721///
38722/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_max_round_sd&expand=3663)
38723#[inline]
38724#[target_feature(enable = "avx512f")]
38725#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38726#[cfg_attr(test, assert_instr(vmaxsd, SAE = 8))]
38727#[rustc_legacy_const_generics(4)]
38728pub fn _mm_mask_max_round_sd<const SAE: i32>(
38729 src: __m128d,
38730 k: __mmask8,
38731 a: __m128d,
38732 b: __m128d,
38733) -> __m128d {
38734 unsafe {
38735 static_assert_sae!(SAE);
38736 let a: f64x2 = a.as_f64x2();
38737 let b: f64x2 = b.as_f64x2();
38738 let src: f64x2 = src.as_f64x2();
        let r: f64x2 = vmaxsd(a, b, src, k, SAE);
        transmute(r)
38741 }
38742}
38743
38744/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38745/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38746///
38747/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_round_sd&expand=3670)
38748#[inline]
38749#[target_feature(enable = "avx512f")]
38750#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38751#[cfg_attr(test, assert_instr(vmaxsd, SAE = 8))]
38752#[rustc_legacy_const_generics(3)]
38753pub fn _mm_maskz_max_round_sd<const SAE: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38754 unsafe {
38755 static_assert_sae!(SAE);
38756 let a: f64x2 = a.as_f64x2();
38757 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vmaxsd(a, b, f64x2::ZERO, k, SAE);
        transmute(r)
38760 }
38761}
38762
38763/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
38764/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38765///
38766/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_min_round_ss&expand=3782)
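///
/// A minimal usage sketch (illustrative only; assumes `avx512f` support has
/// been confirmed beforehand):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() -> f32 {
///     let a = _mm_set_ss(-1.0);
///     let b = _mm_set_ss(2.0);
///     // Low lane: min(-1.0, 2.0) = -1.0, using the current MXCSR exception behaviour.
///     let r = _mm_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b);
///     _mm_cvtss_f32(r) // -1.0
/// }
/// ```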
38767#[inline]
38768#[target_feature(enable = "avx512f")]
38769#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38770#[cfg_attr(test, assert_instr(vminss, SAE = 8))]
38771#[rustc_legacy_const_generics(2)]
38772pub fn _mm_min_round_ss<const SAE: i32>(a: __m128, b: __m128) -> __m128 {
38773 unsafe {
38774 static_assert_sae!(SAE);
38775 let a: f32x4 = a.as_f32x4();
38776 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vminss(a, b, f32x4::ZERO, 0b1, SAE);
        transmute(r)
38779 }
38780}
38781
38782/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38783/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38784///
38785/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_min_round_ss&expand=3780)
38786#[inline]
38787#[target_feature(enable = "avx512f")]
38788#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38789#[cfg_attr(test, assert_instr(vminss, SAE = 8))]
38790#[rustc_legacy_const_generics(4)]
38791pub fn _mm_mask_min_round_ss<const SAE: i32>(
38792 src: __m128,
38793 k: __mmask8,
38794 a: __m128,
38795 b: __m128,
38796) -> __m128 {
38797 unsafe {
38798 static_assert_sae!(SAE);
38799 let a: f32x4 = a.as_f32x4();
38800 let b: f32x4 = b.as_f32x4();
38801 let src: f32x4 = src.as_f32x4();
        let r: f32x4 = vminss(a, b, src, k, SAE);
        transmute(r)
38804 }
38805}
38806
38807/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38808/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38809///
38810/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_min_round_ss&expand=3781)
38811#[inline]
38812#[target_feature(enable = "avx512f")]
38813#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38814#[cfg_attr(test, assert_instr(vminss, SAE = 8))]
38815#[rustc_legacy_const_generics(3)]
38816pub fn _mm_maskz_min_round_ss<const SAE: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38817 unsafe {
38818 static_assert_sae!(SAE);
38819 let a: f32x4 = a.as_f32x4();
38820 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vminss(a, b, f32x4::ZERO, k, SAE);
        transmute(r)
38823 }
38824}
38825
/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst, and copy the upper element from a to the upper element of dst.\
38827/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38828///
38829/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_min_round_sd&expand=3779)
38830#[inline]
38831#[target_feature(enable = "avx512f")]
38832#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38833#[cfg_attr(test, assert_instr(vminsd, SAE = 8))]
38834#[rustc_legacy_const_generics(2)]
38835pub fn _mm_min_round_sd<const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
38836 unsafe {
38837 static_assert_sae!(SAE);
38838 let a: f64x2 = a.as_f64x2();
38839 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vminsd(a, b, f64x2::ZERO, 0b1, SAE);
        transmute(r)
38842 }
38843}
38844
38845/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38846/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38847///
38848/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_min_round_sd&expand=3777)
38849#[inline]
38850#[target_feature(enable = "avx512f")]
38851#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38852#[cfg_attr(test, assert_instr(vminsd, SAE = 8))]
38853#[rustc_legacy_const_generics(4)]
38854pub fn _mm_mask_min_round_sd<const SAE: i32>(
38855 src: __m128d,
38856 k: __mmask8,
38857 a: __m128d,
38858 b: __m128d,
38859) -> __m128d {
38860 unsafe {
38861 static_assert_sae!(SAE);
38862 let a: f64x2 = a.as_f64x2();
38863 let b: f64x2 = b.as_f64x2();
38864 let src: f64x2 = src.as_f64x2();
        let r: f64x2 = vminsd(a, b, src, k, SAE);
        transmute(r)
38867 }
38868}
38869
38870/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38871/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38872///
38873/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_min_round_sd&expand=3778)
38874#[inline]
38875#[target_feature(enable = "avx512f")]
38876#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38877#[cfg_attr(test, assert_instr(vminsd, SAE = 8))]
38878#[rustc_legacy_const_generics(3)]
38879pub fn _mm_maskz_min_round_sd<const SAE: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38880 unsafe {
38881 static_assert_sae!(SAE);
38882 let a: f64x2 = a.as_f64x2();
38883 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vminsd(a, b, f64x2::ZERO, k, SAE);
        transmute(r)
38886 }
38887}
38888
38889/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
38890///
38891/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38892/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38893/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38894/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38895/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38896/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38897///
38898/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_sqrt_round_ss&expand=5383)
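///
/// A minimal usage sketch (illustrative only; assumes `avx512f` support has
/// been confirmed beforehand). Note that the square root is taken from the low
/// element of `b`, while the upper lanes come from `a`:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() -> f32 {
///     let a = _mm_set_ss(1.0);
///     let b = _mm_set_ss(9.0);
///     // Low lane: sqrt(9.0) = 3.0 rounded to nearest; upper lanes are copied from `a`.
///     let r = _mm_sqrt_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
///     _mm_cvtss_f32(r) // 3.0
/// }
/// ```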
38899#[inline]
38900#[target_feature(enable = "avx512f")]
38901#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38902#[cfg_attr(test, assert_instr(vsqrtss, ROUNDING = 8))]
38903#[rustc_legacy_const_generics(2)]
38904pub fn _mm_sqrt_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
38905 unsafe {
38906 static_assert_rounding!(ROUNDING);
        vsqrtss(a, b, _mm_setzero_ps(), 0b1, ROUNDING)
38908 }
38909}
38910
38911/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38912///
38913/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38914/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38915/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38916/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38917/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38918/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38919///
38920/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sqrt_round_ss&expand=5381)
38921#[inline]
38922#[target_feature(enable = "avx512f")]
38923#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38924#[cfg_attr(test, assert_instr(vsqrtss, ROUNDING = 8))]
38925#[rustc_legacy_const_generics(4)]
38926pub fn _mm_mask_sqrt_round_ss<const ROUNDING: i32>(
38927 src: __m128,
38928 k: __mmask8,
38929 a: __m128,
38930 b: __m128,
38931) -> __m128 {
38932 unsafe {
38933 static_assert_rounding!(ROUNDING);
        vsqrtss(a, b, src, k, ROUNDING)
38935 }
38936}
38937
38938/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38939///
38940/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38941/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38942/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38943/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38944/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38945/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38946///
38947/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sqrt_round_ss&expand=5382)
38948#[inline]
38949#[target_feature(enable = "avx512f")]
38950#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38951#[cfg_attr(test, assert_instr(vsqrtss, ROUNDING = 8))]
38952#[rustc_legacy_const_generics(3)]
38953pub fn _mm_maskz_sqrt_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38954 unsafe {
38955 static_assert_rounding!(ROUNDING);
        vsqrtss(a, b, _mm_setzero_ps(), k, ROUNDING)
38957 }
38958}
38959
38960/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
38961///
38962/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38963/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38964/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38965/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38966/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38967/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38968///
38969/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_sqrt_round_sd&expand=5380)
38970#[inline]
38971#[target_feature(enable = "avx512f")]
38972#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38973#[cfg_attr(test, assert_instr(vsqrtsd, ROUNDING = 8))]
38974#[rustc_legacy_const_generics(2)]
38975pub fn _mm_sqrt_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
38976 unsafe {
38977 static_assert_rounding!(ROUNDING);
        vsqrtsd(a, b, _mm_setzero_pd(), 0b1, ROUNDING)
38979 }
38980}
38981
38982/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38983///
38984/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38985/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38986/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38987/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38988/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38989/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38990///
38991/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sqrt_round_sd&expand=5378)
38992#[inline]
38993#[target_feature(enable = "avx512f")]
38994#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
38995#[cfg_attr(test, assert_instr(vsqrtsd, ROUNDING = 8))]
38996#[rustc_legacy_const_generics(4)]
38997pub fn _mm_mask_sqrt_round_sd<const ROUNDING: i32>(
38998 src: __m128d,
38999 k: __mmask8,
39000 a: __m128d,
39001 b: __m128d,
39002) -> __m128d {
39003 unsafe {
39004 static_assert_rounding!(ROUNDING);
        vsqrtsd(a, b, src, k, ROUNDING)
39006 }
39007}
39008
39009/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39010///
39011/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39012/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39013/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39014/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39015/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39016/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39017///
39018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sqrt_round_sd&expand=5379)
39019#[inline]
39020#[target_feature(enable = "avx512f")]
39021#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39022#[cfg_attr(test, assert_instr(vsqrtsd, ROUNDING = 8))]
39023#[rustc_legacy_const_generics(3)]
39024pub fn _mm_maskz_sqrt_round_sd<const ROUNDING: i32>(
39025 k: __mmask8,
39026 a: __m128d,
39027 b: __m128d,
39028) -> __m128d {
39029 unsafe {
39030 static_assert_rounding!(ROUNDING);
        vsqrtsd(a, b, _mm_setzero_pd(), k, ROUNDING)
39032 }
39033}
39034
39035/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
39036/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39037///
39038/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getexp_round_ss&expand=2856)
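///
/// A minimal usage sketch (illustrative only; assumes `avx512f` support has
/// been confirmed beforehand):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() -> f32 {
///     let a = _mm_set_ss(1.0);
///     let b = _mm_set_ss(8.0);
///     // Low lane: floor(log2(8.0)) = 3.0, returned as an f32; upper lanes come from `a`.
///     let r = _mm_getexp_round_ss::<_MM_FROUND_NO_EXC>(a, b);
///     _mm_cvtss_f32(r) // 3.0
/// }
/// ```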
39039#[inline]
39040#[target_feature(enable = "avx512f")]
39041#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39042#[cfg_attr(test, assert_instr(vgetexpss, SAE = 8))]
39043#[rustc_legacy_const_generics(2)]
39044pub fn _mm_getexp_round_ss<const SAE: i32>(a: __m128, b: __m128) -> __m128 {
39045 unsafe {
39046 static_assert_sae!(SAE);
39047 let a: f32x4 = a.as_f32x4();
39048 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vgetexpss(a, b, f32x4::ZERO, 0b1, SAE);
        transmute(r)
39051 }
39052}
39053
39054/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
39055/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39056///
39057/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getexp_round_ss&expand=2857)
39058#[inline]
39059#[target_feature(enable = "avx512f")]
39060#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39061#[cfg_attr(test, assert_instr(vgetexpss, SAE = 8))]
39062#[rustc_legacy_const_generics(4)]
39063pub fn _mm_mask_getexp_round_ss<const SAE: i32>(
39064 src: __m128,
39065 k: __mmask8,
39066 a: __m128,
39067 b: __m128,
39068) -> __m128 {
39069 unsafe {
39070 static_assert_sae!(SAE);
39071 let a: f32x4 = a.as_f32x4();
39072 let b: f32x4 = b.as_f32x4();
39073 let src: f32x4 = src.as_f32x4();
        let r: f32x4 = vgetexpss(a, b, src, k, SAE);
        transmute(r)
39076 }
39077}
39078
39079/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
39080/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39081///
39082/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getexp_round_ss&expand=2858)
39083#[inline]
39084#[target_feature(enable = "avx512f")]
39085#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39086#[cfg_attr(test, assert_instr(vgetexpss, SAE = 8))]
39087#[rustc_legacy_const_generics(3)]
39088pub fn _mm_maskz_getexp_round_ss<const SAE: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
39089 unsafe {
39090 static_assert_sae!(SAE);
39091 let a: f32x4 = a.as_f32x4();
39092 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vgetexpss(a, b, f32x4::ZERO, k, SAE);
        transmute(r)
39095 }
39096}
39097
39098/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
39099/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39100///
39101/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getexp_round_sd&expand=2853)
39102#[inline]
39103#[target_feature(enable = "avx512f")]
39104#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39105#[cfg_attr(test, assert_instr(vgetexpsd, SAE = 8))]
39106#[rustc_legacy_const_generics(2)]
39107pub fn _mm_getexp_round_sd<const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
39108 unsafe {
39109 static_assert_sae!(SAE);
39110 let a: f64x2 = a.as_f64x2();
39111 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vgetexpsd(a, b, f64x2::ZERO, 0b1, SAE);
        transmute(r)
39114 }
39115}
39116
39117/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
39118/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39119///
39120/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getexp_round_sd&expand=2854)
39121#[inline]
39122#[target_feature(enable = "avx512f")]
39123#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39124#[cfg_attr(test, assert_instr(vgetexpsd, SAE = 8))]
39125#[rustc_legacy_const_generics(4)]
39126pub fn _mm_mask_getexp_round_sd<const SAE: i32>(
39127 src: __m128d,
39128 k: __mmask8,
39129 a: __m128d,
39130 b: __m128d,
39131) -> __m128d {
39132 unsafe {
39133 static_assert_sae!(SAE);
39134 let a: f64x2 = a.as_f64x2();
39135 let b: f64x2 = b.as_f64x2();
39136 let src: f64x2 = src.as_f64x2();
        let r: f64x2 = vgetexpsd(a, b, src, k, SAE);
        transmute(r)
39139 }
39140}
39141
39142/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
39143/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39144///
39145/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getexp_round_sd&expand=2855)
39146#[inline]
39147#[target_feature(enable = "avx512f")]
39148#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39149#[cfg_attr(test, assert_instr(vgetexpsd, SAE = 8))]
39150#[rustc_legacy_const_generics(3)]
39151pub fn _mm_maskz_getexp_round_sd<const SAE: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
39152 unsafe {
39153 static_assert_sae!(SAE);
39154 let a: f64x2 = a.as_f64x2();
39155 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vgetexpsd(a, b, f64x2::ZERO, k, SAE);
        transmute(r)
39158 }
39159}
39160
39161/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
39162/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
39163/// _MM_MANT_NORM_1_2 // interval [1, 2)\
39164/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
39165/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
39166/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
39167/// The sign is determined by sc which can take the following values:\
39168/// _MM_MANT_SIGN_src // sign = sign(src)\
39169/// _MM_MANT_SIGN_zero // sign = 0\
39170/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
39171/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39172///
39173/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getmant_round_ss&expand=2892)
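///
/// A minimal usage sketch (illustrative only; assumes `avx512f` support has
/// been confirmed beforehand and uses the Rust spellings of the interval and
/// sign constants, `_MM_MANT_NORM_1_2` and `_MM_MANT_SIGN_SRC`):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() -> f32 {
///     let a = _mm_set_ss(1.0);
///     let b = _mm_set_ss(10.0);
///     // 10.0 = 1.25 * 2^3, so normalizing the mantissa to [1, 2) with the source sign gives 1.25.
///     let r = _mm_getmant_round_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC, _MM_FROUND_CUR_DIRECTION>(
///         a, b,
///     );
///     _mm_cvtss_f32(r) // 1.25
/// }
/// ```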
39174#[inline]
39175#[target_feature(enable = "avx512f")]
39176#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39177#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0, SAE = 4))]
39178#[rustc_legacy_const_generics(2, 3, 4)]
39179pub fn _mm_getmant_round_ss<
39180 const NORM: _MM_MANTISSA_NORM_ENUM,
39181 const SIGN: _MM_MANTISSA_SIGN_ENUM,
39182 const SAE: i32,
39183>(
39184 a: __m128,
39185 b: __m128,
39186) -> __m128 {
39187 unsafe {
39188 static_assert_uimm_bits!(NORM, 4);
39189 static_assert_uimm_bits!(SIGN, 2);
39190 static_assert_mantissas_sae!(SAE);
39191 let a: f32x4 = a.as_f32x4();
39192 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vgetmantss(a, b, SIGN << 2 | NORM, f32x4::ZERO, 0b1, SAE);
        transmute(r)
39195 }
39196}
39197
39198/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
39199/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
39200/// _MM_MANT_NORM_1_2 // interval [1, 2)\
39201/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
39202/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
39203/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
39204/// The sign is determined by sc which can take the following values:\
39205/// _MM_MANT_SIGN_src // sign = sign(src)\
39206/// _MM_MANT_SIGN_zero // sign = 0\
39207/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
39208/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39209///
39210/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getmant_round_ss&expand=2893)
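///
/// A minimal usage sketch of the writemask behavior (illustrative only; assumes an
/// `avx512f`-enabled caller, hence the `ignore` block):
///
/// ```ignore
/// let src = _mm_set_ss(42.0);
/// let a = _mm_setzero_ps();
/// let b = _mm_set_ss(10.0);
/// // Mask bit 0 set: the lower lane holds the normalized mantissa of 10.0, i.e. 1.25.
/// let r = _mm_mask_getmant_round_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_src, _MM_FROUND_NO_EXC>(src, 0b1, a, b);
/// // Mask bit 0 clear: the lower lane is copied from `src` (42.0) instead.
/// let s = _mm_mask_getmant_round_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_src, _MM_FROUND_NO_EXC>(src, 0b0, a, b);
/// ```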
39211#[inline]
39212#[target_feature(enable = "avx512f")]
39213#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39214#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0, SAE = 4))]
39215#[rustc_legacy_const_generics(4, 5, 6)]
39216pub fn _mm_mask_getmant_round_ss<
39217 const NORM: _MM_MANTISSA_NORM_ENUM,
39218 const SIGN: _MM_MANTISSA_SIGN_ENUM,
39219 const SAE: i32,
39220>(
39221 src: __m128,
39222 k: __mmask8,
39223 a: __m128,
39224 b: __m128,
39225) -> __m128 {
39226 unsafe {
39227 static_assert_uimm_bits!(NORM, 4);
39228 static_assert_uimm_bits!(SIGN, 2);
39229 static_assert_mantissas_sae!(SAE);
39230 let a: f32x4 = a.as_f32x4();
39231 let b: f32x4 = b.as_f32x4();
39232 let src: f32x4 = src.as_f32x4();
        let r: f32x4 = vgetmantss(a, b, SIGN << 2 | NORM, src, k, SAE);
        transmute(r)
39235 }
39236}
39237
39238/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
39239/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
39240/// _MM_MANT_NORM_1_2 // interval [1, 2)\
39241/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
39242/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
39243/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
39244/// The sign is determined by sc which can take the following values:\
39245/// _MM_MANT_SIGN_src // sign = sign(src)\
39246/// _MM_MANT_SIGN_zero // sign = 0\
39247/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
39248/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39249///
39250/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getmant_round_ss&expand=2894)
39251#[inline]
39252#[target_feature(enable = "avx512f")]
39253#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39254#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0, SAE = 4))]
39255#[rustc_legacy_const_generics(3, 4, 5)]
39256pub fn _mm_maskz_getmant_round_ss<
39257 const NORM: _MM_MANTISSA_NORM_ENUM,
39258 const SIGN: _MM_MANTISSA_SIGN_ENUM,
39259 const SAE: i32,
39260>(
39261 k: __mmask8,
39262 a: __m128,
39263 b: __m128,
39264) -> __m128 {
39265 unsafe {
39266 static_assert_uimm_bits!(NORM, 4);
39267 static_assert_uimm_bits!(SIGN, 2);
39268 static_assert_mantissas_sae!(SAE);
39269 let a: f32x4 = a.as_f32x4();
39270 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vgetmantss(a, b, SIGN << 2 | NORM, f32x4::ZERO, k, SAE);
        transmute(r)
39273 }
39274}
39275
39276/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
39277/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
39278/// _MM_MANT_NORM_1_2 // interval [1, 2)\
39279/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
39280/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
39281/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
39282/// The sign is determined by sc which can take the following values:\
39283/// _MM_MANT_SIGN_src // sign = sign(src)\
39284/// _MM_MANT_SIGN_zero // sign = 0\
39285/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
39286/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39287///
39288/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getmant_round_sd&expand=2889)
39289#[inline]
39290#[target_feature(enable = "avx512f")]
39291#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39292#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0, SAE = 4))]
39293#[rustc_legacy_const_generics(2, 3, 4)]
39294pub fn _mm_getmant_round_sd<
39295 const NORM: _MM_MANTISSA_NORM_ENUM,
39296 const SIGN: _MM_MANTISSA_SIGN_ENUM,
39297 const SAE: i32,
39298>(
39299 a: __m128d,
39300 b: __m128d,
39301) -> __m128d {
39302 unsafe {
39303 static_assert_uimm_bits!(NORM, 4);
39304 static_assert_uimm_bits!(SIGN, 2);
39305 static_assert_mantissas_sae!(SAE);
39306 let a: f64x2 = a.as_f64x2();
39307 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vgetmantsd(a, b, SIGN << 2 | NORM, f64x2::ZERO, 0b1, SAE);
        transmute(r)
39310 }
39311}
39312
39313/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
39314/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
39315/// _MM_MANT_NORM_1_2 // interval [1, 2)\
39316/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
39317/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
39318/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
39319/// The sign is determined by sc which can take the following values:\
39320/// _MM_MANT_SIGN_src // sign = sign(src)\
39321/// _MM_MANT_SIGN_zero // sign = 0\
39322/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
39323/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39324///
39325/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getmant_round_sd&expand=2890)
39326#[inline]
39327#[target_feature(enable = "avx512f")]
39328#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39329#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0, SAE = 4))]
39330#[rustc_legacy_const_generics(4, 5, 6)]
39331pub fn _mm_mask_getmant_round_sd<
39332 const NORM: _MM_MANTISSA_NORM_ENUM,
39333 const SIGN: _MM_MANTISSA_SIGN_ENUM,
39334 const SAE: i32,
39335>(
39336 src: __m128d,
39337 k: __mmask8,
39338 a: __m128d,
39339 b: __m128d,
39340) -> __m128d {
39341 unsafe {
39342 static_assert_uimm_bits!(NORM, 4);
39343 static_assert_uimm_bits!(SIGN, 2);
39344 static_assert_mantissas_sae!(SAE);
39345 let a: f64x2 = a.as_f64x2();
39346 let b: f64x2 = b.as_f64x2();
39347 let src: f64x2 = src.as_f64x2();
        let r: f64x2 = vgetmantsd(a, b, SIGN << 2 | NORM, src, k, SAE);
        transmute(r)
39350 }
39351}
39352
39353/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
39354/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
39355/// _MM_MANT_NORM_1_2 // interval [1, 2)\
39356/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
39357/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
39358/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
39359/// The sign is determined by sc which can take the following values:\
39360/// _MM_MANT_SIGN_src // sign = sign(src)\
39361/// _MM_MANT_SIGN_zero // sign = 0\
39362/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
39363/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39364///
39365/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getmant_round_sd&expand=2891)
39366#[inline]
39367#[target_feature(enable = "avx512f")]
39368#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39369#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0, SAE = 4))]
39370#[rustc_legacy_const_generics(3, 4, 5)]
39371pub fn _mm_maskz_getmant_round_sd<
39372 const NORM: _MM_MANTISSA_NORM_ENUM,
39373 const SIGN: _MM_MANTISSA_SIGN_ENUM,
39374 const SAE: i32,
39375>(
39376 k: __mmask8,
39377 a: __m128d,
39378 b: __m128d,
39379) -> __m128d {
39380 unsafe {
39381 static_assert_uimm_bits!(NORM, 4);
39382 static_assert_uimm_bits!(SIGN, 2);
39383 static_assert_mantissas_sae!(SAE);
39384 let a: f64x2 = a.as_f64x2();
39385 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vgetmantsd(a, b, SIGN << 2 | NORM, f64x2::ZERO, k, SAE);
        transmute(r)
39388 }
39389}
39390
39391/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
39392/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
39393/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
39394/// * [`_MM_FROUND_TO_NEG_INF`] : round down
39395/// * [`_MM_FROUND_TO_POS_INF`] : round up
39396/// * [`_MM_FROUND_TO_ZERO`] : truncate
39397/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39398///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_roundscale_round_ss&expand=4796)
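///
/// A minimal usage sketch (illustrative only; assumes an `avx512f`-enabled caller, hence
/// the `ignore` block):
///
/// ```ignore
/// let a = _mm_set1_ps(2.0);
/// let b = _mm_set_ss(1.3);
/// // imm8 = 0: no fraction bits kept and round-to-nearest, so the lower lane becomes 1.0.
/// let r = _mm_roundscale_round_ss::<0, _MM_FROUND_NO_EXC>(a, b);
/// // imm8 = 0x10: keep one fraction bit, i.e. round to the nearest multiple of 0.5, giving 1.5.
/// let s = _mm_roundscale_round_ss::<0x10, _MM_FROUND_NO_EXC>(a, b);
/// ```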
39401#[inline]
39402#[target_feature(enable = "avx512f")]
39403#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39404#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0, SAE = 8))]
39405#[rustc_legacy_const_generics(2, 3)]
39406pub fn _mm_roundscale_round_ss<const IMM8: i32, const SAE: i32>(a: __m128, b: __m128) -> __m128 {
39407 unsafe {
39408 static_assert_uimm_bits!(IMM8, 8);
39409 static_assert_mantissas_sae!(SAE);
39410 let a: f32x4 = a.as_f32x4();
39411 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vrndscaless(a, b, f32x4::ZERO, 0b11111111, IMM8, SAE);
        transmute(r)
39414 }
39415}
39416
39417/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39418/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
39419/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
39420/// * [`_MM_FROUND_TO_NEG_INF`] : round down
39421/// * [`_MM_FROUND_TO_POS_INF`] : round up
39422/// * [`_MM_FROUND_TO_ZERO`] : truncate
39423/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39424///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_roundscale_round_ss&expand=4794)
39427#[inline]
39428#[target_feature(enable = "avx512f")]
39429#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39430#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0, SAE = 8))]
39431#[rustc_legacy_const_generics(4, 5)]
39432pub fn _mm_mask_roundscale_round_ss<const IMM8: i32, const SAE: i32>(
39433 src: __m128,
39434 k: __mmask8,
39435 a: __m128,
39436 b: __m128,
39437) -> __m128 {
39438 unsafe {
39439 static_assert_uimm_bits!(IMM8, 8);
39440 static_assert_mantissas_sae!(SAE);
39441 let a: f32x4 = a.as_f32x4();
39442 let b: f32x4 = b.as_f32x4();
39443 let src: f32x4 = src.as_f32x4();
        let r: f32x4 = vrndscaless(a, b, src, k, IMM8, SAE);
        transmute(r)
39446 }
39447}
39448
39449/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39450/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
39451/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
39452/// * [`_MM_FROUND_TO_NEG_INF`] : round down
39453/// * [`_MM_FROUND_TO_POS_INF`] : round up
39454/// * [`_MM_FROUND_TO_ZERO`] : truncate
39455/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39456///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_roundscale_round_ss&expand=4795)
39459#[inline]
39460#[target_feature(enable = "avx512f")]
39461#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39462#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0, SAE = 8))]
39463#[rustc_legacy_const_generics(3, 4)]
39464pub fn _mm_maskz_roundscale_round_ss<const IMM8: i32, const SAE: i32>(
39465 k: __mmask8,
39466 a: __m128,
39467 b: __m128,
39468) -> __m128 {
39469 unsafe {
39470 static_assert_uimm_bits!(IMM8, 8);
39471 static_assert_mantissas_sae!(SAE);
39472 let a: f32x4 = a.as_f32x4();
39473 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vrndscaless(a, b, f32x4::ZERO, k, IMM8, SAE);
        transmute(r)
39476 }
39477}
39478
39479/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
39480/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
39481/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
39482/// * [`_MM_FROUND_TO_NEG_INF`] : round down
39483/// * [`_MM_FROUND_TO_POS_INF`] : round up
39484/// * [`_MM_FROUND_TO_ZERO`] : truncate
39485/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39486///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_roundscale_round_sd&expand=4793)
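///
/// A minimal usage sketch (illustrative only; assumes an `avx512f`-enabled caller, hence
/// the `ignore` block):
///
/// ```ignore
/// let a = _mm_set_pd(6.0, 0.0);
/// let b = _mm_set_sd(2.1);
/// // imm8[2:0] = _MM_FROUND_TO_POS_INF: the lower lane is rounded up to 3.0;
/// // the upper lane is 6.0, copied from `a`.
/// let r = _mm_roundscale_round_sd::<_MM_FROUND_TO_POS_INF, _MM_FROUND_NO_EXC>(a, b);
/// ```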
39489#[inline]
39490#[target_feature(enable = "avx512f")]
39491#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39492#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0, SAE = 8))]
39493#[rustc_legacy_const_generics(2, 3)]
39494pub fn _mm_roundscale_round_sd<const IMM8: i32, const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
39495 unsafe {
39496 static_assert_uimm_bits!(IMM8, 8);
39497 static_assert_mantissas_sae!(SAE);
39498 let a: f64x2 = a.as_f64x2();
39499 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vrndscalesd(a, b, f64x2::ZERO, 0b11111111, IMM8, SAE);
        transmute(r)
39502 }
39503}
39504
39505/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39506/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
39507/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
39508/// * [`_MM_FROUND_TO_NEG_INF`] : round down
39509/// * [`_MM_FROUND_TO_POS_INF`] : round up
39510/// * [`_MM_FROUND_TO_ZERO`] : truncate
39511/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39512///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_roundscale_round_sd&expand=4791)
39515#[inline]
39516#[target_feature(enable = "avx512f")]
39517#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39518#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0, SAE = 8))]
39519#[rustc_legacy_const_generics(4, 5)]
39520pub fn _mm_mask_roundscale_round_sd<const IMM8: i32, const SAE: i32>(
39521 src: __m128d,
39522 k: __mmask8,
39523 a: __m128d,
39524 b: __m128d,
39525) -> __m128d {
39526 unsafe {
39527 static_assert_uimm_bits!(IMM8, 8);
39528 static_assert_mantissas_sae!(SAE);
39529 let a: f64x2 = a.as_f64x2();
39530 let b: f64x2 = b.as_f64x2();
39531 let src: f64x2 = src.as_f64x2();
        let r: f64x2 = vrndscalesd(a, b, src, k, IMM8, SAE);
        transmute(r)
39534 }
39535}
39536
39537/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39538/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
39539/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
39540/// * [`_MM_FROUND_TO_NEG_INF`] : round down
39541/// * [`_MM_FROUND_TO_POS_INF`] : round up
39542/// * [`_MM_FROUND_TO_ZERO`] : truncate
39543/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39544///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_roundscale_round_sd&expand=4792)
39547#[inline]
39548#[target_feature(enable = "avx512f")]
39549#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39550#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0, SAE = 8))]
39551#[rustc_legacy_const_generics(3, 4)]
39552pub fn _mm_maskz_roundscale_round_sd<const IMM8: i32, const SAE: i32>(
39553 k: __mmask8,
39554 a: __m128d,
39555 b: __m128d,
39556) -> __m128d {
39557 unsafe {
39558 static_assert_uimm_bits!(IMM8, 8);
39559 static_assert_mantissas_sae!(SAE);
39560 let a: f64x2 = a.as_f64x2();
39561 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vrndscalesd(a, b, f64x2::ZERO, k, IMM8, SAE);
        transmute(r)
39564 }
39565}
39566
39567/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
39568///
39569/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39570/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39571/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39572/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39573/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39574/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39575///
39576/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_scalef_round_ss&expand=4895)
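///
/// A minimal usage sketch (illustrative only; assumes an `avx512f`-enabled caller, hence
/// the `ignore` block):
///
/// ```ignore
/// let a = _mm_set_ss(3.0);
/// let b = _mm_set_ss(2.0);
/// // Lower lane: 3.0 * 2^floor(2.0) = 12.0; the upper three lanes are copied from `a`.
/// let r = _mm_scalef_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b);
/// ```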
39577#[inline]
39578#[target_feature(enable = "avx512f")]
39579#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39580#[cfg_attr(test, assert_instr(vscalefss, ROUNDING = 8))]
39581#[rustc_legacy_const_generics(2)]
39582pub fn _mm_scalef_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
39583 unsafe {
39584 static_assert_rounding!(ROUNDING);
39585 let a: f32x4 = a.as_f32x4();
39586 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vscalefss(a, b, f32x4::ZERO, 0b11111111, ROUNDING);
        transmute(r)
39589 }
39590}
39591
39592/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39593///
39594/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39595/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39596/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39597/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39598/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39599/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39600///
39601/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_scalef_round_ss&expand=4893)
39602#[inline]
39603#[target_feature(enable = "avx512f")]
39604#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39605#[cfg_attr(test, assert_instr(vscalefss, ROUNDING = 8))]
39606#[rustc_legacy_const_generics(4)]
39607pub fn _mm_mask_scalef_round_ss<const ROUNDING: i32>(
39608 src: __m128,
39609 k: __mmask8,
39610 a: __m128,
39611 b: __m128,
39612) -> __m128 {
39613 unsafe {
39614 static_assert_rounding!(ROUNDING);
39615 let a: f32x4 = a.as_f32x4();
39616 let b: f32x4 = b.as_f32x4();
39617 let src: f32x4 = src.as_f32x4();
        let r: f32x4 = vscalefss(a, b, src, k, ROUNDING);
        transmute(r)
39620 }
39621}
39622
39623/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39624///
39625/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39626/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39627/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39628/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39629/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39630/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39631///
39632/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_scalef_round_ss&expand=4894)
39633#[inline]
39634#[target_feature(enable = "avx512f")]
39635#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39636#[cfg_attr(test, assert_instr(vscalefss, ROUNDING = 8))]
39637#[rustc_legacy_const_generics(3)]
39638pub fn _mm_maskz_scalef_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
39639 unsafe {
39640 static_assert_rounding!(ROUNDING);
39641 let a: f32x4 = a.as_f32x4();
39642 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vscalefss(a, b, f32x4::ZERO, k, ROUNDING);
        transmute(r)
39645 }
39646}
39647
39648/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
39649///
39650/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39651/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39652/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39653/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39654/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39655/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39656///
39657/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_scalef_round_sd&expand=4892)
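///
/// A minimal usage sketch (illustrative only; assumes an `avx512f`-enabled caller, hence
/// the `ignore` block):
///
/// ```ignore
/// let a = _mm_set_sd(1.5);
/// let b = _mm_set_sd(3.0);
/// // Lower lane: 1.5 * 2^floor(3.0) = 12.0; the upper lane is copied from `a`.
/// let r = _mm_scalef_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
/// ```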
39658#[inline]
39659#[target_feature(enable = "avx512f")]
39660#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39661#[cfg_attr(test, assert_instr(vscalefsd, ROUNDING = 8))]
39662#[rustc_legacy_const_generics(2)]
39663pub fn _mm_scalef_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
39664 unsafe {
39665 static_assert_rounding!(ROUNDING);
39666 let a: f64x2 = a.as_f64x2();
39667 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vscalefsd(a, b, f64x2::ZERO, 0b11111111, ROUNDING);
        transmute(r)
39670 }
39671}
39672
39673/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39674///
39675/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39676/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39677/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39678/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39679/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39680/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39681///
39682/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_scalef_round_sd&expand=4890)
39683#[inline]
39684#[target_feature(enable = "avx512f")]
39685#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39686#[cfg_attr(test, assert_instr(vscalefsd, ROUNDING = 8))]
39687#[rustc_legacy_const_generics(4)]
39688pub fn _mm_mask_scalef_round_sd<const ROUNDING: i32>(
39689 src: __m128d,
39690 k: __mmask8,
39691 a: __m128d,
39692 b: __m128d,
39693) -> __m128d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a: f64x2 = a.as_f64x2();
        let b: f64x2 = b.as_f64x2();
        let src: f64x2 = src.as_f64x2();
        let r: f64x2 = vscalefsd(a, b, src, k, ROUNDING);
        transmute(r)
39700 }
39701}
39702
39703/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39704///
39705/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39706/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39707/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39708/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39709/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39710/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39711///
39712/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_scalef_round_sd&expand=4891)
39713#[inline]
39714#[target_feature(enable = "avx512f")]
39715#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39716#[cfg_attr(test, assert_instr(vscalefsd, ROUNDING = 8))]
39717#[rustc_legacy_const_generics(3)]
39718pub fn _mm_maskz_scalef_round_sd<const ROUNDING: i32>(
39719 k: __mmask8,
39720 a: __m128d,
39721 b: __m128d,
39722) -> __m128d {
39723 unsafe {
39724 static_assert_rounding!(ROUNDING);
39725 let a: f64x2 = a.as_f64x2();
39726 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vscalefsd(a, b, f64x2::ZERO, k, ROUNDING);
        transmute(r)
39729 }
39730}
39731
39732/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
39733///
39734/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39735/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39736/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39737/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39738/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39739/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39740///
39741/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fmadd_round_ss&expand=2573)
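///
/// A minimal usage sketch (illustrative only; assumes an `avx512f`-enabled caller, hence
/// the `ignore` block):
///
/// ```ignore
/// let a = _mm_set_ss(2.0);
/// let b = _mm_set_ss(3.0);
/// let c = _mm_set_ss(1.0);
/// // Lower lane: 2.0 * 3.0 + 1.0 = 7.0, computed with a single rounding;
/// // the upper three lanes are copied from `a`.
/// let r = _mm_fmadd_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b, c);
/// ```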
39742#[inline]
39743#[target_feature(enable = "avx512f")]
39744#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39745#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39746#[rustc_legacy_const_generics(3)]
39747pub fn _mm_fmadd_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
39748 unsafe {
39749 static_assert_rounding!(ROUNDING);
39750 let extracta: f32 = simd_extract!(a, 0);
39751 let extractb: f32 = simd_extract!(b, 0);
39752 let extractc: f32 = simd_extract!(c, 0);
        let r: f32 = vfmaddssround(extracta, extractb, extractc, ROUNDING);
39754 simd_insert!(a, 0, r)
39755 }
39756}
39757
39758/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39759///
39760/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39761/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39762/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39763/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39764/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39765/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39766///
39767/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmadd_round_ss&expand=2574)
39768#[inline]
39769#[target_feature(enable = "avx512f")]
39770#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39771#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39772#[rustc_legacy_const_generics(4)]
39773pub fn _mm_mask_fmadd_round_ss<const ROUNDING: i32>(
39774 a: __m128,
39775 k: __mmask8,
39776 b: __m128,
39777 c: __m128,
39778) -> __m128 {
39779 unsafe {
39780 static_assert_rounding!(ROUNDING);
39781 let mut fmadd: f32 = simd_extract!(a, 0);
39782 if (k & 0b00000001) != 0 {
39783 let extractb: f32 = simd_extract!(b, 0);
39784 let extractc: f32 = simd_extract!(c, 0);
            fmadd = vfmaddssround(fmadd, extractb, extractc, ROUNDING);
39786 }
39787 simd_insert!(a, 0, fmadd)
39788 }
39789}
39790
39791/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39792///
39793/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39794/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39795/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39796/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39797/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39798/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39799///
39800/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_round_ss&expand=2576)
39801#[inline]
39802#[target_feature(enable = "avx512f")]
39803#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39804#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39805#[rustc_legacy_const_generics(4)]
39806pub fn _mm_maskz_fmadd_round_ss<const ROUNDING: i32>(
39807 k: __mmask8,
39808 a: __m128,
39809 b: __m128,
39810 c: __m128,
39811) -> __m128 {
39812 unsafe {
39813 static_assert_rounding!(ROUNDING);
39814 let mut fmadd: f32 = 0.;
39815 if (k & 0b00000001) != 0 {
39816 let extracta: f32 = simd_extract!(a, 0);
39817 let extractb: f32 = simd_extract!(b, 0);
39818 let extractc: f32 = simd_extract!(c, 0);
            fmadd = vfmaddssround(extracta, extractb, extractc, ROUNDING);
39820 }
39821 simd_insert!(a, 0, fmadd)
39822 }
39823}
39824
39825/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
39826///
39827/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39828/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39829/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39830/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39831/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39832/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39833///
39834/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_round_ss&expand=2575)
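///
/// A minimal usage sketch of the mask3 behavior (illustrative only; assumes an
/// `avx512f`-enabled caller, hence the `ignore` block):
///
/// ```ignore
/// let a = _mm_set_ss(2.0);
/// let b = _mm_set_ss(3.0);
/// let c = _mm_set_ss(10.0);
/// // Mask bit 0 set: the lower lane becomes 2.0 * 3.0 + 10.0 = 16.0; upper lanes come from `c`.
/// let r = _mm_mask3_fmadd_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b, c, 0b1);
/// // Mask bit 0 clear: the lower lane keeps `c`'s value, 10.0.
/// let s = _mm_mask3_fmadd_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b, c, 0b0);
/// ```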
39835#[inline]
39836#[target_feature(enable = "avx512f")]
39837#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39838#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39839#[rustc_legacy_const_generics(4)]
39840pub fn _mm_mask3_fmadd_round_ss<const ROUNDING: i32>(
39841 a: __m128,
39842 b: __m128,
39843 c: __m128,
39844 k: __mmask8,
39845) -> __m128 {
39846 unsafe {
39847 static_assert_rounding!(ROUNDING);
39848 let mut fmadd: f32 = simd_extract!(c, 0);
39849 if (k & 0b00000001) != 0 {
39850 let extracta: f32 = simd_extract!(a, 0);
39851 let extractb: f32 = simd_extract!(b, 0);
            fmadd = vfmaddssround(extracta, extractb, fmadd, ROUNDING);
39853 }
39854 simd_insert!(c, 0, fmadd)
39855 }
39856}
39857
39858/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
39859///
39860/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39861/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39862/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39863/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39864/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39865/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39866///
39867/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fmadd_round_sd&expand=2569)
39868#[inline]
39869#[target_feature(enable = "avx512f")]
39870#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39871#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39872#[rustc_legacy_const_generics(3)]
39873pub fn _mm_fmadd_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
39874 unsafe {
39875 static_assert_rounding!(ROUNDING);
39876 let extracta: f64 = simd_extract!(a, 0);
39877 let extractb: f64 = simd_extract!(b, 0);
39878 let extractc: f64 = simd_extract!(c, 0);
        let fmadd: f64 = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
39880 simd_insert!(a, 0, fmadd)
39881 }
39882}
39883
39884/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39885///
39886/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39887/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39888/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39889/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39890/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39891/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39892///
39893/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmadd_round_sd&expand=2570)
39894#[inline]
39895#[target_feature(enable = "avx512f")]
39896#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39897#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39898#[rustc_legacy_const_generics(4)]
39899pub fn _mm_mask_fmadd_round_sd<const ROUNDING: i32>(
39900 a: __m128d,
39901 k: __mmask8,
39902 b: __m128d,
39903 c: __m128d,
39904) -> __m128d {
39905 unsafe {
39906 static_assert_rounding!(ROUNDING);
39907 let mut fmadd: f64 = simd_extract!(a, 0);
39908 if (k & 0b00000001) != 0 {
39909 let extractb: f64 = simd_extract!(b, 0);
39910 let extractc: f64 = simd_extract!(c, 0);
            fmadd = vfmaddsdround(fmadd, extractb, extractc, ROUNDING);
39912 }
39913 simd_insert!(a, 0, fmadd)
39914 }
39915}
39916
39917/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39918///
39919/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39920/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39921/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39922/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39923/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39924/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39925///
39926/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_round_sd&expand=2572)
39927#[inline]
39928#[target_feature(enable = "avx512f")]
39929#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39930#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39931#[rustc_legacy_const_generics(4)]
39932pub fn _mm_maskz_fmadd_round_sd<const ROUNDING: i32>(
39933 k: __mmask8,
39934 a: __m128d,
39935 b: __m128d,
39936 c: __m128d,
39937) -> __m128d {
39938 unsafe {
39939 static_assert_rounding!(ROUNDING);
39940 let mut fmadd: f64 = 0.;
39941 if (k & 0b00000001) != 0 {
39942 let extracta: f64 = simd_extract!(a, 0);
39943 let extractb: f64 = simd_extract!(b, 0);
39944 let extractc: f64 = simd_extract!(c, 0);
            fmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
39946 }
39947 simd_insert!(a, 0, fmadd)
39948 }
39949}
39950
39951/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
39952///
39953/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39954/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39955/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39956/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39957/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39958/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39959///
39960/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_round_sd&expand=2571)
39961#[inline]
39962#[target_feature(enable = "avx512f")]
39963#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39964#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39965#[rustc_legacy_const_generics(4)]
39966pub fn _mm_mask3_fmadd_round_sd<const ROUNDING: i32>(
39967 a: __m128d,
39968 b: __m128d,
39969 c: __m128d,
39970 k: __mmask8,
39971) -> __m128d {
39972 unsafe {
39973 static_assert_rounding!(ROUNDING);
39974 let mut fmadd: f64 = simd_extract!(c, 0);
39975 if (k & 0b00000001) != 0 {
39976 let extracta: f64 = simd_extract!(a, 0);
39977 let extractb: f64 = simd_extract!(b, 0);
            fmadd = vfmaddsdround(extracta, extractb, fmadd, ROUNDING);
39979 }
39980 simd_insert!(c, 0, fmadd)
39981 }
39982}
39983
39984/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
39985///
39986/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39987/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39988/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39989/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39990/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39991/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39992///
39993/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fmsub_round_ss&expand=2659)
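///
/// A minimal usage sketch (illustrative only; assumes an `avx512f`-enabled caller, hence
/// the `ignore` block):
///
/// ```ignore
/// let a = _mm_set_ss(2.0);
/// let b = _mm_set_ss(3.0);
/// let c = _mm_set_ss(1.0);
/// // Lower lane: 2.0 * 3.0 - 1.0 = 5.0; the upper three lanes are copied from `a`.
/// let r = _mm_fmsub_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b, c);
/// ```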
39994#[inline]
39995#[target_feature(enable = "avx512f")]
39996#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
39997#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
39998#[rustc_legacy_const_generics(3)]
39999pub fn _mm_fmsub_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
40000 unsafe {
40001 static_assert_rounding!(ROUNDING);
40002 let extracta: f32 = simd_extract!(a, 0);
40003 let extractb: f32 = simd_extract!(b, 0);
40004 let extractc: f32 = simd_extract!(c, 0);
40005 let extractc: f32 = -extractc;
        let fmsub: f32 = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40007 simd_insert!(a, 0, fmsub)
40008 }
40009}
40010
40011/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40012///
40013/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40014/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40015/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40016/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40017/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40018/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40019///
40020/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_round_ss&expand=2660)
40021#[inline]
40022#[target_feature(enable = "avx512f")]
40023#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40024#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
40025#[rustc_legacy_const_generics(4)]
40026pub fn _mm_mask_fmsub_round_ss<const ROUNDING: i32>(
40027 a: __m128,
40028 k: __mmask8,
40029 b: __m128,
40030 c: __m128,
40031) -> __m128 {
40032 unsafe {
40033 static_assert_rounding!(ROUNDING);
40034 let mut fmsub: f32 = simd_extract!(a, 0);
40035 if (k & 0b00000001) != 0 {
40036 let extractb: f32 = simd_extract!(b, 0);
40037 let extractc: f32 = simd_extract!(c, 0);
40038 let extractc: f32 = -extractc;
            fmsub = vfmaddssround(fmsub, extractb, extractc, ROUNDING);
40040 }
40041 simd_insert!(a, 0, fmsub)
40042 }
40043}
40044
40045/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40046///
40047/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40048/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40049/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40050/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40051/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40052/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40053///
40054/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_round_ss&expand=2662)
40055#[inline]
40056#[target_feature(enable = "avx512f")]
40057#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40058#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
40059#[rustc_legacy_const_generics(4)]
40060pub fn _mm_maskz_fmsub_round_ss<const ROUNDING: i32>(
40061 k: __mmask8,
40062 a: __m128,
40063 b: __m128,
40064 c: __m128,
40065) -> __m128 {
40066 unsafe {
40067 static_assert_rounding!(ROUNDING);
40068 let mut fmsub: f32 = 0.;
40069 if (k & 0b00000001) != 0 {
40070 let extracta: f32 = simd_extract!(a, 0);
40071 let extractb: f32 = simd_extract!(b, 0);
40072 let extractc: f32 = simd_extract!(c, 0);
40073 let extractc: f32 = -extractc;
            fmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40075 }
40076 simd_insert!(a, 0, fmsub)
40077 }
40078}
40079
40080/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
40081///
40082/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40083/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40084/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40085/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40086/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40087/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40088///
40089/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_round_ss&expand=2661)
40090#[inline]
40091#[target_feature(enable = "avx512f")]
40092#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40093#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
40094#[rustc_legacy_const_generics(4)]
40095pub fn _mm_mask3_fmsub_round_ss<const ROUNDING: i32>(
40096 a: __m128,
40097 b: __m128,
40098 c: __m128,
40099 k: __mmask8,
40100) -> __m128 {
40101 unsafe {
40102 static_assert_rounding!(ROUNDING);
40103 let mut fmsub: f32 = simd_extract!(c, 0);
40104 if (k & 0b00000001) != 0 {
40105 let extracta: f32 = simd_extract!(a, 0);
40106 let extractb: f32 = simd_extract!(b, 0);
40107 let extractc: f32 = -fmsub;
            fmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40109 }
40110 simd_insert!(c, 0, fmsub)
40111 }
40112}
40113
40114/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
40115///
40116/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40117/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40118/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40119/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40120/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40121/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40122///
40123/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fmsub_round_sd&expand=2655)
40124#[inline]
40125#[target_feature(enable = "avx512f")]
40126#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40127#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
40128#[rustc_legacy_const_generics(3)]
40129pub fn _mm_fmsub_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
40130 unsafe {
40131 static_assert_rounding!(ROUNDING);
40132 let extracta: f64 = simd_extract!(a, 0);
40133 let extractb: f64 = simd_extract!(b, 0);
40134 let extractc: f64 = simd_extract!(c, 0);
40135 let extractc: f64 = -extractc;
        let fmsub: f64 = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40137 simd_insert!(a, 0, fmsub)
40138 }
40139}
40140
40141/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40142///
40143/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40144/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40145/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40146/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40147/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40148/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40149///
40150/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_round_sd&expand=2656)
40151#[inline]
40152#[target_feature(enable = "avx512f")]
40153#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40154#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
40155#[rustc_legacy_const_generics(4)]
40156pub fn _mm_mask_fmsub_round_sd<const ROUNDING: i32>(
40157 a: __m128d,
40158 k: __mmask8,
40159 b: __m128d,
40160 c: __m128d,
40161) -> __m128d {
40162 unsafe {
40163 static_assert_rounding!(ROUNDING);
40164 let mut fmsub: f64 = simd_extract!(a, 0);
40165 if (k & 0b00000001) != 0 {
40166 let extractb: f64 = simd_extract!(b, 0);
40167 let extractc: f64 = simd_extract!(c, 0);
40168 let extractc: f64 = -extractc;
            fmsub = vfmaddsdround(fmsub, extractb, extractc, ROUNDING);
40170 }
40171 simd_insert!(a, 0, fmsub)
40172 }
40173}
40174
40175/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40176///
40177/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40178/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40179/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40180/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40181/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40182/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40183///
40184/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_round_sd&expand=2658)
40185#[inline]
40186#[target_feature(enable = "avx512f")]
40187#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40188#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
40189#[rustc_legacy_const_generics(4)]
40190pub fn _mm_maskz_fmsub_round_sd<const ROUNDING: i32>(
40191 k: __mmask8,
40192 a: __m128d,
40193 b: __m128d,
40194 c: __m128d,
40195) -> __m128d {
40196 unsafe {
40197 static_assert_rounding!(ROUNDING);
40198 let mut fmsub: f64 = 0.;
40199 if (k & 0b00000001) != 0 {
40200 let extracta: f64 = simd_extract!(a, 0);
40201 let extractb: f64 = simd_extract!(b, 0);
40202 let extractc: f64 = simd_extract!(c, 0);
40203 let extractc: f64 = -extractc;
            fmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40205 }
40206 simd_insert!(a, 0, fmsub)
40207 }
40208}
40209
40210/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
40211///
40212/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40213/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40214/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40215/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40216/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40217/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40218///
40219/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_round_sd&expand=2657)
40220#[inline]
40221#[target_feature(enable = "avx512f")]
40222#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40223#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
40224#[rustc_legacy_const_generics(4)]
40225pub fn _mm_mask3_fmsub_round_sd<const ROUNDING: i32>(
40226 a: __m128d,
40227 b: __m128d,
40228 c: __m128d,
40229 k: __mmask8,
40230) -> __m128d {
40231 unsafe {
40232 static_assert_rounding!(ROUNDING);
40233 let mut fmsub: f64 = simd_extract!(c, 0);
40234 if (k & 0b00000001) != 0 {
40235 let extracta: f64 = simd_extract!(a, 0);
40236 let extractb: f64 = simd_extract!(b, 0);
40237 let extractc: f64 = -fmsub;
40238 fmsub = vfmaddsdround(a:extracta, b:extractb, c:extractc, ROUNDING);
40239 }
40240 simd_insert!(c, 0, fmsub)
40241 }
40242}
40243
40244/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
40245///
40246/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40247/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40248/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40249/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40250/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40251/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40252///
40253/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmadd_round_ss&expand=2739)
40254#[inline]
40255#[target_feature(enable = "avx512f")]
40256#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40257#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40258#[rustc_legacy_const_generics(3)]
40259pub fn _mm_fnmadd_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
40260 unsafe {
40261 static_assert_rounding!(ROUNDING);
40262 let extracta: f32 = simd_extract!(a, 0);
40263 let extracta: f32 = -extracta;
40264 let extractb: f32 = simd_extract!(b, 0);
40265 let extractc: f32 = simd_extract!(c, 0);
40266 let fnmadd: f32 = vfmaddssround(a:extracta, b:extractb, c:extractc, ROUNDING);
40267 simd_insert!(a, 0, fnmadd)
40268 }
40269}
40270
40271/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40272///
40273/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40274/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40275/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40276/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40277/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40278/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40279///
40280/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_round_ss&expand=2740)
40281#[inline]
40282#[target_feature(enable = "avx512f")]
40283#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40284#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40285#[rustc_legacy_const_generics(4)]
40286pub fn _mm_mask_fnmadd_round_ss<const ROUNDING: i32>(
40287 a: __m128,
40288 k: __mmask8,
40289 b: __m128,
40290 c: __m128,
40291) -> __m128 {
40292 unsafe {
40293 static_assert_rounding!(ROUNDING);
40294 let mut fnmadd: f32 = simd_extract!(a, 0);
40295 if (k & 0b00000001) != 0 {
40296 let extracta: f32 = -fnmadd;
40297 let extractb: f32 = simd_extract!(b, 0);
40298 let extractc: f32 = simd_extract!(c, 0);
40299 fnmadd = vfmaddssround(a:extracta, b:extractb, c:extractc, ROUNDING);
40300 }
40301 simd_insert!(a, 0, fnmadd)
40302 }
40303}
40304
40305/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40306///
40307/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40308/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40309/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40310/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40311/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40312/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40313///
40314/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_round_ss&expand=2742)
40315#[inline]
40316#[target_feature(enable = "avx512f")]
40317#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40318#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40319#[rustc_legacy_const_generics(4)]
40320pub fn _mm_maskz_fnmadd_round_ss<const ROUNDING: i32>(
40321 k: __mmask8,
40322 a: __m128,
40323 b: __m128,
40324 c: __m128,
40325) -> __m128 {
40326 unsafe {
40327 static_assert_rounding!(ROUNDING);
40328 let mut fnmadd: f32 = 0.;
40329 if (k & 0b00000001) != 0 {
40330 let extracta: f32 = simd_extract!(a, 0);
40331 let extracta: f32 = -extracta;
40332 let extractb: f32 = simd_extract!(b, 0);
40333 let extractc: f32 = simd_extract!(c, 0);
40334 fnmadd = vfmaddssround(a:extracta, b:extractb, c:extractc, ROUNDING);
40335 }
40336 simd_insert!(a, 0, fnmadd)
40337 }
40338}
40339
40340/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
40341///
40342/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40343/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40344/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40345/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40346/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40347/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40348///
40349/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_round_ss&expand=2741)
40350#[inline]
40351#[target_feature(enable = "avx512f")]
40352#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40353#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40354#[rustc_legacy_const_generics(4)]
40355pub fn _mm_mask3_fnmadd_round_ss<const ROUNDING: i32>(
40356 a: __m128,
40357 b: __m128,
40358 c: __m128,
40359 k: __mmask8,
40360) -> __m128 {
40361 unsafe {
40362 static_assert_rounding!(ROUNDING);
40363 let mut fnmadd: f32 = simd_extract!(c, 0);
40364 if (k & 0b00000001) != 0 {
40365 let extracta: f32 = simd_extract!(a, 0);
40366 let extracta: f32 = -extracta;
40367 let extractb: f32 = simd_extract!(b, 0);
40368 fnmadd = vfmaddssround(a:extracta, b:extractb, c:fnmadd, ROUNDING);
40369 }
40370 simd_insert!(c, 0, fnmadd)
40371 }
40372}
40373
40374/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
40375///
40376/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40377/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40378/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40379/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40380/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40381/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40382///
40383/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmadd_round_sd&expand=2735)
40384#[inline]
40385#[target_feature(enable = "avx512f")]
40386#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40387#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40388#[rustc_legacy_const_generics(3)]
40389pub fn _mm_fnmadd_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
40390 unsafe {
40391 static_assert_rounding!(ROUNDING);
40392 let extracta: f64 = simd_extract!(a, 0);
40393 let extracta: f64 = -extracta;
40394 let extractb: f64 = simd_extract!(b, 0);
40395 let extractc: f64 = simd_extract!(c, 0);
40396 let fnmadd: f64 = vfmaddsdround(a:extracta, b:extractb, c:extractc, ROUNDING);
40397 simd_insert!(a, 0, fnmadd)
40398 }
40399}
40400
40401/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40402///
40403/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40404/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40405/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40406/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40407/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40408/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40409///
40410/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_round_sd&expand=2736)
40411#[inline]
40412#[target_feature(enable = "avx512f")]
40413#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40414#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40415#[rustc_legacy_const_generics(4)]
40416pub fn _mm_mask_fnmadd_round_sd<const ROUNDING: i32>(
40417 a: __m128d,
40418 k: __mmask8,
40419 b: __m128d,
40420 c: __m128d,
40421) -> __m128d {
40422 unsafe {
40423 static_assert_rounding!(ROUNDING);
40424 let mut fnmadd: f64 = simd_extract!(a, 0);
40425 if (k & 0b00000001) != 0 {
40426 let extracta: f64 = -fnmadd;
40427 let extractb: f64 = simd_extract!(b, 0);
40428 let extractc: f64 = simd_extract!(c, 0);
40429 fnmadd = vfmaddsdround(a:extracta, b:extractb, c:extractc, ROUNDING);
40430 }
40431 simd_insert!(a, 0, fnmadd)
40432 }
40433}
40434
40435/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40436///
40437/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40438/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40439/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40440/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40441/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40442/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40443///
40444/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_round_sd&expand=2738)
40445#[inline]
40446#[target_feature(enable = "avx512f")]
40447#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40448#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40449#[rustc_legacy_const_generics(4)]
40450pub fn _mm_maskz_fnmadd_round_sd<const ROUNDING: i32>(
40451 k: __mmask8,
40452 a: __m128d,
40453 b: __m128d,
40454 c: __m128d,
40455) -> __m128d {
40456 unsafe {
40457 static_assert_rounding!(ROUNDING);
40458 let mut fnmadd: f64 = 0.;
40459 if (k & 0b00000001) != 0 {
40460 let extracta: f64 = simd_extract!(a, 0);
40461 let extracta: f64 = -extracta;
40462 let extractb: f64 = simd_extract!(b, 0);
40463 let extractc: f64 = simd_extract!(c, 0);
40464 fnmadd = vfmaddsdround(a:extracta, b:extractb, c:extractc, ROUNDING);
40465 }
40466 simd_insert!(a, 0, fnmadd)
40467 }
40468}
40469
40470/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
40471///
40472/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40473/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40474/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40475/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40476/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40477/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40478///
40479/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_round_sd&expand=2737)
40480#[inline]
40481#[target_feature(enable = "avx512f")]
40482#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40483#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40484#[rustc_legacy_const_generics(4)]
40485pub fn _mm_mask3_fnmadd_round_sd<const ROUNDING: i32>(
40486 a: __m128d,
40487 b: __m128d,
40488 c: __m128d,
40489 k: __mmask8,
40490) -> __m128d {
40491 unsafe {
40492 static_assert_rounding!(ROUNDING);
40493 let mut fnmadd: f64 = simd_extract!(c, 0);
40494 if (k & 0b00000001) != 0 {
40495 let extracta: f64 = simd_extract!(a, 0);
40496 let extracta: f64 = -extracta;
40497 let extractb: f64 = simd_extract!(b, 0);
40498 fnmadd = vfmaddsdround(a:extracta, b:extractb, c:fnmadd, ROUNDING);
40499 }
40500 simd_insert!(c, 0, fnmadd)
40501 }
40502}
40503
40504/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, subtract the lower element in c from the negated intermediate result, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
40505///
40506/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40507/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40508/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40509/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40510/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40511/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40512///
40513/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmsub_round_ss&expand=2787)
40514#[inline]
40515#[target_feature(enable = "avx512f")]
40516#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40517#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40518#[rustc_legacy_const_generics(3)]
40519pub fn _mm_fnmsub_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
40520 unsafe {
40521 static_assert_rounding!(ROUNDING);
40522 let extracta: f32 = simd_extract!(a, 0);
40523 let extracta: f32 = -extracta;
40524 let extractb: f32 = simd_extract!(b, 0);
40525 let extractc: f32 = simd_extract!(c, 0);
40526 let extractc: f32 = -extractc;
40527 let fnmsub: f32 = vfmaddssround(a:extracta, b:extractb, c:extractc, ROUNDING);
40528 simd_insert!(a, 0, fnmsub)
40529 }
40530}
40531
40532/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40533///
40534/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40535/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40536/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40537/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40538/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40539/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40540///
40541/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_round_ss&expand=2788)
40542#[inline]
40543#[target_feature(enable = "avx512f")]
40544#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40545#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40546#[rustc_legacy_const_generics(4)]
40547pub fn _mm_mask_fnmsub_round_ss<const ROUNDING: i32>(
40548 a: __m128,
40549 k: __mmask8,
40550 b: __m128,
40551 c: __m128,
40552) -> __m128 {
40553 unsafe {
40554 static_assert_rounding!(ROUNDING);
40555 let mut fnmsub: f32 = simd_extract!(a, 0);
40556 if (k & 0b00000001) != 0 {
40557 let extracta: f32 = -fnmsub;
40558 let extractb: f32 = simd_extract!(b, 0);
40559 let extractc: f32 = simd_extract!(c, 0);
40560 let extractc: f32 = -extractc;
40561 fnmsub = vfmaddssround(a:extracta, b:extractb, c:extractc, ROUNDING);
40562 }
40563 simd_insert!(a, 0, fnmsub)
40564 }
40565}
40566
40567/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40568///
40569/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40570/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40571/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40572/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40573/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40574/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40575///
40576/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_round_ss&expand=2790)
40577#[inline]
40578#[target_feature(enable = "avx512f")]
40579#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40580#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40581#[rustc_legacy_const_generics(4)]
40582pub fn _mm_maskz_fnmsub_round_ss<const ROUNDING: i32>(
40583 k: __mmask8,
40584 a: __m128,
40585 b: __m128,
40586 c: __m128,
40587) -> __m128 {
40588 unsafe {
40589 static_assert_rounding!(ROUNDING);
40590 let mut fnmsub: f32 = 0.;
40591 if (k & 0b00000001) != 0 {
40592 let extracta: f32 = simd_extract!(a, 0);
40593 let extracta: f32 = -extracta;
40594 let extractb: f32 = simd_extract!(b, 0);
40595 let extractc: f32 = simd_extract!(c, 0);
40596 let extractc: f32 = -extractc;
40597 fnmsub = vfmaddssround(a:extracta, b:extractb, c:extractc, ROUNDING);
40598 }
40599 simd_insert!(a, 0, fnmsub)
40600 }
40601}
40602
40603/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
40604///
40605/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40606/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40607/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40608/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40609/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40610/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40611///
40612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_round_ss&expand=2789)
40613#[inline]
40614#[target_feature(enable = "avx512f")]
40615#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40616#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40617#[rustc_legacy_const_generics(4)]
40618pub fn _mm_mask3_fnmsub_round_ss<const ROUNDING: i32>(
40619 a: __m128,
40620 b: __m128,
40621 c: __m128,
40622 k: __mmask8,
40623) -> __m128 {
40624 unsafe {
40625 static_assert_rounding!(ROUNDING);
40626 let mut fnmsub: f32 = simd_extract!(c, 0);
40627 if (k & 0b00000001) != 0 {
40628 let extracta: f32 = simd_extract!(a, 0);
40629 let extracta: f32 = -extracta;
40630 let extractb: f32 = simd_extract!(b, 0);
40631 let extractc: f32 = -fnmsub;
40632 fnmsub = vfmaddssround(a:extracta, b:extractb, c:extractc, ROUNDING);
40633 }
40634 simd_insert!(c, 0, fnmsub)
40635 }
40636}
40637
40638/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
40639///
40640/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40641/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40642/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40643/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40644/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40645/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40646///
40647/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmsub_round_sd&expand=2783)
40648#[inline]
40649#[target_feature(enable = "avx512f")]
40650#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40651#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40652#[rustc_legacy_const_generics(3)]
40653pub fn _mm_fnmsub_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
40654 unsafe {
40655 static_assert_rounding!(ROUNDING);
40656 let extracta: f64 = simd_extract!(a, 0);
40657 let extracta: f64 = -extracta;
40658 let extractb: f64 = simd_extract!(b, 0);
40659 let extractc: f64 = simd_extract!(c, 0);
40660 let extractc: f64 = -extractc;
40661 let fnmsub: f64 = vfmaddsdround(a:extracta, b:extractb, c:extractc, ROUNDING);
40662 simd_insert!(a, 0, fnmsub)
40663 }
40664}
40665
40666/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40667///
40668/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40669/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40670/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40671/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40672/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40673/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40674///
40675/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_round_sd&expand=2784)
40676#[inline]
40677#[target_feature(enable = "avx512f")]
40678#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40679#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40680#[rustc_legacy_const_generics(4)]
40681pub fn _mm_mask_fnmsub_round_sd<const ROUNDING: i32>(
40682 a: __m128d,
40683 k: __mmask8,
40684 b: __m128d,
40685 c: __m128d,
40686) -> __m128d {
40687 unsafe {
40688 static_assert_rounding!(ROUNDING);
40689 let mut fnmsub: f64 = simd_extract!(a, 0);
40690 if (k & 0b00000001) != 0 {
40691 let extracta: f64 = -fnmsub;
40692 let extractb: f64 = simd_extract!(b, 0);
40693 let extractc: f64 = simd_extract!(c, 0);
40694 let extractc: f64 = -extractc;
40695 fnmsub = vfmaddsdround(a:extracta, b:extractb, c:extractc, ROUNDING);
40696 }
40697 simd_insert!(a, 0, fnmsub)
40698 }
40699}
40700
40701/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40702///
40703/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40704/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40705/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40706/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40707/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40708/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40709///
40710/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_round_sd&expand=2786)
40711#[inline]
40712#[target_feature(enable = "avx512f")]
40713#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40714#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40715#[rustc_legacy_const_generics(4)]
40716pub fn _mm_maskz_fnmsub_round_sd<const ROUNDING: i32>(
40717 k: __mmask8,
40718 a: __m128d,
40719 b: __m128d,
40720 c: __m128d,
40721) -> __m128d {
40722 unsafe {
40723 static_assert_rounding!(ROUNDING);
40724 let mut fnmsub: f64 = 0.;
40725 if (k & 0b00000001) != 0 {
40726 let extracta: f64 = simd_extract!(a, 0);
40727 let extracta: f64 = -extracta;
40728 let extractb: f64 = simd_extract!(b, 0);
40729 let extractc: f64 = simd_extract!(c, 0);
40730 let extractc: f64 = -extractc;
40731 fnmsub = vfmaddsdround(a:extracta, b:extractb, c:extractc, ROUNDING);
40732 }
40733 simd_insert!(a, 0, fnmsub)
40734 }
40735}
40736
40737/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
40738///
40739/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40740/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40741/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40742/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40743/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40744/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40745///
40746/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_round_sd&expand=2785)
40747#[inline]
40748#[target_feature(enable = "avx512f")]
40749#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40750#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40751#[rustc_legacy_const_generics(4)]
40752pub fn _mm_mask3_fnmsub_round_sd<const ROUNDING: i32>(
40753 a: __m128d,
40754 b: __m128d,
40755 c: __m128d,
40756 k: __mmask8,
40757) -> __m128d {
40758 unsafe {
40759 static_assert_rounding!(ROUNDING);
40760 let mut fnmsub: f64 = simd_extract!(c, 0);
40761 if (k & 0b00000001) != 0 {
40762 let extracta: f64 = simd_extract!(a, 0);
40763 let extracta: f64 = -extracta;
40764 let extractb: f64 = simd_extract!(b, 0);
40765 let extractc: f64 = -fnmsub;
40766 fnmsub = vfmaddsdround(a:extracta, b:extractb, c:extractc, ROUNDING);
40767 }
40768 simd_insert!(c, 0, fnmsub)
40769 }
40770}
40771
40772/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.
40773///
40774/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_ss&expand=2517)
40775#[inline]
40776#[target_feature(enable = "avx512f")]
40777#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40778#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0))]
40779#[rustc_legacy_const_generics(3)]
40780pub fn _mm_fixupimm_ss<const IMM8: i32>(a: __m128, b: __m128, c: __m128i) -> __m128 {
40781 unsafe {
40782 static_assert_uimm_bits!(IMM8, 8);
40783 let a: f32x4 = a.as_f32x4();
40784 let b: f32x4 = b.as_f32x4();
40785 let c: i32x4 = c.as_i32x4();
40786 let r: f32x4 = vfixupimmss(a, b, c, IMM8, mask:0b11111111, _MM_FROUND_CUR_DIRECTION);
40787 let fixupimm: f32 = simd_extract!(r, 0);
40788 let r: f32x4 = simd_insert!(a, 0, fixupimm);
40789 transmute(src:r)
40790 }
40791}
40792
40793/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.
40794///
40795/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_ss&expand=2518)
40796#[inline]
40797#[target_feature(enable = "avx512f")]
40798#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40799#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0))]
40800#[rustc_legacy_const_generics(4)]
40801pub fn _mm_mask_fixupimm_ss<const IMM8: i32>(
40802 a: __m128,
40803 k: __mmask8,
40804 b: __m128,
40805 c: __m128i,
40806) -> __m128 {
40807 unsafe {
40808 static_assert_uimm_bits!(IMM8, 8);
40809 let a: f32x4 = a.as_f32x4();
40810 let b: f32x4 = b.as_f32x4();
40811 let c: i32x4 = c.as_i32x4();
40812 let fixupimm: f32x4 = vfixupimmss(a, b, c, IMM8, mask:k, _MM_FROUND_CUR_DIRECTION);
40813 let fixupimm: f32 = simd_extract!(fixupimm, 0);
40814 let r: f32x4 = simd_insert!(a, 0, fixupimm);
40815 transmute(src:r)
40816 }
40817}
40818
40819/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.
40820///
40821/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_ss&expand=2519)
40822#[inline]
40823#[target_feature(enable = "avx512f")]
40824#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40825#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0))]
40826#[rustc_legacy_const_generics(4)]
40827pub fn _mm_maskz_fixupimm_ss<const IMM8: i32>(
40828 k: __mmask8,
40829 a: __m128,
40830 b: __m128,
40831 c: __m128i,
40832) -> __m128 {
40833 unsafe {
40834 static_assert_uimm_bits!(IMM8, 8);
40835 let a: f32x4 = a.as_f32x4();
40836 let b: f32x4 = b.as_f32x4();
40837 let c: i32x4 = c.as_i32x4();
40838 let fixupimm: f32x4 = vfixupimmssz(a, b, c, IMM8, mask:k, _MM_FROUND_CUR_DIRECTION);
40839 let fixupimm: f32 = simd_extract!(fixupimm, 0);
40840 let r: f32x4 = simd_insert!(a, 0, fixupimm);
40841 transmute(src:r)
40842 }
40843}
40844
40845/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.
40846///
40847/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_sd&expand=2514)
40848#[inline]
40849#[target_feature(enable = "avx512f")]
40850#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40851#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0))]
40852#[rustc_legacy_const_generics(3)]
40853pub fn _mm_fixupimm_sd<const IMM8: i32>(a: __m128d, b: __m128d, c: __m128i) -> __m128d {
40854 unsafe {
40855 static_assert_uimm_bits!(IMM8, 8);
40856 let a: f64x2 = a.as_f64x2();
40857 let b: f64x2 = b.as_f64x2();
40858 let c: i64x2 = c.as_i64x2();
40859 let fixupimm: f64x2 = vfixupimmsd(a, b, c, IMM8, mask:0b11111111, _MM_FROUND_CUR_DIRECTION);
40860 let fixupimm: f64 = simd_extract!(fixupimm, 0);
40861 let r: f64x2 = simd_insert!(a, 0, fixupimm);
40862 transmute(src:r)
40863 }
40864}
40865
40866/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.
40867///
40868/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_sd&expand=2515)
40869#[inline]
40870#[target_feature(enable = "avx512f")]
40871#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40872#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0))]
40873#[rustc_legacy_const_generics(4)]
40874pub fn _mm_mask_fixupimm_sd<const IMM8: i32>(
40875 a: __m128d,
40876 k: __mmask8,
40877 b: __m128d,
40878 c: __m128i,
40879) -> __m128d {
40880 unsafe {
40881 static_assert_uimm_bits!(IMM8, 8);
40882 let a: f64x2 = a.as_f64x2();
40883 let b: f64x2 = b.as_f64x2();
40884 let c: i64x2 = c.as_i64x2();
40885 let fixupimm: f64x2 = vfixupimmsd(a, b, c, IMM8, mask:k, _MM_FROUND_CUR_DIRECTION);
40886 let fixupimm: f64 = simd_extract!(fixupimm, 0);
40887 let r: f64x2 = simd_insert!(a, 0, fixupimm);
40888 transmute(src:r)
40889 }
40890}
40891
40892/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.
40893///
40894/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_sd&expand=2516)
40895#[inline]
40896#[target_feature(enable = "avx512f")]
40897#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40898#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0))]
40899#[rustc_legacy_const_generics(4)]
40900pub fn _mm_maskz_fixupimm_sd<const IMM8: i32>(
40901 k: __mmask8,
40902 a: __m128d,
40903 b: __m128d,
40904 c: __m128i,
40905) -> __m128d {
40906 unsafe {
40907 static_assert_uimm_bits!(IMM8, 8);
40908 let a: f64x2 = a.as_f64x2();
40909 let b: f64x2 = b.as_f64x2();
40910 let c: i64x2 = c.as_i64x2();
40911 let fixupimm: f64x2 = vfixupimmsdz(a, b, c, IMM8, mask:k, _MM_FROUND_CUR_DIRECTION);
40912 let fixupimm: f64 = simd_extract!(fixupimm, 0);
40913 let r: f64x2 = simd_insert!(a, 0, fixupimm);
40914 transmute(src:r)
40915 }
40916}
40917
40918/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.\
40919/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40920///
40921/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_round_ss&expand=2511)
40922#[inline]
40923#[target_feature(enable = "avx512f")]
40924#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40925#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0, SAE = 8))]
40926#[rustc_legacy_const_generics(3, 4)]
40927pub fn _mm_fixupimm_round_ss<const IMM8: i32, const SAE: i32>(
40928 a: __m128,
40929 b: __m128,
40930 c: __m128i,
40931) -> __m128 {
40932 unsafe {
40933 static_assert_uimm_bits!(IMM8, 8);
40934 static_assert_mantissas_sae!(SAE);
40935 let a: f32x4 = a.as_f32x4();
40936 let b: f32x4 = b.as_f32x4();
40937 let c: i32x4 = c.as_i32x4();
40938 let r: f32x4 = vfixupimmss(a, b, c, IMM8, mask:0b11111111, SAE);
40939 let fixupimm: f32 = simd_extract!(r, 0);
40940 let r: f32x4 = simd_insert!(a, 0, fixupimm);
40941 transmute(src:r)
40942 }
40943}
40944
40945/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.\
40946/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40947///
40948/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_round_ss&expand=2512)
40949#[inline]
40950#[target_feature(enable = "avx512f")]
40951#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40952#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0, SAE = 8))]
40953#[rustc_legacy_const_generics(4, 5)]
40954pub fn _mm_mask_fixupimm_round_ss<const IMM8: i32, const SAE: i32>(
40955 a: __m128,
40956 k: __mmask8,
40957 b: __m128,
40958 c: __m128i,
40959) -> __m128 {
40960 unsafe {
40961 static_assert_uimm_bits!(IMM8, 8);
40962 static_assert_mantissas_sae!(SAE);
40963 let a: f32x4 = a.as_f32x4();
40964 let b: f32x4 = b.as_f32x4();
40965 let c: i32x4 = c.as_i32x4();
40966 let r: f32x4 = vfixupimmss(a, b, c, IMM8, mask:k, SAE);
40967 let fixupimm: f32 = simd_extract!(r, 0);
40968 let r: f32x4 = simd_insert!(a, 0, fixupimm);
40969 transmute(src:r)
40970 }
40971}
40972
40973/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.\
40974/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40975///
40976/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_round_ss&expand=2513)
40977#[inline]
40978#[target_feature(enable = "avx512f")]
40979#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
40980#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0, SAE = 8))]
40981#[rustc_legacy_const_generics(4, 5)]
40982pub fn _mm_maskz_fixupimm_round_ss<const IMM8: i32, const SAE: i32>(
40983 k: __mmask8,
40984 a: __m128,
40985 b: __m128,
40986 c: __m128i,
40987) -> __m128 {
40988 unsafe {
40989 static_assert_uimm_bits!(IMM8, 8);
40990 static_assert_mantissas_sae!(SAE);
40991 let a: f32x4 = a.as_f32x4();
40992 let b: f32x4 = b.as_f32x4();
40993 let c: i32x4 = c.as_i32x4();
40994 let r: f32x4 = vfixupimmssz(a, b, c, IMM8, mask:k, SAE);
40995 let fixupimm: f32 = simd_extract!(r, 0);
40996 let r: f32x4 = simd_insert!(a, 0, fixupimm);
40997 transmute(src:r)
40998 }
40999}
41000
41001/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.\
41002/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41003///
41004/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_round_sd&expand=2508)
41005#[inline]
41006#[target_feature(enable = "avx512f")]
41007#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41008#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0, SAE = 8))]
41009#[rustc_legacy_const_generics(3, 4)]
41010pub fn _mm_fixupimm_round_sd<const IMM8: i32, const SAE: i32>(
41011 a: __m128d,
41012 b: __m128d,
41013 c: __m128i,
41014) -> __m128d {
41015 unsafe {
41016 static_assert_uimm_bits!(IMM8, 8);
41017 static_assert_mantissas_sae!(SAE);
41018 let a: f64x2 = a.as_f64x2();
41019 let b: f64x2 = b.as_f64x2();
41020 let c: i64x2 = c.as_i64x2();
41021 let r: f64x2 = vfixupimmsd(a, b, c, IMM8, mask:0b11111111, SAE);
41022 let fixupimm: f64 = simd_extract!(r, 0);
41023 let r: f64x2 = simd_insert!(a, 0, fixupimm);
41024 transmute(src:r)
41025 }
41026}
41027
41028/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.\
41029/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41030///
41031/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_round_sd&expand=2509)
41032#[inline]
41033#[target_feature(enable = "avx512f")]
41034#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41035#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0, SAE = 8))]
41036#[rustc_legacy_const_generics(4, 5)]
41037pub fn _mm_mask_fixupimm_round_sd<const IMM8: i32, const SAE: i32>(
41038 a: __m128d,
41039 k: __mmask8,
41040 b: __m128d,
41041 c: __m128i,
41042) -> __m128d {
41043 unsafe {
41044 static_assert_uimm_bits!(IMM8, 8);
41045 static_assert_mantissas_sae!(SAE);
41046 let a: f64x2 = a.as_f64x2();
41047 let b: f64x2 = b.as_f64x2();
41048 let c: i64x2 = c.as_i64x2();
41049 let r: f64x2 = vfixupimmsd(a, b, c, IMM8, mask:k, SAE);
41050 let fixupimm: f64 = simd_extract!(r, 0);
41051 let r: f64x2 = simd_insert!(a, 0, fixupimm);
41052 transmute(src:r)
41053 }
41054}
41055
41056/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.\
41057/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41058///
41059/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_round_sd&expand=2510)
41060#[inline]
41061#[target_feature(enable = "avx512f")]
41062#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41063#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0, SAE = 8))]
41064#[rustc_legacy_const_generics(4, 5)]
41065pub fn _mm_maskz_fixupimm_round_sd<const IMM8: i32, const SAE: i32>(
41066 k: __mmask8,
41067 a: __m128d,
41068 b: __m128d,
41069 c: __m128i,
41070) -> __m128d {
41071 unsafe {
41072 static_assert_uimm_bits!(IMM8, 8);
41073 static_assert_mantissas_sae!(SAE);
41074 let a: f64x2 = a.as_f64x2();
41075 let b: f64x2 = b.as_f64x2();
41076 let c: i64x2 = c.as_i64x2();
41077 let r: f64x2 = vfixupimmsdz(a, b, c, IMM8, mask:k, SAE);
41078 let fixupimm: f64 = simd_extract!(r, 0);
41079 let r: f64x2 = simd_insert!(a, 0, fixupimm);
41080 transmute(src:r)
41081 }
41082}
41083
41084/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
41085///
41086/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvtss_sd&expand=1896)
41087#[inline]
41088#[target_feature(enable = "avx512f")]
41089#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41090#[cfg_attr(test, assert_instr(vcvtss2sd))]
41091pub fn _mm_mask_cvtss_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128) -> __m128d {
41092 unsafe {
41093 transmute(src:vcvtss2sd(
41094 a.as_f64x2(),
41095 b.as_f32x4(),
41096 src.as_f64x2(),
41097 mask:k,
41098 _MM_FROUND_CUR_DIRECTION,
41099 ))
41100 }
41101}
41102
41103/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
41104///
41105/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvtss_sd&expand=1897)
41106#[inline]
41107#[target_feature(enable = "avx512f")]
41108#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41109#[cfg_attr(test, assert_instr(vcvtss2sd))]
41110pub fn _mm_maskz_cvtss_sd(k: __mmask8, a: __m128d, b: __m128) -> __m128d {
41111 unsafe {
41112 transmute(src:vcvtss2sd(
41113 a.as_f64x2(),
41114 b.as_f32x4(),
41115 src:f64x2::ZERO,
41116 mask:k,
41117 _MM_FROUND_CUR_DIRECTION,
41118 ))
41119 }
41120}
41121
41122/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
41123///
41124/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvtsd_ss&expand=1797)
41125#[inline]
41126#[target_feature(enable = "avx512f")]
41127#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41128#[cfg_attr(test, assert_instr(vcvtsd2ss))]
41129pub fn _mm_mask_cvtsd_ss(src: __m128, k: __mmask8, a: __m128, b: __m128d) -> __m128 {
41130 unsafe {
41131 transmute(src:vcvtsd2ss(
41132 a.as_f32x4(),
41133 b.as_f64x2(),
41134 src.as_f32x4(),
41135 mask:k,
41136 _MM_FROUND_CUR_DIRECTION,
41137 ))
41138 }
41139}
41140
41141/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
41142///
41143/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvtsd_ss&expand=1798)
41144#[inline]
41145#[target_feature(enable = "avx512f")]
41146#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41147#[cfg_attr(test, assert_instr(vcvtsd2ss))]
41148pub fn _mm_maskz_cvtsd_ss(k: __mmask8, a: __m128, b: __m128d) -> __m128 {
41149 unsafe {
41150 transmute(src:vcvtsd2ss(
41151 a.as_f32x4(),
41152 b.as_f64x2(),
41153 src:f32x4::ZERO,
41154 mask:k,
41155 _MM_FROUND_CUR_DIRECTION,
41156 ))
41157 }
41158}
41159
41160/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
41161/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41162///
41163/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_sd&expand=1371)
41164#[inline]
41165#[target_feature(enable = "avx512f")]
41166#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41167#[cfg_attr(test, assert_instr(vcvtss2sd, SAE = 8))]
41168#[rustc_legacy_const_generics(2)]
41169pub fn _mm_cvt_roundss_sd<const SAE: i32>(a: __m128d, b: __m128) -> __m128d {
41170 unsafe {
41171 static_assert_sae!(SAE);
41172 let a: f64x2 = a.as_f64x2();
41173 let b: f32x4 = b.as_f32x4();
41174 let r: f64x2 = vcvtss2sd(a, b, src:f64x2::ZERO, mask:0b11111111, SAE);
41175 transmute(src:r)
41176 }
41177}
41178
41179/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
41180/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41181///
41182/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvt_roundss_sd&expand=1372)
41183#[inline]
41184#[target_feature(enable = "avx512f")]
41185#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41186#[cfg_attr(test, assert_instr(vcvtss2sd, SAE = 8))]
41187#[rustc_legacy_const_generics(4)]
41188pub fn _mm_mask_cvt_roundss_sd<const SAE: i32>(
41189 src: __m128d,
41190 k: __mmask8,
41191 a: __m128d,
41192 b: __m128,
41193) -> __m128d {
41194 unsafe {
41195 static_assert_sae!(SAE);
41196 let a: f64x2 = a.as_f64x2();
41197 let b: f32x4 = b.as_f32x4();
41198 let src: f64x2 = src.as_f64x2();
41199 let r: f64x2 = vcvtss2sd(a, b, src, mask:k, SAE);
41200 transmute(src:r)
41201 }
41202}
41203
41204/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
41205/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41206///
41207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvt_roundss_sd&expand=1373)
41208#[inline]
41209#[target_feature(enable = "avx512f")]
41210#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41211#[cfg_attr(test, assert_instr(vcvtss2sd, SAE = 8))]
41212#[rustc_legacy_const_generics(3)]
41213pub fn _mm_maskz_cvt_roundss_sd<const SAE: i32>(k: __mmask8, a: __m128d, b: __m128) -> __m128d {
41214 unsafe {
41215 static_assert_sae!(SAE);
41216 let a: f64x2 = a.as_f64x2();
41217 let b: f32x4 = b.as_f32x4();
41218 let r: f64x2 = vcvtss2sd(a, b, src:f64x2::ZERO, mask:k, SAE);
41219 transmute(src:r)
41220 }
41221}
41222
41223/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
41224/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41225/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41226/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41227/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41228/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41229/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41230///
41231/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsd_ss&expand=1361)
41232#[inline]
41233#[target_feature(enable = "avx512f")]
41234#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41235#[cfg_attr(test, assert_instr(vcvtsd2ss, ROUNDING = 8))]
41236#[rustc_legacy_const_generics(2)]
41237pub fn _mm_cvt_roundsd_ss<const ROUNDING: i32>(a: __m128, b: __m128d) -> __m128 {
41238 unsafe {
41239 static_assert_rounding!(ROUNDING);
41240 let a: f32x4 = a.as_f32x4();
41241 let b: f64x2 = b.as_f64x2();
41242 let r: f32x4 = vcvtsd2ss(a, b, src:f32x4::ZERO, mask:0b11111111, ROUNDING);
41243 transmute(src:r)
41244 }
41245}
41246
41247/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
41248/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41249/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41250/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41251/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41252/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41253/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41254///
41255/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvt_roundsd_ss&expand=1362)
41256#[inline]
41257#[target_feature(enable = "avx512f")]
41258#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41259#[cfg_attr(test, assert_instr(vcvtsd2ss, ROUNDING = 8))]
41260#[rustc_legacy_const_generics(4)]
41261pub fn _mm_mask_cvt_roundsd_ss<const ROUNDING: i32>(
41262 src: __m128,
41263 k: __mmask8,
41264 a: __m128,
41265 b: __m128d,
41266) -> __m128 {
41267 unsafe {
41268 static_assert_rounding!(ROUNDING);
41269 let a: f32x4 = a.as_f32x4();
41270 let b: f64x2 = b.as_f64x2();
41271 let src: f32x4 = src.as_f32x4();
41272 let r: f32x4 = vcvtsd2ss(a, b, src, mask:k, ROUNDING);
41273 transmute(src:r)
41274 }
41275}
41276
41277/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
41278/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41279/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41280/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41281/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41282/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41283/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41284///
41285/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvt_roundsd_ss&expand=1363)
41286#[inline]
41287#[target_feature(enable = "avx512f")]
41288#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41289#[cfg_attr(test, assert_instr(vcvtsd2ss, ROUNDING = 8))]
41290#[rustc_legacy_const_generics(3)]
41291pub fn _mm_maskz_cvt_roundsd_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128d) -> __m128 {
41292 unsafe {
41293 static_assert_rounding!(ROUNDING);
41294 let a: f32x4 = a.as_f32x4();
41295 let b: f64x2 = b.as_f64x2();
        let r: f32x4 = vcvtsd2ss(a, b, f32x4::ZERO, k, ROUNDING);
        transmute(r)
41298 }
41299}
41300
41301/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
41302/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41303/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41304/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41305/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41306/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41307/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41308///
41309/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_si32&expand=1374)
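///
/// A minimal usage sketch (assumes AVX-512F support; `floor_to_i32` is only an
/// illustrative wrapper): the conversion rounds down regardless of the current
/// `MXCSR` rounding mode.
///
/// ```
/// # #![allow(dead_code)]
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn floor_to_i32(a: __m128) -> i32 {
///     // Round the low f32 lane toward negative infinity, exceptions suppressed.
///     _mm_cvt_roundss_si32::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a)
/// }
/// # }
/// ```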
41310#[inline]
41311#[target_feature(enable = "avx512f")]
41312#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41313#[cfg_attr(test, assert_instr(vcvtss2si, ROUNDING = 8))]
41314#[rustc_legacy_const_generics(1)]
41315pub fn _mm_cvt_roundss_si32<const ROUNDING: i32>(a: __m128) -> i32 {
41316 unsafe {
41317 static_assert_rounding!(ROUNDING);
41318 let a: f32x4 = a.as_f32x4();
41319 vcvtss2si(a, ROUNDING)
41320 }
41321}
41322
41323/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
41324/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41325/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41326/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41327/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41328/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41329/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41330///
41331/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_i32&expand=1369)
41332#[inline]
41333#[target_feature(enable = "avx512f")]
41334#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41335#[cfg_attr(test, assert_instr(vcvtss2si, ROUNDING = 8))]
41336#[rustc_legacy_const_generics(1)]
41337pub fn _mm_cvt_roundss_i32<const ROUNDING: i32>(a: __m128) -> i32 {
41338 unsafe {
41339 static_assert_rounding!(ROUNDING);
41340 let a: f32x4 = a.as_f32x4();
41341 vcvtss2si(a, ROUNDING)
41342 }
41343}
41344
41345/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.\
41346/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41347/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41348/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41349/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41350/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41351/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41352///
41353/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_u32&expand=1376)
41354#[inline]
41355#[target_feature(enable = "avx512f")]
41356#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41357#[cfg_attr(test, assert_instr(vcvtss2usi, ROUNDING = 8))]
41358#[rustc_legacy_const_generics(1)]
41359pub fn _mm_cvt_roundss_u32<const ROUNDING: i32>(a: __m128) -> u32 {
41360 unsafe {
41361 static_assert_rounding!(ROUNDING);
41362 let a: f32x4 = a.as_f32x4();
41363 vcvtss2usi(a, ROUNDING)
41364 }
41365}
41366
41367/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.
41368///
41369/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtss_i32&expand=1893)
41370#[inline]
41371#[target_feature(enable = "avx512f")]
41372#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41373#[cfg_attr(test, assert_instr(vcvtss2si))]
41374pub fn _mm_cvtss_i32(a: __m128) -> i32 {
41375 unsafe { vcvtss2si(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) }
41376}
41377
41378/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.
41379///
41380/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtss_u32&expand=1901)
41381#[inline]
41382#[target_feature(enable = "avx512f")]
41383#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41384#[cfg_attr(test, assert_instr(vcvtss2usi))]
41385pub fn _mm_cvtss_u32(a: __m128) -> u32 {
41386 unsafe { vcvtss2usi(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) }
41387}
41388
41389/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
41390/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41391/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41392/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41393/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41394/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41395/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41396///
41397/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsd_si32&expand=1359)
41398#[inline]
41399#[target_feature(enable = "avx512f")]
41400#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41401#[cfg_attr(test, assert_instr(vcvtsd2si, ROUNDING = 8))]
41402#[rustc_legacy_const_generics(1)]
41403pub fn _mm_cvt_roundsd_si32<const ROUNDING: i32>(a: __m128d) -> i32 {
41404 unsafe {
41405 static_assert_rounding!(ROUNDING);
41406 let a: f64x2 = a.as_f64x2();
41407 vcvtsd2si(a, ROUNDING)
41408 }
41409}
41410
/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
41412/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41413/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41414/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41415/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41416/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41417/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41418///
41419/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsd_i32&expand=1357)
41420#[inline]
41421#[target_feature(enable = "avx512f")]
41422#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41423#[cfg_attr(test, assert_instr(vcvtsd2si, ROUNDING = 8))]
41424#[rustc_legacy_const_generics(1)]
41425pub fn _mm_cvt_roundsd_i32<const ROUNDING: i32>(a: __m128d) -> i32 {
41426 unsafe {
41427 static_assert_rounding!(ROUNDING);
41428 let a: f64x2 = a.as_f64x2();
41429 vcvtsd2si(a, ROUNDING)
41430 }
41431}
41432
41433/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.\
41434/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41435/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41436/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41437/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41438/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41439/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41440///
41441/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvt_roundsd_u32&expand=1364)
41442#[inline]
41443#[target_feature(enable = "avx512f")]
41444#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41445#[cfg_attr(test, assert_instr(vcvtsd2usi, ROUNDING = 8))]
41446#[rustc_legacy_const_generics(1)]
41447pub fn _mm_cvt_roundsd_u32<const ROUNDING: i32>(a: __m128d) -> u32 {
41448 unsafe {
41449 static_assert_rounding!(ROUNDING);
41450 let a: f64x2 = a.as_f64x2();
41451 vcvtsd2usi(a, ROUNDING)
41452 }
41453}
41454
41455/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer, and store the result in dst.
41456///
41457/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtsd_i32&expand=1791)
41458#[inline]
41459#[target_feature(enable = "avx512f")]
41460#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41461#[cfg_attr(test, assert_instr(vcvtsd2si))]
41462pub fn _mm_cvtsd_i32(a: __m128d) -> i32 {
41463 unsafe { vcvtsd2si(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) }
41464}
41465
41466/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.
41467///
41468/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtsd_u32&expand=1799)
41469#[inline]
41470#[target_feature(enable = "avx512f")]
41471#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41472#[cfg_attr(test, assert_instr(vcvtsd2usi))]
41473pub fn _mm_cvtsd_u32(a: __m128d) -> u32 {
41474 unsafe { vcvtsd2usi(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) }
41475}
41476
/// Convert the signed 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41480/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41481/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41482/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41483/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41484/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41485///
41486/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundi32_ss&expand=1312)
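///
/// A minimal usage sketch (assumes AVX-512F support; `insert_i32_rounded_up` is only
/// an illustrative wrapper): the rounding mode only matters for integers whose
/// magnitude exceeds 2^24, i.e. values that are not exactly representable in `f32`.
///
/// ```
/// # #![allow(dead_code)]
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn insert_i32_rounded_up(a: __m128, b: i32) -> __m128 {
///     _mm_cvt_roundi32_ss::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a, b)
/// }
/// # }
/// ```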
41487#[inline]
41488#[target_feature(enable = "avx512f")]
41489#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41490#[cfg_attr(test, assert_instr(vcvtsi2ss, ROUNDING = 8))]
41491#[rustc_legacy_const_generics(2)]
41492pub fn _mm_cvt_roundi32_ss<const ROUNDING: i32>(a: __m128, b: i32) -> __m128 {
41493 unsafe {
41494 static_assert_rounding!(ROUNDING);
41495 let a: f32x4 = a.as_f32x4();
41496 let r: f32x4 = vcvtsi2ss(a, b, ROUNDING);
        transmute(r)
41498 }
41499}
41500
/// Convert the signed 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41504/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41505/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41506/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41507/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41508/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41509///
41510/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsi32_ss&expand=1366)
41511#[inline]
41512#[target_feature(enable = "avx512f")]
41513#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41514#[cfg_attr(test, assert_instr(vcvtsi2ss, ROUNDING = 8))]
41515#[rustc_legacy_const_generics(2)]
41516pub fn _mm_cvt_roundsi32_ss<const ROUNDING: i32>(a: __m128, b: i32) -> __m128 {
41517 unsafe {
41518 static_assert_rounding!(ROUNDING);
41519 let a: f32x4 = a.as_f32x4();
41520 let r: f32x4 = vcvtsi2ss(a, b, ROUNDING);
        transmute(r)
41522 }
41523}
41524
41525/// Convert the unsigned 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
41526/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41527/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41528/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41529/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41530/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41531/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41532///
41533/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundu32_ss&expand=1378)
41534#[inline]
41535#[target_feature(enable = "avx512f")]
41536#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41537#[cfg_attr(test, assert_instr(vcvtusi2ss, ROUNDING = 8))]
41538#[rustc_legacy_const_generics(2)]
41539pub fn _mm_cvt_roundu32_ss<const ROUNDING: i32>(a: __m128, b: u32) -> __m128 {
41540 unsafe {
41541 static_assert_rounding!(ROUNDING);
41542 let a: f32x4 = a.as_f32x4();
41543 let r: f32x4 = vcvtusi2ss(a, b, ROUNDING);
        transmute(r)
41545 }
41546}
41547
41548/// Convert the signed 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
41549///
41550/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvti32_ss&expand=1643)
41551#[inline]
41552#[target_feature(enable = "avx512f")]
41553#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41554#[cfg_attr(test, assert_instr(vcvtsi2ss))]
41555pub fn _mm_cvti32_ss(a: __m128, b: i32) -> __m128 {
41556 unsafe {
41557 let b: f32 = b as f32;
41558 simd_insert!(a, 0, b)
41559 }
41560}
41561
41562/// Convert the signed 32-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
41563///
41564/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvti32_sd&expand=1642)
41565#[inline]
41566#[target_feature(enable = "avx512f")]
41567#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41568#[cfg_attr(test, assert_instr(vcvtsi2sd))]
41569pub fn _mm_cvti32_sd(a: __m128d, b: i32) -> __m128d {
41570 unsafe {
41571 let b: f64 = b as f64;
41572 simd_insert!(a, 0, b)
41573 }
41574}
41575
41576/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
41577/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41578///
41579/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtt_roundss_si32&expand=1936)
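///
/// A minimal usage sketch (assumes AVX-512F support; `truncate_quiet` is only an
/// illustrative wrapper): truncation is implied by the intrinsic itself, so `SAE`
/// only controls whether floating-point exceptions are suppressed.
///
/// ```
/// # #![allow(dead_code)]
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn truncate_quiet(a: __m128) -> i32 {
///     _mm_cvtt_roundss_si32::<_MM_FROUND_NO_EXC>(a)
/// }
/// # }
/// ```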
41580#[inline]
41581#[target_feature(enable = "avx512f")]
41582#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41583#[cfg_attr(test, assert_instr(vcvttss2si, SAE = 8))]
41584#[rustc_legacy_const_generics(1)]
41585pub fn _mm_cvtt_roundss_si32<const SAE: i32>(a: __m128) -> i32 {
41586 unsafe {
41587 static_assert_sae!(SAE);
41588 let a: f32x4 = a.as_f32x4();
41589 vcvttss2si(a, SAE)
41590 }
41591}
41592
41593/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
41594/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41595///
41596/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtt_roundss_i32&expand=1934)
41597#[inline]
41598#[target_feature(enable = "avx512f")]
41599#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41600#[cfg_attr(test, assert_instr(vcvttss2si, SAE = 8))]
41601#[rustc_legacy_const_generics(1)]
41602pub fn _mm_cvtt_roundss_i32<const SAE: i32>(a: __m128) -> i32 {
41603 unsafe {
41604 static_assert_sae!(SAE);
41605 let a: f32x4 = a.as_f32x4();
41606 vcvttss2si(a, SAE)
41607 }
41608}
41609
41610/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.\
41611/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41612///
41613/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtt_roundss_u32&expand=1938)
41614#[inline]
41615#[target_feature(enable = "avx512f")]
41616#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41617#[cfg_attr(test, assert_instr(vcvttss2usi, SAE = 8))]
41618#[rustc_legacy_const_generics(1)]
41619pub fn _mm_cvtt_roundss_u32<const SAE: i32>(a: __m128) -> u32 {
41620 unsafe {
41621 static_assert_sae!(SAE);
41622 let a: f32x4 = a.as_f32x4();
41623 vcvttss2usi(a, SAE)
41624 }
41625}
41626
41627/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.
41628///
41629/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttss_i32&expand=2022)
41630#[inline]
41631#[target_feature(enable = "avx512f")]
41632#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41633#[cfg_attr(test, assert_instr(vcvttss2si))]
41634pub fn _mm_cvttss_i32(a: __m128) -> i32 {
41635 unsafe { vcvttss2si(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) }
41636}
41637
41638/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.
41639///
41640/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttss_u32&expand=2026)
41641#[inline]
41642#[target_feature(enable = "avx512f")]
41643#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41644#[cfg_attr(test, assert_instr(vcvttss2usi))]
41645pub fn _mm_cvttss_u32(a: __m128) -> u32 {
41646 unsafe { vcvttss2usi(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) }
41647}
41648
41649/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
41650/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41651///
41652/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtt_roundsd_si32&expand=1930)
41653#[inline]
41654#[target_feature(enable = "avx512f")]
41655#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41656#[cfg_attr(test, assert_instr(vcvttsd2si, SAE = 8))]
41657#[rustc_legacy_const_generics(1)]
41658pub fn _mm_cvtt_roundsd_si32<const SAE: i32>(a: __m128d) -> i32 {
41659 unsafe {
41660 static_assert_sae!(SAE);
41661 let a: f64x2 = a.as_f64x2();
41662 vcvttsd2si(a, SAE)
41663 }
41664}
41665
41666/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
41667/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41668///
41669/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtt_roundsd_i32&expand=1928)
41670#[inline]
41671#[target_feature(enable = "avx512f")]
41672#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41673#[cfg_attr(test, assert_instr(vcvttsd2si, SAE = 8))]
41674#[rustc_legacy_const_generics(1)]
41675pub fn _mm_cvtt_roundsd_i32<const SAE: i32>(a: __m128d) -> i32 {
41676 unsafe {
41677 static_assert_sae!(SAE);
41678 let a: f64x2 = a.as_f64x2();
41679 vcvttsd2si(a, SAE)
41680 }
41681}
41682
41683/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.\
41684/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41685///
41686/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtt_roundsd_u32&expand=1932)
41687#[inline]
41688#[target_feature(enable = "avx512f")]
41689#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41690#[cfg_attr(test, assert_instr(vcvttsd2usi, SAE = 8))]
41691#[rustc_legacy_const_generics(1)]
41692pub fn _mm_cvtt_roundsd_u32<const SAE: i32>(a: __m128d) -> u32 {
41693 unsafe {
41694 static_assert_sae!(SAE);
41695 let a: f64x2 = a.as_f64x2();
41696 vcvttsd2usi(a, SAE)
41697 }
41698}
41699
41700/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.
41701///
41702/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttsd_i32&expand=2015)
41703#[inline]
41704#[target_feature(enable = "avx512f")]
41705#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41706#[cfg_attr(test, assert_instr(vcvttsd2si))]
41707pub fn _mm_cvttsd_i32(a: __m128d) -> i32 {
41708 unsafe { vcvttsd2si(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) }
41709}
41710
41711/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.
41712///
41713/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttsd_u32&expand=2020)
41714#[inline]
41715#[target_feature(enable = "avx512f")]
41716#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41717#[cfg_attr(test, assert_instr(vcvttsd2usi))]
41718pub fn _mm_cvttsd_u32(a: __m128d) -> u32 {
41719 unsafe { vcvttsd2usi(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) }
41720}
41721
41722/// Convert the unsigned 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
41723///
41724/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtu32_ss&expand=2032)
41725#[inline]
41726#[target_feature(enable = "avx512f")]
41727#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41728#[cfg_attr(test, assert_instr(vcvtusi2ss))]
41729pub fn _mm_cvtu32_ss(a: __m128, b: u32) -> __m128 {
41730 unsafe {
41731 let b: f32 = b as f32;
41732 simd_insert!(a, 0, b)
41733 }
41734}
41735
41736/// Convert the unsigned 32-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
41737///
41738/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtu32_sd&expand=2031)
41739#[inline]
41740#[target_feature(enable = "avx512f")]
41741#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41742#[cfg_attr(test, assert_instr(vcvtusi2sd))]
41743pub fn _mm_cvtu32_sd(a: __m128d, b: u32) -> __m128d {
41744 unsafe {
41745 let b: f64 = b as f64;
41746 simd_insert!(a, 0, b)
41747 }
41748}
41749
41750/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and return the boolean result (0 or 1).\
41751/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41752///
41753/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comi_round_ss&expand=1175)
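///
/// A minimal usage sketch (assumes AVX-512F support; `low_lane_lt` is only an
/// illustrative wrapper): the predicate is one of the `_CMP_*` constants and the
/// return value is 0 or 1.
///
/// ```
/// # #![allow(dead_code)]
/// # #[cfg(target_arch = "x86_64")]
/// # {
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn low_lane_lt(a: __m128, b: __m128) -> bool {
///     // Ordered, non-signaling less-than on the low lanes, exceptions suppressed.
///     _mm_comi_round_ss::<_CMP_LT_OQ, _MM_FROUND_NO_EXC>(a, b) == 1
/// }
/// # }
/// ```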
41754#[inline]
41755#[target_feature(enable = "avx512f")]
41756#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41757#[cfg_attr(test, assert_instr(vcmp, IMM5 = 5, SAE = 4))] //should be vcomiss
41758#[rustc_legacy_const_generics(2, 3)]
41759pub fn _mm_comi_round_ss<const IMM5: i32, const SAE: i32>(a: __m128, b: __m128) -> i32 {
41760 unsafe {
41761 static_assert_uimm_bits!(IMM5, 5);
41762 static_assert_mantissas_sae!(SAE);
41763 let a: f32x4 = a.as_f32x4();
41764 let b: f32x4 = b.as_f32x4();
41765 vcomiss(a, b, IMM5, SAE)
41766 }
41767}
41768
41769/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and return the boolean result (0 or 1).\
41770/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41771///
41772/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comi_round_sd&expand=1174)
41773#[inline]
41774#[target_feature(enable = "avx512f")]
41775#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41776#[cfg_attr(test, assert_instr(vcmp, IMM5 = 5, SAE = 4))] //should be vcomisd
41777#[rustc_legacy_const_generics(2, 3)]
41778pub fn _mm_comi_round_sd<const IMM5: i32, const SAE: i32>(a: __m128d, b: __m128d) -> i32 {
41779 unsafe {
41780 static_assert_uimm_bits!(IMM5, 5);
41781 static_assert_mantissas_sae!(SAE);
41782 let a: f64x2 = a.as_f64x2();
41783 let b: f64x2 = b.as_f64x2();
41784 vcomisd(a, b, IMM5, SAE)
41785 }
41786}
41787
41788/// Equal
41789#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41790pub const _MM_CMPINT_EQ: _MM_CMPINT_ENUM = 0x00;
41791/// Less-than
41792#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41793pub const _MM_CMPINT_LT: _MM_CMPINT_ENUM = 0x01;
41794/// Less-than-or-equal
41795#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41796pub const _MM_CMPINT_LE: _MM_CMPINT_ENUM = 0x02;
41797/// False
41798#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41799pub const _MM_CMPINT_FALSE: _MM_CMPINT_ENUM = 0x03;
41800/// Not-equal
41801#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41802pub const _MM_CMPINT_NE: _MM_CMPINT_ENUM = 0x04;
41803/// Not less-than
41804#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41805pub const _MM_CMPINT_NLT: _MM_CMPINT_ENUM = 0x05;
41806/// Not less-than-or-equal
41807#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41808pub const _MM_CMPINT_NLE: _MM_CMPINT_ENUM = 0x06;
41809/// True
41810#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41811pub const _MM_CMPINT_TRUE: _MM_CMPINT_ENUM = 0x07;
41812
41813/// interval [1, 2)
41814#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41815pub const _MM_MANT_NORM_1_2: _MM_MANTISSA_NORM_ENUM = 0x00;
41816/// interval [0.5, 2)
41817#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41818pub const _MM_MANT_NORM_P5_2: _MM_MANTISSA_NORM_ENUM = 0x01;
41819/// interval [0.5, 1)
41820#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41821pub const _MM_MANT_NORM_P5_1: _MM_MANTISSA_NORM_ENUM = 0x02;
41822/// interval [0.75, 1.5)
41823#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41824pub const _MM_MANT_NORM_P75_1P5: _MM_MANTISSA_NORM_ENUM = 0x03;
41825
41826/// sign = sign(SRC)
41827#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41828pub const _MM_MANT_SIGN_SRC: _MM_MANTISSA_SIGN_ENUM = 0x00;
41829/// sign = 0
41830#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41831pub const _MM_MANT_SIGN_ZERO: _MM_MANTISSA_SIGN_ENUM = 0x01;
41832/// DEST = NaN if sign(SRC) = 1
41833#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41834pub const _MM_MANT_SIGN_NAN: _MM_MANTISSA_SIGN_ENUM = 0x02;
41835
41836#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41837pub const _MM_PERM_AAAA: _MM_PERM_ENUM = 0x00;
41838#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41839pub const _MM_PERM_AAAB: _MM_PERM_ENUM = 0x01;
41840#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41841pub const _MM_PERM_AAAC: _MM_PERM_ENUM = 0x02;
41842#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41843pub const _MM_PERM_AAAD: _MM_PERM_ENUM = 0x03;
41844#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41845pub const _MM_PERM_AABA: _MM_PERM_ENUM = 0x04;
41846#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41847pub const _MM_PERM_AABB: _MM_PERM_ENUM = 0x05;
41848#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41849pub const _MM_PERM_AABC: _MM_PERM_ENUM = 0x06;
41850#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41851pub const _MM_PERM_AABD: _MM_PERM_ENUM = 0x07;
41852#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41853pub const _MM_PERM_AACA: _MM_PERM_ENUM = 0x08;
41854#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41855pub const _MM_PERM_AACB: _MM_PERM_ENUM = 0x09;
41856#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41857pub const _MM_PERM_AACC: _MM_PERM_ENUM = 0x0A;
41858#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41859pub const _MM_PERM_AACD: _MM_PERM_ENUM = 0x0B;
41860#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41861pub const _MM_PERM_AADA: _MM_PERM_ENUM = 0x0C;
41862#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41863pub const _MM_PERM_AADB: _MM_PERM_ENUM = 0x0D;
41864#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41865pub const _MM_PERM_AADC: _MM_PERM_ENUM = 0x0E;
41866#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41867pub const _MM_PERM_AADD: _MM_PERM_ENUM = 0x0F;
41868#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41869pub const _MM_PERM_ABAA: _MM_PERM_ENUM = 0x10;
41870#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41871pub const _MM_PERM_ABAB: _MM_PERM_ENUM = 0x11;
41872#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41873pub const _MM_PERM_ABAC: _MM_PERM_ENUM = 0x12;
41874#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41875pub const _MM_PERM_ABAD: _MM_PERM_ENUM = 0x13;
41876#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41877pub const _MM_PERM_ABBA: _MM_PERM_ENUM = 0x14;
41878#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41879pub const _MM_PERM_ABBB: _MM_PERM_ENUM = 0x15;
41880#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41881pub const _MM_PERM_ABBC: _MM_PERM_ENUM = 0x16;
41882#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41883pub const _MM_PERM_ABBD: _MM_PERM_ENUM = 0x17;
41884#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41885pub const _MM_PERM_ABCA: _MM_PERM_ENUM = 0x18;
41886#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41887pub const _MM_PERM_ABCB: _MM_PERM_ENUM = 0x19;
41888#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41889pub const _MM_PERM_ABCC: _MM_PERM_ENUM = 0x1A;
41890#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41891pub const _MM_PERM_ABCD: _MM_PERM_ENUM = 0x1B;
41892#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41893pub const _MM_PERM_ABDA: _MM_PERM_ENUM = 0x1C;
41894#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41895pub const _MM_PERM_ABDB: _MM_PERM_ENUM = 0x1D;
41896#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41897pub const _MM_PERM_ABDC: _MM_PERM_ENUM = 0x1E;
41898#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41899pub const _MM_PERM_ABDD: _MM_PERM_ENUM = 0x1F;
41900#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41901pub const _MM_PERM_ACAA: _MM_PERM_ENUM = 0x20;
41902#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41903pub const _MM_PERM_ACAB: _MM_PERM_ENUM = 0x21;
41904#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41905pub const _MM_PERM_ACAC: _MM_PERM_ENUM = 0x22;
41906#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41907pub const _MM_PERM_ACAD: _MM_PERM_ENUM = 0x23;
41908#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41909pub const _MM_PERM_ACBA: _MM_PERM_ENUM = 0x24;
41910#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41911pub const _MM_PERM_ACBB: _MM_PERM_ENUM = 0x25;
41912#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41913pub const _MM_PERM_ACBC: _MM_PERM_ENUM = 0x26;
41914#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41915pub const _MM_PERM_ACBD: _MM_PERM_ENUM = 0x27;
41916#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41917pub const _MM_PERM_ACCA: _MM_PERM_ENUM = 0x28;
41918#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41919pub const _MM_PERM_ACCB: _MM_PERM_ENUM = 0x29;
41920#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41921pub const _MM_PERM_ACCC: _MM_PERM_ENUM = 0x2A;
41922#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41923pub const _MM_PERM_ACCD: _MM_PERM_ENUM = 0x2B;
41924#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41925pub const _MM_PERM_ACDA: _MM_PERM_ENUM = 0x2C;
41926#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41927pub const _MM_PERM_ACDB: _MM_PERM_ENUM = 0x2D;
41928#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41929pub const _MM_PERM_ACDC: _MM_PERM_ENUM = 0x2E;
41930#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41931pub const _MM_PERM_ACDD: _MM_PERM_ENUM = 0x2F;
41932#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41933pub const _MM_PERM_ADAA: _MM_PERM_ENUM = 0x30;
41934#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41935pub const _MM_PERM_ADAB: _MM_PERM_ENUM = 0x31;
41936#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41937pub const _MM_PERM_ADAC: _MM_PERM_ENUM = 0x32;
41938#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41939pub const _MM_PERM_ADAD: _MM_PERM_ENUM = 0x33;
41940#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41941pub const _MM_PERM_ADBA: _MM_PERM_ENUM = 0x34;
41942#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41943pub const _MM_PERM_ADBB: _MM_PERM_ENUM = 0x35;
41944#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41945pub const _MM_PERM_ADBC: _MM_PERM_ENUM = 0x36;
41946#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41947pub const _MM_PERM_ADBD: _MM_PERM_ENUM = 0x37;
41948#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41949pub const _MM_PERM_ADCA: _MM_PERM_ENUM = 0x38;
41950#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41951pub const _MM_PERM_ADCB: _MM_PERM_ENUM = 0x39;
41952#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41953pub const _MM_PERM_ADCC: _MM_PERM_ENUM = 0x3A;
41954#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41955pub const _MM_PERM_ADCD: _MM_PERM_ENUM = 0x3B;
41956#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41957pub const _MM_PERM_ADDA: _MM_PERM_ENUM = 0x3C;
41958#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41959pub const _MM_PERM_ADDB: _MM_PERM_ENUM = 0x3D;
41960#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41961pub const _MM_PERM_ADDC: _MM_PERM_ENUM = 0x3E;
41962#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41963pub const _MM_PERM_ADDD: _MM_PERM_ENUM = 0x3F;
41964#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41965pub const _MM_PERM_BAAA: _MM_PERM_ENUM = 0x40;
41966#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41967pub const _MM_PERM_BAAB: _MM_PERM_ENUM = 0x41;
41968#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41969pub const _MM_PERM_BAAC: _MM_PERM_ENUM = 0x42;
41970#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41971pub const _MM_PERM_BAAD: _MM_PERM_ENUM = 0x43;
41972#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41973pub const _MM_PERM_BABA: _MM_PERM_ENUM = 0x44;
41974#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41975pub const _MM_PERM_BABB: _MM_PERM_ENUM = 0x45;
41976#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41977pub const _MM_PERM_BABC: _MM_PERM_ENUM = 0x46;
41978#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41979pub const _MM_PERM_BABD: _MM_PERM_ENUM = 0x47;
41980#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41981pub const _MM_PERM_BACA: _MM_PERM_ENUM = 0x48;
41982#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41983pub const _MM_PERM_BACB: _MM_PERM_ENUM = 0x49;
41984#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41985pub const _MM_PERM_BACC: _MM_PERM_ENUM = 0x4A;
41986#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41987pub const _MM_PERM_BACD: _MM_PERM_ENUM = 0x4B;
41988#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41989pub const _MM_PERM_BADA: _MM_PERM_ENUM = 0x4C;
41990#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41991pub const _MM_PERM_BADB: _MM_PERM_ENUM = 0x4D;
41992#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41993pub const _MM_PERM_BADC: _MM_PERM_ENUM = 0x4E;
41994#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41995pub const _MM_PERM_BADD: _MM_PERM_ENUM = 0x4F;
41996#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41997pub const _MM_PERM_BBAA: _MM_PERM_ENUM = 0x50;
41998#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
41999pub const _MM_PERM_BBAB: _MM_PERM_ENUM = 0x51;
42000#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42001pub const _MM_PERM_BBAC: _MM_PERM_ENUM = 0x52;
42002#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42003pub const _MM_PERM_BBAD: _MM_PERM_ENUM = 0x53;
42004#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42005pub const _MM_PERM_BBBA: _MM_PERM_ENUM = 0x54;
42006#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42007pub const _MM_PERM_BBBB: _MM_PERM_ENUM = 0x55;
42008#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42009pub const _MM_PERM_BBBC: _MM_PERM_ENUM = 0x56;
42010#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42011pub const _MM_PERM_BBBD: _MM_PERM_ENUM = 0x57;
42012#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42013pub const _MM_PERM_BBCA: _MM_PERM_ENUM = 0x58;
42014#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42015pub const _MM_PERM_BBCB: _MM_PERM_ENUM = 0x59;
42016#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42017pub const _MM_PERM_BBCC: _MM_PERM_ENUM = 0x5A;
42018#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42019pub const _MM_PERM_BBCD: _MM_PERM_ENUM = 0x5B;
42020#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42021pub const _MM_PERM_BBDA: _MM_PERM_ENUM = 0x5C;
42022#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42023pub const _MM_PERM_BBDB: _MM_PERM_ENUM = 0x5D;
42024#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42025pub const _MM_PERM_BBDC: _MM_PERM_ENUM = 0x5E;
42026#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42027pub const _MM_PERM_BBDD: _MM_PERM_ENUM = 0x5F;
42028#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42029pub const _MM_PERM_BCAA: _MM_PERM_ENUM = 0x60;
42030#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42031pub const _MM_PERM_BCAB: _MM_PERM_ENUM = 0x61;
42032#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42033pub const _MM_PERM_BCAC: _MM_PERM_ENUM = 0x62;
42034#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42035pub const _MM_PERM_BCAD: _MM_PERM_ENUM = 0x63;
42036#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42037pub const _MM_PERM_BCBA: _MM_PERM_ENUM = 0x64;
42038#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42039pub const _MM_PERM_BCBB: _MM_PERM_ENUM = 0x65;
42040#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42041pub const _MM_PERM_BCBC: _MM_PERM_ENUM = 0x66;
42042#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42043pub const _MM_PERM_BCBD: _MM_PERM_ENUM = 0x67;
42044#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42045pub const _MM_PERM_BCCA: _MM_PERM_ENUM = 0x68;
42046#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42047pub const _MM_PERM_BCCB: _MM_PERM_ENUM = 0x69;
42048#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42049pub const _MM_PERM_BCCC: _MM_PERM_ENUM = 0x6A;
42050#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42051pub const _MM_PERM_BCCD: _MM_PERM_ENUM = 0x6B;
42052#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42053pub const _MM_PERM_BCDA: _MM_PERM_ENUM = 0x6C;
42054#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42055pub const _MM_PERM_BCDB: _MM_PERM_ENUM = 0x6D;
42056#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42057pub const _MM_PERM_BCDC: _MM_PERM_ENUM = 0x6E;
42058#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42059pub const _MM_PERM_BCDD: _MM_PERM_ENUM = 0x6F;
42060#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42061pub const _MM_PERM_BDAA: _MM_PERM_ENUM = 0x70;
42062#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42063pub const _MM_PERM_BDAB: _MM_PERM_ENUM = 0x71;
42064#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42065pub const _MM_PERM_BDAC: _MM_PERM_ENUM = 0x72;
42066#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42067pub const _MM_PERM_BDAD: _MM_PERM_ENUM = 0x73;
42068#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42069pub const _MM_PERM_BDBA: _MM_PERM_ENUM = 0x74;
42070#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42071pub const _MM_PERM_BDBB: _MM_PERM_ENUM = 0x75;
42072#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42073pub const _MM_PERM_BDBC: _MM_PERM_ENUM = 0x76;
42074#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42075pub const _MM_PERM_BDBD: _MM_PERM_ENUM = 0x77;
42076#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42077pub const _MM_PERM_BDCA: _MM_PERM_ENUM = 0x78;
42078#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42079pub const _MM_PERM_BDCB: _MM_PERM_ENUM = 0x79;
42080#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42081pub const _MM_PERM_BDCC: _MM_PERM_ENUM = 0x7A;
42082#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42083pub const _MM_PERM_BDCD: _MM_PERM_ENUM = 0x7B;
42084#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42085pub const _MM_PERM_BDDA: _MM_PERM_ENUM = 0x7C;
42086#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42087pub const _MM_PERM_BDDB: _MM_PERM_ENUM = 0x7D;
42088#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42089pub const _MM_PERM_BDDC: _MM_PERM_ENUM = 0x7E;
42090#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42091pub const _MM_PERM_BDDD: _MM_PERM_ENUM = 0x7F;
42092#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42093pub const _MM_PERM_CAAA: _MM_PERM_ENUM = 0x80;
42094#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42095pub const _MM_PERM_CAAB: _MM_PERM_ENUM = 0x81;
42096#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42097pub const _MM_PERM_CAAC: _MM_PERM_ENUM = 0x82;
42098#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42099pub const _MM_PERM_CAAD: _MM_PERM_ENUM = 0x83;
42100#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42101pub const _MM_PERM_CABA: _MM_PERM_ENUM = 0x84;
42102#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42103pub const _MM_PERM_CABB: _MM_PERM_ENUM = 0x85;
42104#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42105pub const _MM_PERM_CABC: _MM_PERM_ENUM = 0x86;
42106#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42107pub const _MM_PERM_CABD: _MM_PERM_ENUM = 0x87;
42108#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42109pub const _MM_PERM_CACA: _MM_PERM_ENUM = 0x88;
42110#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42111pub const _MM_PERM_CACB: _MM_PERM_ENUM = 0x89;
42112#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42113pub const _MM_PERM_CACC: _MM_PERM_ENUM = 0x8A;
42114#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42115pub const _MM_PERM_CACD: _MM_PERM_ENUM = 0x8B;
42116#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42117pub const _MM_PERM_CADA: _MM_PERM_ENUM = 0x8C;
42118#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42119pub const _MM_PERM_CADB: _MM_PERM_ENUM = 0x8D;
42120#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42121pub const _MM_PERM_CADC: _MM_PERM_ENUM = 0x8E;
42122#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42123pub const _MM_PERM_CADD: _MM_PERM_ENUM = 0x8F;
42124#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42125pub const _MM_PERM_CBAA: _MM_PERM_ENUM = 0x90;
42126#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42127pub const _MM_PERM_CBAB: _MM_PERM_ENUM = 0x91;
42128#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42129pub const _MM_PERM_CBAC: _MM_PERM_ENUM = 0x92;
42130#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42131pub const _MM_PERM_CBAD: _MM_PERM_ENUM = 0x93;
42132#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42133pub const _MM_PERM_CBBA: _MM_PERM_ENUM = 0x94;
42134#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42135pub const _MM_PERM_CBBB: _MM_PERM_ENUM = 0x95;
42136#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42137pub const _MM_PERM_CBBC: _MM_PERM_ENUM = 0x96;
42138#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42139pub const _MM_PERM_CBBD: _MM_PERM_ENUM = 0x97;
42140#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42141pub const _MM_PERM_CBCA: _MM_PERM_ENUM = 0x98;
42142#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42143pub const _MM_PERM_CBCB: _MM_PERM_ENUM = 0x99;
42144#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42145pub const _MM_PERM_CBCC: _MM_PERM_ENUM = 0x9A;
42146#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42147pub const _MM_PERM_CBCD: _MM_PERM_ENUM = 0x9B;
42148#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42149pub const _MM_PERM_CBDA: _MM_PERM_ENUM = 0x9C;
42150#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42151pub const _MM_PERM_CBDB: _MM_PERM_ENUM = 0x9D;
42152#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42153pub const _MM_PERM_CBDC: _MM_PERM_ENUM = 0x9E;
42154#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42155pub const _MM_PERM_CBDD: _MM_PERM_ENUM = 0x9F;
42156#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42157pub const _MM_PERM_CCAA: _MM_PERM_ENUM = 0xA0;
42158#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42159pub const _MM_PERM_CCAB: _MM_PERM_ENUM = 0xA1;
42160#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42161pub const _MM_PERM_CCAC: _MM_PERM_ENUM = 0xA2;
42162#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42163pub const _MM_PERM_CCAD: _MM_PERM_ENUM = 0xA3;
42164#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42165pub const _MM_PERM_CCBA: _MM_PERM_ENUM = 0xA4;
42166#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42167pub const _MM_PERM_CCBB: _MM_PERM_ENUM = 0xA5;
42168#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42169pub const _MM_PERM_CCBC: _MM_PERM_ENUM = 0xA6;
42170#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42171pub const _MM_PERM_CCBD: _MM_PERM_ENUM = 0xA7;
42172#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42173pub const _MM_PERM_CCCA: _MM_PERM_ENUM = 0xA8;
42174#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42175pub const _MM_PERM_CCCB: _MM_PERM_ENUM = 0xA9;
42176#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42177pub const _MM_PERM_CCCC: _MM_PERM_ENUM = 0xAA;
42178#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42179pub const _MM_PERM_CCCD: _MM_PERM_ENUM = 0xAB;
42180#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42181pub const _MM_PERM_CCDA: _MM_PERM_ENUM = 0xAC;
42182#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42183pub const _MM_PERM_CCDB: _MM_PERM_ENUM = 0xAD;
42184#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42185pub const _MM_PERM_CCDC: _MM_PERM_ENUM = 0xAE;
42186#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42187pub const _MM_PERM_CCDD: _MM_PERM_ENUM = 0xAF;
42188#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42189pub const _MM_PERM_CDAA: _MM_PERM_ENUM = 0xB0;
42190#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42191pub const _MM_PERM_CDAB: _MM_PERM_ENUM = 0xB1;
42192#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42193pub const _MM_PERM_CDAC: _MM_PERM_ENUM = 0xB2;
42194#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42195pub const _MM_PERM_CDAD: _MM_PERM_ENUM = 0xB3;
42196#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42197pub const _MM_PERM_CDBA: _MM_PERM_ENUM = 0xB4;
42198#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42199pub const _MM_PERM_CDBB: _MM_PERM_ENUM = 0xB5;
42200#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42201pub const _MM_PERM_CDBC: _MM_PERM_ENUM = 0xB6;
42202#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42203pub const _MM_PERM_CDBD: _MM_PERM_ENUM = 0xB7;
42204#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42205pub const _MM_PERM_CDCA: _MM_PERM_ENUM = 0xB8;
42206#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42207pub const _MM_PERM_CDCB: _MM_PERM_ENUM = 0xB9;
42208#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42209pub const _MM_PERM_CDCC: _MM_PERM_ENUM = 0xBA;
42210#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42211pub const _MM_PERM_CDCD: _MM_PERM_ENUM = 0xBB;
42212#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42213pub const _MM_PERM_CDDA: _MM_PERM_ENUM = 0xBC;
42214#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42215pub const _MM_PERM_CDDB: _MM_PERM_ENUM = 0xBD;
42216#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42217pub const _MM_PERM_CDDC: _MM_PERM_ENUM = 0xBE;
42218#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42219pub const _MM_PERM_CDDD: _MM_PERM_ENUM = 0xBF;
42220#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42221pub const _MM_PERM_DAAA: _MM_PERM_ENUM = 0xC0;
42222#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42223pub const _MM_PERM_DAAB: _MM_PERM_ENUM = 0xC1;
42224#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42225pub const _MM_PERM_DAAC: _MM_PERM_ENUM = 0xC2;
42226#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42227pub const _MM_PERM_DAAD: _MM_PERM_ENUM = 0xC3;
42228#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42229pub const _MM_PERM_DABA: _MM_PERM_ENUM = 0xC4;
42230#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42231pub const _MM_PERM_DABB: _MM_PERM_ENUM = 0xC5;
42232#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42233pub const _MM_PERM_DABC: _MM_PERM_ENUM = 0xC6;
42234#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42235pub const _MM_PERM_DABD: _MM_PERM_ENUM = 0xC7;
42236#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42237pub const _MM_PERM_DACA: _MM_PERM_ENUM = 0xC8;
42238#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42239pub const _MM_PERM_DACB: _MM_PERM_ENUM = 0xC9;
42240#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42241pub const _MM_PERM_DACC: _MM_PERM_ENUM = 0xCA;
42242#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42243pub const _MM_PERM_DACD: _MM_PERM_ENUM = 0xCB;
42244#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
42245pub const _MM_PERM_DADA: _MM_PERM_ENUM = 0xCC;
42246#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DADB: _MM_PERM_ENUM = 0xCD;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DADC: _MM_PERM_ENUM = 0xCE;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DADD: _MM_PERM_ENUM = 0xCF;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DBAA: _MM_PERM_ENUM = 0xD0;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DBAB: _MM_PERM_ENUM = 0xD1;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DBAC: _MM_PERM_ENUM = 0xD2;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DBAD: _MM_PERM_ENUM = 0xD3;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DBBA: _MM_PERM_ENUM = 0xD4;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DBBB: _MM_PERM_ENUM = 0xD5;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DBBC: _MM_PERM_ENUM = 0xD6;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DBBD: _MM_PERM_ENUM = 0xD7;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DBCA: _MM_PERM_ENUM = 0xD8;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DBCB: _MM_PERM_ENUM = 0xD9;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DBCC: _MM_PERM_ENUM = 0xDA;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DBCD: _MM_PERM_ENUM = 0xDB;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DBDA: _MM_PERM_ENUM = 0xDC;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DBDB: _MM_PERM_ENUM = 0xDD;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DBDC: _MM_PERM_ENUM = 0xDE;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DBDD: _MM_PERM_ENUM = 0xDF;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DCAA: _MM_PERM_ENUM = 0xE0;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DCAB: _MM_PERM_ENUM = 0xE1;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DCAC: _MM_PERM_ENUM = 0xE2;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DCAD: _MM_PERM_ENUM = 0xE3;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DCBA: _MM_PERM_ENUM = 0xE4;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DCBB: _MM_PERM_ENUM = 0xE5;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DCBC: _MM_PERM_ENUM = 0xE6;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DCBD: _MM_PERM_ENUM = 0xE7;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DCCA: _MM_PERM_ENUM = 0xE8;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DCCB: _MM_PERM_ENUM = 0xE9;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DCCC: _MM_PERM_ENUM = 0xEA;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DCCD: _MM_PERM_ENUM = 0xEB;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DCDA: _MM_PERM_ENUM = 0xEC;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DCDB: _MM_PERM_ENUM = 0xED;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DCDC: _MM_PERM_ENUM = 0xEE;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DCDD: _MM_PERM_ENUM = 0xEF;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DDAA: _MM_PERM_ENUM = 0xF0;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DDAB: _MM_PERM_ENUM = 0xF1;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DDAC: _MM_PERM_ENUM = 0xF2;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DDAD: _MM_PERM_ENUM = 0xF3;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DDBA: _MM_PERM_ENUM = 0xF4;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DDBB: _MM_PERM_ENUM = 0xF5;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DDBC: _MM_PERM_ENUM = 0xF6;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DDBD: _MM_PERM_ENUM = 0xF7;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DDCA: _MM_PERM_ENUM = 0xF8;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DDCB: _MM_PERM_ENUM = 0xF9;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DDCC: _MM_PERM_ENUM = 0xFA;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DDCD: _MM_PERM_ENUM = 0xFB;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DDDA: _MM_PERM_ENUM = 0xFC;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DDDB: _MM_PERM_ENUM = 0xFD;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DDDC: _MM_PERM_ENUM = 0xFE;
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
pub const _MM_PERM_DDDD: _MM_PERM_ENUM = 0xFF;

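// Illustrative note (not part of the upstream source): each `_MM_PERM_*`
// constant packs four 2-bit lane selectors, with `A` = 0 through `D` = 3 and
// the first letter occupying the most significant bit pair, so `_MM_PERM_DCBA`
// (0xE4) selects lanes 3, 2, 1, 0 and is the identity control for
// `_mm512_shuffle_epi32`. A minimal compile-time sketch of that encoding,
// assuming `_MM_PERM_ENUM` is a plain integer alias:
const _: () = assert!(_MM_PERM_DCBA == ((3 << 6) | (2 << 4) | (1 << 2) | 0));
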
#[allow(improper_ctypes)]
unsafe extern "C" {
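    // Reading guide for the declarations below (a descriptive note added here,
    // not upstream documentation): each item binds to an LLVM intrinsic through
    // `#[link_name]`, so the signatures mirror LLVM's definitions rather than
    // the public Intel API. By convention, `src` is the merge source of the
    // masked forms, `mask`/`m`/`k` is the per-lane write mask, and the trailing
    // `rounding`/`sae` immediates take the `_MM_FROUND_*` constants.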
    #[link_name = "llvm.x86.avx512.sqrt.ps.512"]
    unsafe fn vsqrtps(a: f32x16, rounding: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.sqrt.pd.512"]
    unsafe fn vsqrtpd(a: f64x8, rounding: i32) -> f64x8;

    #[link_name = "llvm.x86.avx512.vfmadd.ps.512"]
    unsafe fn vfmadd132psround(a: __m512, b: __m512, c: __m512, rounding: i32) -> __m512;
    #[link_name = "llvm.x86.avx512.vfmadd.pd.512"]
    unsafe fn vfmadd132pdround(a: __m512d, b: __m512d, c: __m512d, rounding: i32) -> __m512d;

    #[link_name = "llvm.x86.avx512.vfmaddsub.ps.512"]
    unsafe fn vfmaddsubpsround(a: __m512, b: __m512, c: __m512, rounding: i32) -> __m512; // from clang
    #[link_name = "llvm.x86.avx512.vfmaddsub.pd.512"]
    unsafe fn vfmaddsubpdround(a: __m512d, b: __m512d, c: __m512d, rounding: i32) -> __m512d; // from clang

    #[link_name = "llvm.x86.avx512.add.ps.512"]
    unsafe fn vaddps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.add.pd.512"]
    unsafe fn vaddpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.sub.ps.512"]
    unsafe fn vsubps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.sub.pd.512"]
    unsafe fn vsubpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.mul.ps.512"]
    unsafe fn vmulps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.mul.pd.512"]
    unsafe fn vmulpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.div.ps.512"]
    unsafe fn vdivps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.div.pd.512"]
    unsafe fn vdivpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;

    #[link_name = "llvm.x86.avx512.max.ps.512"]
    unsafe fn vmaxps(a: f32x16, b: f32x16, sae: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.max.pd.512"]
    unsafe fn vmaxpd(a: f64x8, b: f64x8, sae: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.min.ps.512"]
    unsafe fn vminps(a: f32x16, b: f32x16, sae: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.min.pd.512"]
    unsafe fn vminpd(a: f64x8, b: f64x8, sae: i32) -> f64x8;

    #[link_name = "llvm.x86.avx512.mask.getexp.ps.512"]
    unsafe fn vgetexpps(a: f32x16, src: f32x16, m: u16, sae: i32) -> f32x16;

    #[link_name = "llvm.x86.avx512.mask.getexp.ps.256"]
    unsafe fn vgetexpps256(a: f32x8, src: f32x8, m: u8) -> f32x8;
    #[link_name = "llvm.x86.avx512.mask.getexp.ps.128"]
    unsafe fn vgetexpps128(a: f32x4, src: f32x4, m: u8) -> f32x4;

    #[link_name = "llvm.x86.avx512.mask.getexp.pd.512"]
    unsafe fn vgetexppd(a: f64x8, src: f64x8, m: u8, sae: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.mask.getexp.pd.256"]
    unsafe fn vgetexppd256(a: f64x4, src: f64x4, m: u8) -> f64x4;
    #[link_name = "llvm.x86.avx512.mask.getexp.pd.128"]
    unsafe fn vgetexppd128(a: f64x2, src: f64x2, m: u8) -> f64x2;

    #[link_name = "llvm.x86.avx512.mask.rndscale.ps.512"]
    unsafe fn vrndscaleps(a: f32x16, imm8: i32, src: f32x16, mask: u16, sae: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.mask.rndscale.ps.256"]
    unsafe fn vrndscaleps256(a: f32x8, imm8: i32, src: f32x8, mask: u8) -> f32x8;
    #[link_name = "llvm.x86.avx512.mask.rndscale.ps.128"]
    unsafe fn vrndscaleps128(a: f32x4, imm8: i32, src: f32x4, mask: u8) -> f32x4;

    #[link_name = "llvm.x86.avx512.mask.rndscale.pd.512"]
    unsafe fn vrndscalepd(a: f64x8, imm8: i32, src: f64x8, mask: u8, sae: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.mask.rndscale.pd.256"]
    unsafe fn vrndscalepd256(a: f64x4, imm8: i32, src: f64x4, mask: u8) -> f64x4;
    #[link_name = "llvm.x86.avx512.mask.rndscale.pd.128"]
    unsafe fn vrndscalepd128(a: f64x2, imm8: i32, src: f64x2, mask: u8) -> f64x2;

    #[link_name = "llvm.x86.avx512.mask.scalef.ps.512"]
    unsafe fn vscalefps(a: f32x16, b: f32x16, src: f32x16, mask: u16, rounding: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.mask.scalef.ps.256"]
    unsafe fn vscalefps256(a: f32x8, b: f32x8, src: f32x8, mask: u8) -> f32x8;
    #[link_name = "llvm.x86.avx512.mask.scalef.ps.128"]
    unsafe fn vscalefps128(a: f32x4, b: f32x4, src: f32x4, mask: u8) -> f32x4;

    #[link_name = "llvm.x86.avx512.mask.scalef.pd.512"]
    unsafe fn vscalefpd(a: f64x8, b: f64x8, src: f64x8, mask: u8, rounding: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.mask.scalef.pd.256"]
    unsafe fn vscalefpd256(a: f64x4, b: f64x4, src: f64x4, mask: u8) -> f64x4;
    #[link_name = "llvm.x86.avx512.mask.scalef.pd.128"]
    unsafe fn vscalefpd128(a: f64x2, b: f64x2, src: f64x2, mask: u8) -> f64x2;

    #[link_name = "llvm.x86.avx512.mask.fixupimm.ps.512"]
    unsafe fn vfixupimmps(a: f32x16, b: f32x16, c: i32x16, imm8: i32, mask: u16, sae: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.mask.fixupimm.ps.256"]
    unsafe fn vfixupimmps256(a: f32x8, b: f32x8, c: i32x8, imm8: i32, mask: u8) -> f32x8;
    #[link_name = "llvm.x86.avx512.mask.fixupimm.ps.128"]
    unsafe fn vfixupimmps128(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8) -> f32x4;

    #[link_name = "llvm.x86.avx512.mask.fixupimm.pd.512"]
    unsafe fn vfixupimmpd(a: f64x8, b: f64x8, c: i64x8, imm8: i32, mask: u8, sae: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.mask.fixupimm.pd.256"]
    unsafe fn vfixupimmpd256(a: f64x4, b: f64x4, c: i64x4, imm8: i32, mask: u8) -> f64x4;
    #[link_name = "llvm.x86.avx512.mask.fixupimm.pd.128"]
    unsafe fn vfixupimmpd128(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8) -> f64x2;

    #[link_name = "llvm.x86.avx512.maskz.fixupimm.ps.512"]
    unsafe fn vfixupimmpsz(a: f32x16, b: f32x16, c: i32x16, imm8: i32, mask: u16, sae: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.maskz.fixupimm.ps.256"]
    unsafe fn vfixupimmpsz256(a: f32x8, b: f32x8, c: i32x8, imm8: i32, mask: u8) -> f32x8;
    #[link_name = "llvm.x86.avx512.maskz.fixupimm.ps.128"]
    unsafe fn vfixupimmpsz128(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8) -> f32x4;

    #[link_name = "llvm.x86.avx512.maskz.fixupimm.pd.512"]
    unsafe fn vfixupimmpdz(a: f64x8, b: f64x8, c: i64x8, imm8: i32, mask: u8, sae: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.maskz.fixupimm.pd.256"]
    unsafe fn vfixupimmpdz256(a: f64x4, b: f64x4, c: i64x4, imm8: i32, mask: u8) -> f64x4;
    #[link_name = "llvm.x86.avx512.maskz.fixupimm.pd.128"]
    unsafe fn vfixupimmpdz128(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8) -> f64x2;

    #[link_name = "llvm.x86.avx512.pternlog.d.512"]
    unsafe fn vpternlogd(a: i32x16, b: i32x16, c: i32x16, imm8: i32) -> i32x16;
    #[link_name = "llvm.x86.avx512.pternlog.d.256"]
    unsafe fn vpternlogd256(a: i32x8, b: i32x8, c: i32x8, imm8: i32) -> i32x8;
    #[link_name = "llvm.x86.avx512.pternlog.d.128"]
    unsafe fn vpternlogd128(a: i32x4, b: i32x4, c: i32x4, imm8: i32) -> i32x4;

    #[link_name = "llvm.x86.avx512.pternlog.q.512"]
    unsafe fn vpternlogq(a: i64x8, b: i64x8, c: i64x8, imm8: i32) -> i64x8;
    #[link_name = "llvm.x86.avx512.pternlog.q.256"]
    unsafe fn vpternlogq256(a: i64x4, b: i64x4, c: i64x4, imm8: i32) -> i64x4;
    #[link_name = "llvm.x86.avx512.pternlog.q.128"]
    unsafe fn vpternlogq128(a: i64x2, b: i64x2, c: i64x2, imm8: i32) -> i64x2;

    #[link_name = "llvm.x86.avx512.mask.getmant.ps.512"]
    unsafe fn vgetmantps(a: f32x16, mantissas: i32, src: f32x16, m: u16, sae: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.mask.getmant.ps.256"]
    unsafe fn vgetmantps256(a: f32x8, mantissas: i32, src: f32x8, m: u8) -> f32x8;
    #[link_name = "llvm.x86.avx512.mask.getmant.ps.128"]
    unsafe fn vgetmantps128(a: f32x4, mantissas: i32, src: f32x4, m: u8) -> f32x4;

    #[link_name = "llvm.x86.avx512.mask.getmant.pd.512"]
    unsafe fn vgetmantpd(a: f64x8, mantissas: i32, src: f64x8, m: u8, sae: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.mask.getmant.pd.256"]
    unsafe fn vgetmantpd256(a: f64x4, mantissas: i32, src: f64x4, m: u8) -> f64x4;
    #[link_name = "llvm.x86.avx512.mask.getmant.pd.128"]
    unsafe fn vgetmantpd128(a: f64x2, mantissas: i32, src: f64x2, m: u8) -> f64x2;

    #[link_name = "llvm.x86.avx512.rcp14.ps.512"]
    unsafe fn vrcp14ps(a: f32x16, src: f32x16, m: u16) -> f32x16;
    #[link_name = "llvm.x86.avx512.rcp14.ps.256"]
    unsafe fn vrcp14ps256(a: f32x8, src: f32x8, m: u8) -> f32x8;
    #[link_name = "llvm.x86.avx512.rcp14.ps.128"]
    unsafe fn vrcp14ps128(a: f32x4, src: f32x4, m: u8) -> f32x4;

    #[link_name = "llvm.x86.avx512.rcp14.pd.512"]
    unsafe fn vrcp14pd(a: f64x8, src: f64x8, m: u8) -> f64x8;
    #[link_name = "llvm.x86.avx512.rcp14.pd.256"]
    unsafe fn vrcp14pd256(a: f64x4, src: f64x4, m: u8) -> f64x4;
    #[link_name = "llvm.x86.avx512.rcp14.pd.128"]
    unsafe fn vrcp14pd128(a: f64x2, src: f64x2, m: u8) -> f64x2;

    #[link_name = "llvm.x86.avx512.rsqrt14.ps.512"]
    unsafe fn vrsqrt14ps(a: f32x16, src: f32x16, m: u16) -> f32x16;
    #[link_name = "llvm.x86.avx512.rsqrt14.ps.256"]
    unsafe fn vrsqrt14ps256(a: f32x8, src: f32x8, m: u8) -> f32x8;
    #[link_name = "llvm.x86.avx512.rsqrt14.ps.128"]
    unsafe fn vrsqrt14ps128(a: f32x4, src: f32x4, m: u8) -> f32x4;

    #[link_name = "llvm.x86.avx512.rsqrt14.pd.512"]
    unsafe fn vrsqrt14pd(a: f64x8, src: f64x8, m: u8) -> f64x8;
    #[link_name = "llvm.x86.avx512.rsqrt14.pd.256"]
    unsafe fn vrsqrt14pd256(a: f64x4, src: f64x4, m: u8) -> f64x4;
    #[link_name = "llvm.x86.avx512.rsqrt14.pd.128"]
    unsafe fn vrsqrt14pd128(a: f64x2, src: f64x2, m: u8) -> f64x2;

    #[link_name = "llvm.x86.avx512.mask.cvtps2dq.512"]
    unsafe fn vcvtps2dq(a: f32x16, src: i32x16, mask: u16, rounding: i32) -> i32x16;

    #[link_name = "llvm.x86.avx512.mask.cvtps2udq.512"]
    unsafe fn vcvtps2udq(a: f32x16, src: u32x16, mask: u16, rounding: i32) -> u32x16;
    #[link_name = "llvm.x86.avx512.mask.cvtps2udq.256"]
    unsafe fn vcvtps2udq256(a: f32x8, src: u32x8, mask: u8) -> u32x8;
    #[link_name = "llvm.x86.avx512.mask.cvtps2udq.128"]
    unsafe fn vcvtps2udq128(a: f32x4, src: u32x4, mask: u8) -> u32x4;

    #[link_name = "llvm.x86.avx512.mask.cvtps2pd.512"]
    unsafe fn vcvtps2pd(a: f32x8, src: f64x8, mask: u8, sae: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.mask.cvtpd2ps.512"]
    unsafe fn vcvtpd2ps(a: f64x8, src: f32x8, mask: u8, rounding: i32) -> f32x8;

    #[link_name = "llvm.x86.avx512.mask.cvtpd2dq.512"]
    unsafe fn vcvtpd2dq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> i32x8;

    #[link_name = "llvm.x86.avx512.mask.cvtpd2udq.512"]
    unsafe fn vcvtpd2udq(a: f64x8, src: u32x8, mask: u8, rounding: i32) -> u32x8;
    #[link_name = "llvm.x86.avx512.mask.cvtpd2udq.256"]
    unsafe fn vcvtpd2udq256(a: f64x4, src: u32x4, mask: u8) -> u32x4;
    #[link_name = "llvm.x86.avx512.mask.cvtpd2udq.128"]
    unsafe fn vcvtpd2udq128(a: f64x2, src: u32x4, mask: u8) -> u32x4;

    #[link_name = "llvm.x86.avx512.sitofp.round.v16f32.v16i32"]
    unsafe fn vcvtdq2ps(a: i32x16, rounding: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.uitofp.round.v16f32.v16i32"]
    unsafe fn vcvtudq2ps(a: u32x16, rounding: i32) -> f32x16;

    #[link_name = "llvm.x86.avx512.mask.vcvtps2ph.512"]
    unsafe fn vcvtps2ph(a: f32x16, rounding: i32, src: i16x16, mask: u16) -> i16x16;
    #[link_name = "llvm.x86.avx512.mask.vcvtps2ph.256"]
    unsafe fn vcvtps2ph256(a: f32x8, imm8: i32, src: i16x8, mask: u8) -> i16x8;
    #[link_name = "llvm.x86.avx512.mask.vcvtps2ph.128"]
    unsafe fn vcvtps2ph128(a: f32x4, imm8: i32, src: i16x8, mask: u8) -> i16x8;

    #[link_name = "llvm.x86.avx512.mask.vcvtph2ps.512"]
    unsafe fn vcvtph2ps(a: i16x16, src: f32x16, mask: u16, sae: i32) -> f32x16;

    #[link_name = "llvm.x86.avx512.mask.cvttps2dq.512"]
    unsafe fn vcvttps2dq(a: f32x16, src: i32x16, mask: u16, rounding: i32) -> i32x16;
    #[link_name = "llvm.x86.avx512.mask.cvttps2dq.256"]
    unsafe fn vcvttps2dq256(a: f32x8, src: i32x8, mask: u8) -> i32x8;
    #[link_name = "llvm.x86.avx512.mask.cvttps2dq.128"]
    unsafe fn vcvttps2dq128(a: f32x4, src: i32x4, mask: u8) -> i32x4;

    #[link_name = "llvm.x86.avx512.mask.cvttps2udq.512"]
    unsafe fn vcvttps2udq(a: f32x16, src: u32x16, mask: u16, rounding: i32) -> u32x16;
    #[link_name = "llvm.x86.avx512.mask.cvttps2udq.256"]
    unsafe fn vcvttps2udq256(a: f32x8, src: u32x8, mask: u8) -> u32x8;
    #[link_name = "llvm.x86.avx512.mask.cvttps2udq.128"]
    unsafe fn vcvttps2udq128(a: f32x4, src: u32x4, mask: u8) -> u32x4;

    #[link_name = "llvm.x86.avx512.mask.cvttpd2dq.512"]
    unsafe fn vcvttpd2dq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> i32x8;
    #[link_name = "llvm.x86.avx512.mask.cvttpd2dq.256"]
    unsafe fn vcvttpd2dq256(a: f64x4, src: i32x4, mask: u8) -> i32x4;
    #[link_name = "llvm.x86.avx512.mask.cvttpd2dq.128"]
    unsafe fn vcvttpd2dq128(a: f64x2, src: i32x4, mask: u8) -> i32x4;

    #[link_name = "llvm.x86.avx512.mask.cvttpd2udq.512"]
    unsafe fn vcvttpd2udq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> u32x8;
    #[link_name = "llvm.x86.avx512.mask.cvttpd2udq.256"]
    unsafe fn vcvttpd2udq256(a: f64x4, src: i32x4, mask: u8) -> u32x4;
    #[link_name = "llvm.x86.avx512.mask.cvttpd2udq.128"]
    unsafe fn vcvttpd2udq128(a: f64x2, src: i32x4, mask: u8) -> u32x4;

    #[link_name = "llvm.x86.avx512.mask.pmov.dw.128"]
    unsafe fn vpmovdw128(a: i32x4, src: i16x8, mask: u8) -> i16x8;
    #[link_name = "llvm.x86.avx512.mask.pmov.db.256"]
    unsafe fn vpmovdb256(a: i32x8, src: i8x16, mask: u8) -> i8x16;
    #[link_name = "llvm.x86.avx512.mask.pmov.db.128"]
    unsafe fn vpmovdb128(a: i32x4, src: i8x16, mask: u8) -> i8x16;

    #[link_name = "llvm.x86.avx512.mask.pmov.qw.256"]
    unsafe fn vpmovqw256(a: i64x4, src: i16x8, mask: u8) -> i16x8;
    #[link_name = "llvm.x86.avx512.mask.pmov.qw.128"]
    unsafe fn vpmovqw128(a: i64x2, src: i16x8, mask: u8) -> i16x8;
    #[link_name = "llvm.x86.avx512.mask.pmov.qb.256"]
    unsafe fn vpmovqb256(a: i64x4, src: i8x16, mask: u8) -> i8x16;
    #[link_name = "llvm.x86.avx512.mask.pmov.qb.128"]
    unsafe fn vpmovqb128(a: i64x2, src: i8x16, mask: u8) -> i8x16;
    #[link_name = "llvm.x86.avx512.mask.pmov.qd.128"]
    unsafe fn vpmovqd128(a: i64x2, src: i32x4, mask: u8) -> i32x4;

    #[link_name = "llvm.x86.avx512.mask.pmov.dw.mem.512"]
    unsafe fn vpmovdwmem(mem_addr: *mut i8, a: i32x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.pmov.dw.mem.256"]
    unsafe fn vpmovdwmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmov.dw.mem.128"]
    unsafe fn vpmovdwmem128(mem_addr: *mut i8, a: i32x4, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.mem.512"]
    unsafe fn vpmovsdwmem(mem_addr: *mut i8, a: i32x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.mem.256"]
    unsafe fn vpmovsdwmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.mem.128"]
    unsafe fn vpmovsdwmem128(mem_addr: *mut i8, a: i32x4, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.mem.512"]
    unsafe fn vpmovusdwmem(mem_addr: *mut i8, a: i32x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.mem.256"]
    unsafe fn vpmovusdwmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.mem.128"]
    unsafe fn vpmovusdwmem128(mem_addr: *mut i8, a: i32x4, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmov.db.mem.512"]
    unsafe fn vpmovdbmem(mem_addr: *mut i8, a: i32x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.pmov.db.mem.256"]
    unsafe fn vpmovdbmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmov.db.mem.128"]
    unsafe fn vpmovdbmem128(mem_addr: *mut i8, a: i32x4, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmovs.db.mem.512"]
    unsafe fn vpmovsdbmem(mem_addr: *mut i8, a: i32x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.pmovs.db.mem.256"]
    unsafe fn vpmovsdbmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovs.db.mem.128"]
    unsafe fn vpmovsdbmem128(mem_addr: *mut i8, a: i32x4, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmovus.db.mem.512"]
    unsafe fn vpmovusdbmem(mem_addr: *mut i8, a: i32x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.pmovus.db.mem.256"]
    unsafe fn vpmovusdbmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovus.db.mem.128"]
    unsafe fn vpmovusdbmem128(mem_addr: *mut i8, a: i32x4, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmov.qw.mem.512"]
    unsafe fn vpmovqwmem(mem_addr: *mut i8, a: i64x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmov.qw.mem.256"]
    unsafe fn vpmovqwmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmov.qw.mem.128"]
    unsafe fn vpmovqwmem128(mem_addr: *mut i8, a: i64x2, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.mem.512"]
    unsafe fn vpmovsqwmem(mem_addr: *mut i8, a: i64x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.mem.256"]
    unsafe fn vpmovsqwmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.mem.128"]
    unsafe fn vpmovsqwmem128(mem_addr: *mut i8, a: i64x2, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.mem.512"]
    unsafe fn vpmovusqwmem(mem_addr: *mut i8, a: i64x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.mem.256"]
    unsafe fn vpmovusqwmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.mem.128"]
    unsafe fn vpmovusqwmem128(mem_addr: *mut i8, a: i64x2, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmov.qb.mem.512"]
    unsafe fn vpmovqbmem(mem_addr: *mut i8, a: i64x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmov.qb.mem.256"]
    unsafe fn vpmovqbmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmov.qb.mem.128"]
    unsafe fn vpmovqbmem128(mem_addr: *mut i8, a: i64x2, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.mem.512"]
    unsafe fn vpmovsqbmem(mem_addr: *mut i8, a: i64x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.mem.256"]
    unsafe fn vpmovsqbmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.mem.128"]
    unsafe fn vpmovsqbmem128(mem_addr: *mut i8, a: i64x2, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.mem.512"]
    unsafe fn vpmovusqbmem(mem_addr: *mut i8, a: i64x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.mem.256"]
    unsafe fn vpmovusqbmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.mem.128"]
    unsafe fn vpmovusqbmem128(mem_addr: *mut i8, a: i64x2, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmov.qd.mem.512"]
    unsafe fn vpmovqdmem(mem_addr: *mut i8, a: i64x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmov.qd.mem.256"]
    unsafe fn vpmovqdmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmov.qd.mem.128"]
    unsafe fn vpmovqdmem128(mem_addr: *mut i8, a: i64x2, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.mem.512"]
    unsafe fn vpmovsqdmem(mem_addr: *mut i8, a: i64x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.mem.256"]
    unsafe fn vpmovsqdmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.mem.128"]
    unsafe fn vpmovsqdmem128(mem_addr: *mut i8, a: i64x2, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.mem.512"]
    unsafe fn vpmovusqdmem(mem_addr: *mut i8, a: i64x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.mem.256"]
    unsafe fn vpmovusqdmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.mem.128"]
    unsafe fn vpmovusqdmem128(mem_addr: *mut i8, a: i64x2, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmov.qb.512"]
    unsafe fn vpmovqb(a: i64x8, src: i8x16, mask: u8) -> i8x16;

    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.512"]
    unsafe fn vpmovsdw(a: i32x16, src: i16x16, mask: u16) -> i16x16;
    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.256"]
    unsafe fn vpmovsdw256(a: i32x8, src: i16x8, mask: u8) -> i16x8;
    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.128"]
    unsafe fn vpmovsdw128(a: i32x4, src: i16x8, mask: u8) -> i16x8;

    #[link_name = "llvm.x86.avx512.mask.pmovs.db.512"]
    unsafe fn vpmovsdb(a: i32x16, src: i8x16, mask: u16) -> i8x16;
    #[link_name = "llvm.x86.avx512.mask.pmovs.db.256"]
    unsafe fn vpmovsdb256(a: i32x8, src: i8x16, mask: u8) -> i8x16;
    #[link_name = "llvm.x86.avx512.mask.pmovs.db.128"]
    unsafe fn vpmovsdb128(a: i32x4, src: i8x16, mask: u8) -> i8x16;

    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.512"]
    unsafe fn vpmovsqd(a: i64x8, src: i32x8, mask: u8) -> i32x8;
    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.256"]
    unsafe fn vpmovsqd256(a: i64x4, src: i32x4, mask: u8) -> i32x4;
    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.128"]
    unsafe fn vpmovsqd128(a: i64x2, src: i32x4, mask: u8) -> i32x4;

    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.512"]
    unsafe fn vpmovsqw(a: i64x8, src: i16x8, mask: u8) -> i16x8;
    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.256"]
    unsafe fn vpmovsqw256(a: i64x4, src: i16x8, mask: u8) -> i16x8;
    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.128"]
    unsafe fn vpmovsqw128(a: i64x2, src: i16x8, mask: u8) -> i16x8;

    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.512"]
    unsafe fn vpmovsqb(a: i64x8, src: i8x16, mask: u8) -> i8x16;
    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.256"]
    unsafe fn vpmovsqb256(a: i64x4, src: i8x16, mask: u8) -> i8x16;
    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.128"]
    unsafe fn vpmovsqb128(a: i64x2, src: i8x16, mask: u8) -> i8x16;

    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.512"]
    unsafe fn vpmovusdw(a: u32x16, src: u16x16, mask: u16) -> u16x16;
    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.256"]
    unsafe fn vpmovusdw256(a: u32x8, src: u16x8, mask: u8) -> u16x8;
    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.128"]
    unsafe fn vpmovusdw128(a: u32x4, src: u16x8, mask: u8) -> u16x8;

    #[link_name = "llvm.x86.avx512.mask.pmovus.db.512"]
    unsafe fn vpmovusdb(a: u32x16, src: u8x16, mask: u16) -> u8x16;
    #[link_name = "llvm.x86.avx512.mask.pmovus.db.256"]
    unsafe fn vpmovusdb256(a: u32x8, src: u8x16, mask: u8) -> u8x16;
    #[link_name = "llvm.x86.avx512.mask.pmovus.db.128"]
    unsafe fn vpmovusdb128(a: u32x4, src: u8x16, mask: u8) -> u8x16;

    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.512"]
    unsafe fn vpmovusqd(a: u64x8, src: u32x8, mask: u8) -> u32x8;
    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.256"]
    unsafe fn vpmovusqd256(a: u64x4, src: u32x4, mask: u8) -> u32x4;
    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.128"]
    unsafe fn vpmovusqd128(a: u64x2, src: u32x4, mask: u8) -> u32x4;

    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.512"]
    unsafe fn vpmovusqw(a: u64x8, src: u16x8, mask: u8) -> u16x8;
    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.256"]
    unsafe fn vpmovusqw256(a: u64x4, src: u16x8, mask: u8) -> u16x8;
    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.128"]
    unsafe fn vpmovusqw128(a: u64x2, src: u16x8, mask: u8) -> u16x8;

    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.512"]
    unsafe fn vpmovusqb(a: u64x8, src: u8x16, mask: u8) -> u8x16;
    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.256"]
    unsafe fn vpmovusqb256(a: u64x4, src: u8x16, mask: u8) -> u8x16;
    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.128"]
    unsafe fn vpmovusqb128(a: u64x2, src: u8x16, mask: u8) -> u8x16;

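    // Descriptive note (added here, not upstream documentation): the gather
    // declarations below take the merge source, a base pointer, a vector of
    // signed offsets, a mask and a byte `scale` (1, 2, 4 or 8); the scatter
    // declarations take the destination pointer, the mask, the offsets, the
    // data to store and the `scale`, in that order.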
    #[link_name = "llvm.x86.avx512.gather.dpd.512"]
    unsafe fn vgatherdpd(src: f64x8, slice: *const i8, offsets: i32x8, mask: i8, scale: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.gather.dps.512"]
    unsafe fn vgatherdps(src: f32x16, slice: *const i8, offsets: i32x16, mask: i16, scale: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.gather.qpd.512"]
    unsafe fn vgatherqpd(src: f64x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.gather.qps.512"]
    unsafe fn vgatherqps(src: f32x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> f32x8;
    #[link_name = "llvm.x86.avx512.gather.dpq.512"]
    unsafe fn vpgatherdq(src: i64x8, slice: *const i8, offsets: i32x8, mask: i8, scale: i32) -> i64x8;
    #[link_name = "llvm.x86.avx512.gather.dpi.512"]
    unsafe fn vpgatherdd(src: i32x16, slice: *const i8, offsets: i32x16, mask: i16, scale: i32) -> i32x16;
    #[link_name = "llvm.x86.avx512.gather.qpq.512"]
    unsafe fn vpgatherqq(src: i64x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> i64x8;
    #[link_name = "llvm.x86.avx512.gather.qpi.512"]
    unsafe fn vpgatherqd(src: i32x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> i32x8;

    #[link_name = "llvm.x86.avx512.scatter.dpd.512"]
    unsafe fn vscatterdpd(slice: *mut i8, mask: i8, offsets: i32x8, src: f64x8, scale: i32);
    #[link_name = "llvm.x86.avx512.scatter.dps.512"]
    unsafe fn vscatterdps(slice: *mut i8, mask: i16, offsets: i32x16, src: f32x16, scale: i32);
    #[link_name = "llvm.x86.avx512.scatter.qpd.512"]
    unsafe fn vscatterqpd(slice: *mut i8, mask: i8, offsets: i64x8, src: f64x8, scale: i32);
    #[link_name = "llvm.x86.avx512.scatter.qps.512"]
    unsafe fn vscatterqps(slice: *mut i8, mask: i8, offsets: i64x8, src: f32x8, scale: i32);
    #[link_name = "llvm.x86.avx512.scatter.dpq.512"]
    unsafe fn vpscatterdq(slice: *mut i8, mask: i8, offsets: i32x8, src: i64x8, scale: i32);

    #[link_name = "llvm.x86.avx512.scatter.dpi.512"]
    unsafe fn vpscatterdd(slice: *mut i8, mask: i16, offsets: i32x16, src: i32x16, scale: i32);
    #[link_name = "llvm.x86.avx512.scatter.qpq.512"]
    unsafe fn vpscatterqq(slice: *mut i8, mask: i8, offsets: i64x8, src: i64x8, scale: i32);
    #[link_name = "llvm.x86.avx512.scatter.qpi.512"]
    unsafe fn vpscatterqd(slice: *mut i8, mask: i8, offsets: i64x8, src: i32x8, scale: i32);

    #[link_name = "llvm.x86.avx512.scattersiv4.si"]
    unsafe fn vpscatterdd_128(slice: *mut i8, k: u8, offsets: i32x4, src: i32x4, scale: i32);
    #[link_name = "llvm.x86.avx512.scattersiv2.di"]
    unsafe fn vpscatterdq_128(slice: *mut i8, k: u8, offsets: i32x4, src: i64x2, scale: i32);
    #[link_name = "llvm.x86.avx512.scattersiv2.df"]
    unsafe fn vscatterdpd_128(slice: *mut i8, k: u8, offsets: i32x4, src: f64x2, scale: i32);
    #[link_name = "llvm.x86.avx512.scattersiv4.sf"]
    unsafe fn vscatterdps_128(slice: *mut i8, k: u8, offsets: i32x4, src: f32x4, scale: i32);
    #[link_name = "llvm.x86.avx512.scatterdiv4.si"]
    unsafe fn vpscatterqd_128(slice: *mut i8, k: u8, offsets: i64x2, src: i32x4, scale: i32);
    #[link_name = "llvm.x86.avx512.scatterdiv2.di"]
    unsafe fn vpscatterqq_128(slice: *mut i8, k: u8, offsets: i64x2, src: i64x2, scale: i32);
    #[link_name = "llvm.x86.avx512.scatterdiv2.df"]
    unsafe fn vscatterqpd_128(slice: *mut i8, k: u8, offsets: i64x2, src: f64x2, scale: i32);
    #[link_name = "llvm.x86.avx512.scatterdiv4.sf"]
    unsafe fn vscatterqps_128(slice: *mut i8, k: u8, offsets: i64x2, src: f32x4, scale: i32);

    #[link_name = "llvm.x86.avx512.scattersiv8.si"]
    unsafe fn vpscatterdd_256(slice: *mut i8, k: u8, offsets: i32x8, src: i32x8, scale: i32);
    #[link_name = "llvm.x86.avx512.scattersiv4.di"]
    unsafe fn vpscatterdq_256(slice: *mut i8, k: u8, offsets: i32x4, src: i64x4, scale: i32);
    #[link_name = "llvm.x86.avx512.scattersiv4.df"]
    unsafe fn vscatterdpd_256(slice: *mut i8, k: u8, offsets: i32x4, src: f64x4, scale: i32);
    #[link_name = "llvm.x86.avx512.scattersiv8.sf"]
    unsafe fn vscatterdps_256(slice: *mut i8, k: u8, offsets: i32x8, src: f32x8, scale: i32);
    #[link_name = "llvm.x86.avx512.scatterdiv8.si"]
    unsafe fn vpscatterqd_256(slice: *mut i8, k: u8, offsets: i64x4, src: i32x4, scale: i32);
    #[link_name = "llvm.x86.avx512.scatterdiv4.di"]
    unsafe fn vpscatterqq_256(slice: *mut i8, k: u8, offsets: i64x4, src: i64x4, scale: i32);
    #[link_name = "llvm.x86.avx512.scatterdiv4.df"]
    unsafe fn vscatterqpd_256(slice: *mut i8, k: u8, offsets: i64x4, src: f64x4, scale: i32);
    #[link_name = "llvm.x86.avx512.scatterdiv8.sf"]
    unsafe fn vscatterqps_256(slice: *mut i8, k: u8, offsets: i64x4, src: f32x4, scale: i32);

    #[link_name = "llvm.x86.avx512.gather3siv4.si"]
    unsafe fn vpgatherdd_128(src: i32x4, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> i32x4;
    #[link_name = "llvm.x86.avx512.gather3siv2.di"]
    unsafe fn vpgatherdq_128(src: i64x2, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> i64x2;
    #[link_name = "llvm.x86.avx512.gather3siv2.df"]
    unsafe fn vgatherdpd_128(src: f64x2, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> f64x2;
    #[link_name = "llvm.x86.avx512.gather3siv4.sf"]
    unsafe fn vgatherdps_128(src: f32x4, slice: *const u8, offsets: i32x4, k: u8, scale: i32) -> f32x4;
    #[link_name = "llvm.x86.avx512.gather3div4.si"]
    unsafe fn vpgatherqd_128(src: i32x4, slice: *const u8, offsets: i64x2, k: u8, scale: i32) -> i32x4;
    #[link_name = "llvm.x86.avx512.gather3div2.di"]
    unsafe fn vpgatherqq_128(src: i64x2, slice: *const i8, offsets: i64x2, k: u8, scale: i32) -> i64x2;
    #[link_name = "llvm.x86.avx512.gather3div2.df"]
    unsafe fn vgatherqpd_128(src: f64x2, slice: *const i8, offsets: i64x2, k: u8, scale: i32) -> f64x2;
    #[link_name = "llvm.x86.avx512.gather3div4.sf"]
    unsafe fn vgatherqps_128(src: f32x4, slice: *const i8, offsets: i64x2, k: u8, scale: i32) -> f32x4;

    #[link_name = "llvm.x86.avx512.gather3siv8.si"]
    unsafe fn vpgatherdd_256(src: i32x8, slice: *const i8, offsets: i32x8, k: u8, scale: i32) -> i32x8;
    #[link_name = "llvm.x86.avx512.gather3siv4.di"]
    unsafe fn vpgatherdq_256(src: i64x4, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> i64x4;
    #[link_name = "llvm.x86.avx512.gather3siv4.df"]
    unsafe fn vgatherdpd_256(src: f64x4, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> f64x4;
    #[link_name = "llvm.x86.avx512.gather3siv8.sf"]
    unsafe fn vgatherdps_256(src: f32x8, slice: *const i8, offsets: i32x8, k: u8, scale: i32) -> f32x8;
    #[link_name = "llvm.x86.avx512.gather3div8.si"]
    unsafe fn vpgatherqd_256(src: i32x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> i32x4;
    #[link_name = "llvm.x86.avx512.gather3div4.di"]
    unsafe fn vpgatherqq_256(src: i64x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> i64x4;
    #[link_name = "llvm.x86.avx512.gather3div4.df"]
    unsafe fn vgatherqpd_256(src: f64x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> f64x4;
    #[link_name = "llvm.x86.avx512.gather3div8.sf"]
    unsafe fn vgatherqps_256(src: f32x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> f32x4;

    #[link_name = "llvm.x86.avx512.mask.cmp.ss"]
    unsafe fn vcmpss(a: __m128, b: __m128, op: i32, m: i8, sae: i32) -> i8;
    #[link_name = "llvm.x86.avx512.mask.cmp.sd"]
    unsafe fn vcmpsd(a: __m128d, b: __m128d, op: i32, m: i8, sae: i32) -> i8;

    #[link_name = "llvm.x86.avx512.mask.cmp.ps.512"]
    unsafe fn vcmpps(a: f32x16, b: f32x16, op: i32, m: i16, sae: i32) -> i16;
    #[link_name = "llvm.x86.avx512.mask.cmp.ps.256"]
    unsafe fn vcmpps256(a: f32x8, b: f32x8, op: i32, m: i8) -> i8;
    #[link_name = "llvm.x86.avx512.mask.cmp.ps.128"]
    unsafe fn vcmpps128(a: f32x4, b: f32x4, op: i32, m: i8) -> i8;

    #[link_name = "llvm.x86.avx512.mask.cmp.pd.512"]
    unsafe fn vcmppd(a: f64x8, b: f64x8, op: i32, m: i8, sae: i32) -> i8;
    #[link_name = "llvm.x86.avx512.mask.cmp.pd.256"]
    unsafe fn vcmppd256(a: f64x4, b: f64x4, op: i32, m: i8) -> i8;
    #[link_name = "llvm.x86.avx512.mask.cmp.pd.128"]
    unsafe fn vcmppd128(a: f64x2, b: f64x2, op: i32, m: i8) -> i8;

    #[link_name = "llvm.x86.avx512.mask.prol.d.512"]
    unsafe fn vprold(a: i32x16, i8: i32) -> i32x16;
    #[link_name = "llvm.x86.avx512.mask.prol.d.256"]
    unsafe fn vprold256(a: i32x8, i8: i32) -> i32x8;
    #[link_name = "llvm.x86.avx512.mask.prol.d.128"]
    unsafe fn vprold128(a: i32x4, i8: i32) -> i32x4;

    #[link_name = "llvm.x86.avx512.mask.pror.d.512"]
    unsafe fn vprord(a: i32x16, i8: i32) -> i32x16;
    #[link_name = "llvm.x86.avx512.mask.pror.d.256"]
    unsafe fn vprord256(a: i32x8, i8: i32) -> i32x8;
    #[link_name = "llvm.x86.avx512.mask.pror.d.128"]
    unsafe fn vprord128(a: i32x4, i8: i32) -> i32x4;

    #[link_name = "llvm.x86.avx512.mask.prol.q.512"]
    unsafe fn vprolq(a: i64x8, i8: i32) -> i64x8;
    #[link_name = "llvm.x86.avx512.mask.prol.q.256"]
    unsafe fn vprolq256(a: i64x4, i8: i32) -> i64x4;
    #[link_name = "llvm.x86.avx512.mask.prol.q.128"]
    unsafe fn vprolq128(a: i64x2, i8: i32) -> i64x2;

    #[link_name = "llvm.x86.avx512.mask.pror.q.512"]
    unsafe fn vprorq(a: i64x8, i8: i32) -> i64x8;
    #[link_name = "llvm.x86.avx512.mask.pror.q.256"]
    unsafe fn vprorq256(a: i64x4, i8: i32) -> i64x4;
    #[link_name = "llvm.x86.avx512.mask.pror.q.128"]
    unsafe fn vprorq128(a: i64x2, i8: i32) -> i64x2;

    #[link_name = "llvm.x86.avx512.mask.prolv.d.512"]
    unsafe fn vprolvd(a: i32x16, b: i32x16) -> i32x16;
    #[link_name = "llvm.x86.avx512.mask.prolv.d.256"]
    unsafe fn vprolvd256(a: i32x8, b: i32x8) -> i32x8;
    #[link_name = "llvm.x86.avx512.mask.prolv.d.128"]
    unsafe fn vprolvd128(a: i32x4, b: i32x4) -> i32x4;

    #[link_name = "llvm.x86.avx512.mask.prorv.d.512"]
    unsafe fn vprorvd(a: i32x16, b: i32x16) -> i32x16;
    #[link_name = "llvm.x86.avx512.mask.prorv.d.256"]
    unsafe fn vprorvd256(a: i32x8, b: i32x8) -> i32x8;
    #[link_name = "llvm.x86.avx512.mask.prorv.d.128"]
    unsafe fn vprorvd128(a: i32x4, b: i32x4) -> i32x4;

    #[link_name = "llvm.x86.avx512.mask.prolv.q.512"]
    unsafe fn vprolvq(a: i64x8, b: i64x8) -> i64x8;
    #[link_name = "llvm.x86.avx512.mask.prolv.q.256"]
    unsafe fn vprolvq256(a: i64x4, b: i64x4) -> i64x4;
    #[link_name = "llvm.x86.avx512.mask.prolv.q.128"]
    unsafe fn vprolvq128(a: i64x2, b: i64x2) -> i64x2;

    #[link_name = "llvm.x86.avx512.mask.prorv.q.512"]
    unsafe fn vprorvq(a: i64x8, b: i64x8) -> i64x8;
    #[link_name = "llvm.x86.avx512.mask.prorv.q.256"]
    unsafe fn vprorvq256(a: i64x4, b: i64x4) -> i64x4;
    #[link_name = "llvm.x86.avx512.mask.prorv.q.128"]
    unsafe fn vprorvq128(a: i64x2, b: i64x2) -> i64x2;

    #[link_name = "llvm.x86.avx512.psllv.d.512"]
    unsafe fn vpsllvd(a: i32x16, b: i32x16) -> i32x16;
    #[link_name = "llvm.x86.avx512.psrlv.d.512"]
    unsafe fn vpsrlvd(a: i32x16, b: i32x16) -> i32x16;
    #[link_name = "llvm.x86.avx512.psllv.q.512"]
    unsafe fn vpsllvq(a: i64x8, b: i64x8) -> i64x8;
    #[link_name = "llvm.x86.avx512.psrlv.q.512"]
    unsafe fn vpsrlvq(a: i64x8, b: i64x8) -> i64x8;

    #[link_name = "llvm.x86.avx512.psll.d.512"]
    unsafe fn vpslld(a: i32x16, count: i32x4) -> i32x16;
    #[link_name = "llvm.x86.avx512.psrl.d.512"]
    unsafe fn vpsrld(a: i32x16, count: i32x4) -> i32x16;
    #[link_name = "llvm.x86.avx512.psll.q.512"]
    unsafe fn vpsllq(a: i64x8, count: i64x2) -> i64x8;
    #[link_name = "llvm.x86.avx512.psrl.q.512"]
    unsafe fn vpsrlq(a: i64x8, count: i64x2) -> i64x8;

    #[link_name = "llvm.x86.avx512.psra.d.512"]
    unsafe fn vpsrad(a: i32x16, count: i32x4) -> i32x16;

    #[link_name = "llvm.x86.avx512.psra.q.512"]
    unsafe fn vpsraq(a: i64x8, count: i64x2) -> i64x8;
    #[link_name = "llvm.x86.avx512.psra.q.256"]
    unsafe fn vpsraq256(a: i64x4, count: i64x2) -> i64x4;
    #[link_name = "llvm.x86.avx512.psra.q.128"]
    unsafe fn vpsraq128(a: i64x2, count: i64x2) -> i64x2;

    #[link_name = "llvm.x86.avx512.psrav.d.512"]
    unsafe fn vpsravd(a: i32x16, count: i32x16) -> i32x16;

    #[link_name = "llvm.x86.avx512.psrav.q.512"]
    unsafe fn vpsravq(a: i64x8, count: i64x8) -> i64x8;
    #[link_name = "llvm.x86.avx512.psrav.q.256"]
    unsafe fn vpsravq256(a: i64x4, count: i64x4) -> i64x4;
    #[link_name = "llvm.x86.avx512.psrav.q.128"]
    unsafe fn vpsravq128(a: i64x2, count: i64x2) -> i64x2;

    #[link_name = "llvm.x86.avx512.vpermilvar.ps.512"]
    unsafe fn vpermilps(a: f32x16, b: i32x16) -> f32x16;
    #[link_name = "llvm.x86.avx512.vpermilvar.pd.512"]
    unsafe fn vpermilpd(a: f64x8, b: i64x8) -> f64x8;

    #[link_name = "llvm.x86.avx512.permvar.si.512"]
    unsafe fn vpermd(a: i32x16, idx: i32x16) -> i32x16;

    #[link_name = "llvm.x86.avx512.permvar.di.512"]
    unsafe fn vpermq(a: i64x8, idx: i64x8) -> i64x8;
    #[link_name = "llvm.x86.avx512.permvar.di.256"]
    unsafe fn vpermq256(a: i64x4, idx: i64x4) -> i64x4;

    #[link_name = "llvm.x86.avx512.permvar.sf.512"]
    unsafe fn vpermps(a: f32x16, idx: i32x16) -> f32x16;

    #[link_name = "llvm.x86.avx512.permvar.df.512"]
    unsafe fn vpermpd(a: f64x8, idx: i64x8) -> f64x8;
    #[link_name = "llvm.x86.avx512.permvar.df.256"]
    unsafe fn vpermpd256(a: f64x4, idx: i64x4) -> f64x4;

    #[link_name = "llvm.x86.avx512.vpermi2var.d.512"]
    unsafe fn vpermi2d(a: i32x16, idx: i32x16, b: i32x16) -> i32x16;
    #[link_name = "llvm.x86.avx512.vpermi2var.d.256"]
    unsafe fn vpermi2d256(a: i32x8, idx: i32x8, b: i32x8) -> i32x8;
    #[link_name = "llvm.x86.avx512.vpermi2var.d.128"]
    unsafe fn vpermi2d128(a: i32x4, idx: i32x4, b: i32x4) -> i32x4;

    #[link_name = "llvm.x86.avx512.vpermi2var.q.512"]
    unsafe fn vpermi2q(a: i64x8, idx: i64x8, b: i64x8) -> i64x8;
    #[link_name = "llvm.x86.avx512.vpermi2var.q.256"]
    unsafe fn vpermi2q256(a: i64x4, idx: i64x4, b: i64x4) -> i64x4;
    #[link_name = "llvm.x86.avx512.vpermi2var.q.128"]
    unsafe fn vpermi2q128(a: i64x2, idx: i64x2, b: i64x2) -> i64x2;

    #[link_name = "llvm.x86.avx512.vpermi2var.ps.512"]
    unsafe fn vpermi2ps(a: f32x16, idx: i32x16, b: f32x16) -> f32x16;
    #[link_name = "llvm.x86.avx512.vpermi2var.ps.256"]
    unsafe fn vpermi2ps256(a: f32x8, idx: i32x8, b: f32x8) -> f32x8;
    #[link_name = "llvm.x86.avx512.vpermi2var.ps.128"]
    unsafe fn vpermi2ps128(a: f32x4, idx: i32x4, b: f32x4) -> f32x4;

    #[link_name = "llvm.x86.avx512.vpermi2var.pd.512"]
    unsafe fn vpermi2pd(a: f64x8, idx: i64x8, b: f64x8) -> f64x8;
    #[link_name = "llvm.x86.avx512.vpermi2var.pd.256"]
    unsafe fn vpermi2pd256(a: f64x4, idx: i64x4, b: f64x4) -> f64x4;
    #[link_name = "llvm.x86.avx512.vpermi2var.pd.128"]
    unsafe fn vpermi2pd128(a: f64x2, idx: i64x2, b: f64x2) -> f64x2;

    #[link_name = "llvm.x86.avx512.mask.compress.d.512"]
    unsafe fn vpcompressd(a: i32x16, src: i32x16, mask: u16) -> i32x16;
    #[link_name = "llvm.x86.avx512.mask.compress.d.256"]
    unsafe fn vpcompressd256(a: i32x8, src: i32x8, mask: u8) -> i32x8;
    #[link_name = "llvm.x86.avx512.mask.compress.d.128"]
    unsafe fn vpcompressd128(a: i32x4, src: i32x4, mask: u8) -> i32x4;

    #[link_name = "llvm.x86.avx512.mask.compress.q.512"]
    unsafe fn vpcompressq(a: i64x8, src: i64x8, mask: u8) -> i64x8;
    #[link_name = "llvm.x86.avx512.mask.compress.q.256"]
    unsafe fn vpcompressq256(a: i64x4, src: i64x4, mask: u8) -> i64x4;
    #[link_name = "llvm.x86.avx512.mask.compress.q.128"]
    unsafe fn vpcompressq128(a: i64x2, src: i64x2, mask: u8) -> i64x2;

    #[link_name = "llvm.x86.avx512.mask.compress.ps.512"]
    unsafe fn vcompressps(a: f32x16, src: f32x16, mask: u16) -> f32x16;
    #[link_name = "llvm.x86.avx512.mask.compress.ps.256"]
    unsafe fn vcompressps256(a: f32x8, src: f32x8, mask: u8) -> f32x8;
    #[link_name = "llvm.x86.avx512.mask.compress.ps.128"]
    unsafe fn vcompressps128(a: f32x4, src: f32x4, mask: u8) -> f32x4;

    #[link_name = "llvm.x86.avx512.mask.compress.pd.512"]
    unsafe fn vcompresspd(a: f64x8, src: f64x8, mask: u8) -> f64x8;
    #[link_name = "llvm.x86.avx512.mask.compress.pd.256"]
    unsafe fn vcompresspd256(a: f64x4, src: f64x4, mask: u8) -> f64x4;
    #[link_name = "llvm.x86.avx512.mask.compress.pd.128"]
    unsafe fn vcompresspd128(a: f64x2, src: f64x2, mask: u8) -> f64x2;

    #[link_name = "llvm.x86.avx512.mask.compress.store.d.512"]
    unsafe fn vcompressstored(mem: *mut i8, data: i32x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.compress.store.d.256"]
    unsafe fn vcompressstored256(mem: *mut i8, data: i32x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.compress.store.d.128"]
    unsafe fn vcompressstored128(mem: *mut i8, data: i32x4, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.compress.store.q.512"]
    unsafe fn vcompressstoreq(mem: *mut i8, data: i64x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.compress.store.q.256"]
    unsafe fn vcompressstoreq256(mem: *mut i8, data: i64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.compress.store.q.128"]
    unsafe fn vcompressstoreq128(mem: *mut i8, data: i64x2, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.compress.store.ps.512"]
    unsafe fn vcompressstoreps(mem: *mut i8, data: f32x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.compress.store.ps.256"]
    unsafe fn vcompressstoreps256(mem: *mut i8, data: f32x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.compress.store.ps.128"]
    unsafe fn vcompressstoreps128(mem: *mut i8, data: f32x4, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.compress.store.pd.512"]
    unsafe fn vcompressstorepd(mem: *mut i8, data: f64x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.compress.store.pd.256"]
    unsafe fn vcompressstorepd256(mem: *mut i8, data: f64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.compress.store.pd.128"]
    unsafe fn vcompressstorepd128(mem: *mut i8, data: f64x2, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.expand.d.512"]
    unsafe fn vpexpandd(a: i32x16, src: i32x16, mask: u16) -> i32x16;
    #[link_name = "llvm.x86.avx512.mask.expand.d.256"]
    unsafe fn vpexpandd256(a: i32x8, src: i32x8, mask: u8) -> i32x8;
    #[link_name = "llvm.x86.avx512.mask.expand.d.128"]
    unsafe fn vpexpandd128(a: i32x4, src: i32x4, mask: u8) -> i32x4;

    #[link_name = "llvm.x86.avx512.mask.expand.q.512"]
    unsafe fn vpexpandq(a: i64x8, src: i64x8, mask: u8) -> i64x8;
    #[link_name = "llvm.x86.avx512.mask.expand.q.256"]
    unsafe fn vpexpandq256(a: i64x4, src: i64x4, mask: u8) -> i64x4;
    #[link_name = "llvm.x86.avx512.mask.expand.q.128"]
    unsafe fn vpexpandq128(a: i64x2, src: i64x2, mask: u8) -> i64x2;

    #[link_name = "llvm.x86.avx512.mask.expand.ps.512"]
    unsafe fn vexpandps(a: f32x16, src: f32x16, mask: u16) -> f32x16;
    #[link_name = "llvm.x86.avx512.mask.expand.ps.256"]
    unsafe fn vexpandps256(a: f32x8, src: f32x8, mask: u8) -> f32x8;
    #[link_name = "llvm.x86.avx512.mask.expand.ps.128"]
    unsafe fn vexpandps128(a: f32x4, src: f32x4, mask: u8) -> f32x4;

    #[link_name = "llvm.x86.avx512.mask.expand.pd.512"]
    unsafe fn vexpandpd(a: f64x8, src: f64x8, mask: u8) -> f64x8;
    #[link_name = "llvm.x86.avx512.mask.expand.pd.256"]
    unsafe fn vexpandpd256(a: f64x4, src: f64x4, mask: u8) -> f64x4;
    #[link_name = "llvm.x86.avx512.mask.expand.pd.128"]
    unsafe fn vexpandpd128(a: f64x2, src: f64x2, mask: u8) -> f64x2;

    #[link_name = "llvm.x86.avx512.mask.add.ss.round"]
    unsafe fn vaddss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
    #[link_name = "llvm.x86.avx512.mask.add.sd.round"]
    unsafe fn vaddsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
    #[link_name = "llvm.x86.avx512.mask.sub.ss.round"]
    unsafe fn vsubss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
    #[link_name = "llvm.x86.avx512.mask.sub.sd.round"]
    unsafe fn vsubsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
    #[link_name = "llvm.x86.avx512.mask.mul.ss.round"]
    unsafe fn vmulss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
    #[link_name = "llvm.x86.avx512.mask.mul.sd.round"]
    unsafe fn vmulsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
    #[link_name = "llvm.x86.avx512.mask.div.ss.round"]
    unsafe fn vdivss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
    #[link_name = "llvm.x86.avx512.mask.div.sd.round"]
    unsafe fn vdivsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
    #[link_name = "llvm.x86.avx512.mask.max.ss.round"]
    unsafe fn vmaxss(a: f32x4, b: f32x4, src: f32x4, mask: u8, sae: i32) -> f32x4;
    #[link_name = "llvm.x86.avx512.mask.max.sd.round"]
    unsafe fn vmaxsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, sae: i32) -> f64x2;
    #[link_name = "llvm.x86.avx512.mask.min.ss.round"]
    unsafe fn vminss(a: f32x4, b: f32x4, src: f32x4, mask: u8, sae: i32) -> f32x4;
    #[link_name = "llvm.x86.avx512.mask.min.sd.round"]
    unsafe fn vminsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, sae: i32) -> f64x2;
    #[link_name = "llvm.x86.avx512.mask.sqrt.ss"]
    unsafe fn vsqrtss(a: __m128, b: __m128, src: __m128, mask: u8, rounding: i32) -> __m128;
    #[link_name = "llvm.x86.avx512.mask.sqrt.sd"]
    unsafe fn vsqrtsd(a: __m128d, b: __m128d, src: __m128d, mask: u8, rounding: i32) -> __m128d;
    #[link_name = "llvm.x86.avx512.mask.getexp.ss"]
    unsafe fn vgetexpss(a: f32x4, b: f32x4, src: f32x4, mask: u8, sae: i32) -> f32x4;
    #[link_name = "llvm.x86.avx512.mask.getexp.sd"]
    unsafe fn vgetexpsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, sae: i32) -> f64x2;
    #[link_name = "llvm.x86.avx512.mask.getmant.ss"]
    unsafe fn vgetmantss(a: f32x4, b: f32x4, mantissas: i32, src: f32x4, m: u8, sae: i32) -> f32x4;
    #[link_name = "llvm.x86.avx512.mask.getmant.sd"]
    unsafe fn vgetmantsd(a: f64x2, b: f64x2, mantissas: i32, src: f64x2, m: u8, sae: i32) -> f64x2;

    #[link_name = "llvm.x86.avx512.rsqrt14.ss"]
    unsafe fn vrsqrt14ss(a: f32x4, b: f32x4, src: f32x4, mask: u8) -> f32x4;
    #[link_name = "llvm.x86.avx512.rsqrt14.sd"]
    unsafe fn vrsqrt14sd(a: f64x2, b: f64x2, src: f64x2, mask: u8) -> f64x2;
    #[link_name = "llvm.x86.avx512.rcp14.ss"]
    unsafe fn vrcp14ss(a: f32x4, b: f32x4, src: f32x4, mask: u8) -> f32x4;
    #[link_name = "llvm.x86.avx512.rcp14.sd"]
    unsafe fn vrcp14sd(a: f64x2, b: f64x2, src: f64x2, mask: u8) -> f64x2;

    #[link_name = "llvm.x86.avx512.mask.rndscale.ss"]
    unsafe fn vrndscaless(a: f32x4, b: f32x4, src: f32x4, mask: u8, imm8: i32, sae: i32) -> f32x4;
    #[link_name = "llvm.x86.avx512.mask.rndscale.sd"]
    unsafe fn vrndscalesd(a: f64x2, b: f64x2, src: f64x2, mask: u8, imm8: i32, sae: i32) -> f64x2;
    #[link_name = "llvm.x86.avx512.mask.scalef.ss"]
    unsafe fn vscalefss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
    #[link_name = "llvm.x86.avx512.mask.scalef.sd"]
    unsafe fn vscalefsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;

    #[link_name = "llvm.x86.avx512.vfmadd.f32"]
    unsafe fn vfmaddssround(a: f32, b: f32, c: f32, rounding: i32) -> f32;
    #[link_name = "llvm.x86.avx512.vfmadd.f64"]
    unsafe fn vfmaddsdround(a: f64, b: f64, c: f64, rounding: i32) -> f64;

    #[link_name = "llvm.x86.avx512.mask.fixupimm.ss"]
    unsafe fn vfixupimmss(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8, sae: i32) -> f32x4;
    #[link_name = "llvm.x86.avx512.mask.fixupimm.sd"]
    unsafe fn vfixupimmsd(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8, sae: i32) -> f64x2;
    #[link_name = "llvm.x86.avx512.maskz.fixupimm.ss"]
    unsafe fn vfixupimmssz(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8, sae: i32) -> f32x4;
    #[link_name = "llvm.x86.avx512.maskz.fixupimm.sd"]
    unsafe fn vfixupimmsdz(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8, sae: i32) -> f64x2;

    #[link_name = "llvm.x86.avx512.mask.cvtss2sd.round"]
    unsafe fn vcvtss2sd(a: f64x2, b: f32x4, src: f64x2, mask: u8, sae: i32) -> f64x2;
    #[link_name = "llvm.x86.avx512.mask.cvtsd2ss.round"]
    unsafe fn vcvtsd2ss(a: f32x4, b: f64x2, src: f32x4, mask: u8, rounding: i32) -> f32x4;

    #[link_name = "llvm.x86.avx512.vcvtss2si32"]
    unsafe fn vcvtss2si(a: f32x4, rounding: i32) -> i32;
    #[link_name = "llvm.x86.avx512.vcvtss2usi32"]
    unsafe fn vcvtss2usi(a: f32x4, rounding: i32) -> u32;

    #[link_name = "llvm.x86.avx512.vcvtsd2si32"]
    unsafe fn vcvtsd2si(a: f64x2, rounding: i32) -> i32;
    #[link_name = "llvm.x86.avx512.vcvtsd2usi32"]
    unsafe fn vcvtsd2usi(a: f64x2, rounding: i32) -> u32;

    #[link_name = "llvm.x86.avx512.cvtsi2ss32"]
    unsafe fn vcvtsi2ss(a: f32x4, b: i32, rounding: i32) -> f32x4;

    #[link_name = "llvm.x86.avx512.cvtusi2ss"]
    unsafe fn vcvtusi2ss(a: f32x4, b: u32, rounding: i32) -> f32x4;

    #[link_name = "llvm.x86.avx512.cvttss2si"]
    unsafe fn vcvttss2si(a: f32x4, rounding: i32) -> i32;
    #[link_name = "llvm.x86.avx512.cvttss2usi"]
    unsafe fn vcvttss2usi(a: f32x4, rounding: i32) -> u32;

    #[link_name = "llvm.x86.avx512.cvttsd2si"]
    unsafe fn vcvttsd2si(a: f64x2, rounding: i32) -> i32;
    #[link_name = "llvm.x86.avx512.cvttsd2usi"]
    unsafe fn vcvttsd2usi(a: f64x2, rounding: i32) -> u32;

    #[link_name = "llvm.x86.avx512.vcomi.ss"]
    unsafe fn vcomiss(a: f32x4, b: f32x4, imm8: i32, sae: i32) -> i32;
    #[link_name = "llvm.x86.avx512.vcomi.sd"]
    unsafe fn vcomisd(a: f64x2, b: f64x2, imm8: i32, sae: i32) -> i32;

    #[link_name = "llvm.x86.avx512.mask.loadu.d.128"]
    unsafe fn loaddqu32_128(mem_addr: *const i32, a: i32x4, mask: u8) -> i32x4;
    #[link_name = "llvm.x86.avx512.mask.loadu.q.128"]
    unsafe fn loaddqu64_128(mem_addr: *const i64, a: i64x2, mask: u8) -> i64x2;
    #[link_name = "llvm.x86.avx512.mask.loadu.ps.128"]
    unsafe fn loadups_128(mem_addr: *const f32, a: f32x4, mask: u8) -> f32x4;
    #[link_name = "llvm.x86.avx512.mask.loadu.pd.128"]
    unsafe fn loadupd_128(mem_addr: *const f64, a: f64x2, mask: u8) -> f64x2;
    #[link_name = "llvm.x86.avx512.mask.loadu.d.256"]
    unsafe fn loaddqu32_256(mem_addr: *const i32, a: i32x8, mask: u8) -> i32x8;
    #[link_name = "llvm.x86.avx512.mask.loadu.q.256"]
    unsafe fn loaddqu64_256(mem_addr: *const i64, a: i64x4, mask: u8) -> i64x4;
    #[link_name = "llvm.x86.avx512.mask.loadu.ps.256"]
    unsafe fn loadups_256(mem_addr: *const f32, a: f32x8, mask: u8) -> f32x8;
    #[link_name = "llvm.x86.avx512.mask.loadu.pd.256"]
    unsafe fn loadupd_256(mem_addr: *const f64, a: f64x4, mask: u8) -> f64x4;
    #[link_name = "llvm.x86.avx512.mask.loadu.d.512"]
    unsafe fn loaddqu32_512(mem_addr: *const i32, a: i32x16, mask: u16) -> i32x16;
    #[link_name = "llvm.x86.avx512.mask.loadu.q.512"]
    unsafe fn loaddqu64_512(mem_addr: *const i64, a: i64x8, mask: u8) -> i64x8;
    #[link_name = "llvm.x86.avx512.mask.loadu.ps.512"]
    unsafe fn loadups_512(mem_addr: *const f32, a: f32x16, mask: u16) -> f32x16;
    #[link_name = "llvm.x86.avx512.mask.loadu.pd.512"]
    unsafe fn loadupd_512(mem_addr: *const f64, a: f64x8, mask: u8) -> f64x8;
43261
43262 #[link_name = "llvm.x86.avx512.mask.load.d.128"]
43263 unsafefn loaddqa32_128(mem_addr: *const i32, a: i32x4, mask: u8) -> i32x4;
43264 #[link_name = "llvm.x86.avx512.mask.load.q.128"]
43265 unsafefn loaddqa64_128(mem_addr: *const i64, a: i64x2, mask: u8) -> i64x2;
43266 #[link_name = "llvm.x86.avx512.mask.load.ps.128"]
43267 unsafefn loadaps_128(mem_addr: *const f32, a: f32x4, mask: u8) -> f32x4;
43268 #[link_name = "llvm.x86.avx512.mask.load.pd.128"]
43269 unsafefn loadapd_128(mem_addr: *const f64, a: f64x2, mask: u8) -> f64x2;
43270 #[link_name = "llvm.x86.avx512.mask.load.d.256"]
43271 unsafefn loaddqa32_256(mem_addr: *const i32, a: i32x8, mask: u8) -> i32x8;
43272 #[link_name = "llvm.x86.avx512.mask.load.q.256"]
43273 unsafefn loaddqa64_256(mem_addr: *const i64, a: i64x4, mask: u8) -> i64x4;
43274 #[link_name = "llvm.x86.avx512.mask.load.ps.256"]
43275 unsafefn loadaps_256(mem_addr: *const f32, a: f32x8, mask: u8) -> f32x8;
43276 #[link_name = "llvm.x86.avx512.mask.load.pd.256"]
43277 unsafefn loadapd_256(mem_addr: *const f64, a: f64x4, mask: u8) -> f64x4;
43278 #[link_name = "llvm.x86.avx512.mask.load.d.512"]
43279 unsafefn loaddqa32_512(mem_addr: *const i32, a: i32x16, mask: u16) -> i32x16;
43280 #[link_name = "llvm.x86.avx512.mask.load.q.512"]
43281 unsafefn loaddqa64_512(mem_addr: *const i64, a: i64x8, mask: u8) -> i64x8;
43282 #[link_name = "llvm.x86.avx512.mask.load.ps.512"]
43283 unsafefn loadaps_512(mem_addr: *const f32, a: f32x16, mask: u16) -> f32x16;
43284 #[link_name = "llvm.x86.avx512.mask.load.pd.512"]
43285 unsafefn loadapd_512(mem_addr: *const f64, a: f64x8, mask: u8) -> f64x8;
43286
    #[link_name = "llvm.x86.avx512.mask.storeu.d.128"]
    unsafe fn storedqu32_128(mem_addr: *mut i32, a: i32x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.storeu.q.128"]
    unsafe fn storedqu64_128(mem_addr: *mut i64, a: i64x2, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.storeu.ps.128"]
    unsafe fn storeups_128(mem_addr: *mut f32, a: f32x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.storeu.pd.128"]
    unsafe fn storeupd_128(mem_addr: *mut f64, a: f64x2, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.storeu.d.256"]
    unsafe fn storedqu32_256(mem_addr: *mut i32, a: i32x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.storeu.q.256"]
    unsafe fn storedqu64_256(mem_addr: *mut i64, a: i64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.storeu.ps.256"]
    unsafe fn storeups_256(mem_addr: *mut f32, a: f32x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.storeu.pd.256"]
    unsafe fn storeupd_256(mem_addr: *mut f64, a: f64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.storeu.d.512"]
    unsafe fn storedqu32_512(mem_addr: *mut i32, a: i32x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.storeu.q.512"]
    unsafe fn storedqu64_512(mem_addr: *mut i64, a: i64x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.storeu.ps.512"]
    unsafe fn storeups_512(mem_addr: *mut f32, a: f32x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.storeu.pd.512"]
    unsafe fn storeupd_512(mem_addr: *mut f64, a: f64x8, mask: u8);
43311
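    // Masked aligned stores: as above, with an alignment requirement on `mem_addr`.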
    #[link_name = "llvm.x86.avx512.mask.store.d.128"]
    unsafe fn storedqa32_128(mem_addr: *mut i32, a: i32x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.store.q.128"]
    unsafe fn storedqa64_128(mem_addr: *mut i64, a: i64x2, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.store.ps.128"]
    unsafe fn storeaps_128(mem_addr: *mut f32, a: f32x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.store.pd.128"]
    unsafe fn storeapd_128(mem_addr: *mut f64, a: f64x2, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.store.d.256"]
    unsafe fn storedqa32_256(mem_addr: *mut i32, a: i32x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.store.q.256"]
    unsafe fn storedqa64_256(mem_addr: *mut i64, a: i64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.store.ps.256"]
    unsafe fn storeaps_256(mem_addr: *mut f32, a: f32x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.store.pd.256"]
    unsafe fn storeapd_256(mem_addr: *mut f64, a: f64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.store.d.512"]
    unsafe fn storedqa32_512(mem_addr: *mut i32, a: i32x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.store.q.512"]
    unsafe fn storedqa64_512(mem_addr: *mut i64, a: i64x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.store.ps.512"]
    unsafe fn storeaps_512(mem_addr: *mut f32, a: f32x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.store.pd.512"]
    unsafe fn storeapd_512(mem_addr: *mut f64, a: f64x8, mask: u8);
43336
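    // Masked expand loads: consecutive elements are read from memory and placed into the lanes
    // selected by the mask; unselected lanes keep the corresponding value from `a`.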
    #[link_name = "llvm.x86.avx512.mask.expand.load.d.128"]
    unsafe fn expandloadd_128(mem_addr: *const i32, a: i32x4, mask: u8) -> i32x4;
    #[link_name = "llvm.x86.avx512.mask.expand.load.q.128"]
    unsafe fn expandloadq_128(mem_addr: *const i64, a: i64x2, mask: u8) -> i64x2;
    #[link_name = "llvm.x86.avx512.mask.expand.load.ps.128"]
    unsafe fn expandloadps_128(mem_addr: *const f32, a: f32x4, mask: u8) -> f32x4;
    #[link_name = "llvm.x86.avx512.mask.expand.load.pd.128"]
    unsafe fn expandloadpd_128(mem_addr: *const f64, a: f64x2, mask: u8) -> f64x2;
    #[link_name = "llvm.x86.avx512.mask.expand.load.d.256"]
    unsafe fn expandloadd_256(mem_addr: *const i32, a: i32x8, mask: u8) -> i32x8;
    #[link_name = "llvm.x86.avx512.mask.expand.load.q.256"]
    unsafe fn expandloadq_256(mem_addr: *const i64, a: i64x4, mask: u8) -> i64x4;
    #[link_name = "llvm.x86.avx512.mask.expand.load.ps.256"]
    unsafe fn expandloadps_256(mem_addr: *const f32, a: f32x8, mask: u8) -> f32x8;
    #[link_name = "llvm.x86.avx512.mask.expand.load.pd.256"]
    unsafe fn expandloadpd_256(mem_addr: *const f64, a: f64x4, mask: u8) -> f64x4;
    #[link_name = "llvm.x86.avx512.mask.expand.load.d.512"]
    unsafe fn expandloadd_512(mem_addr: *const i32, a: i32x16, mask: u16) -> i32x16;
    #[link_name = "llvm.x86.avx512.mask.expand.load.q.512"]
    unsafe fn expandloadq_512(mem_addr: *const i64, a: i64x8, mask: u8) -> i64x8;
    #[link_name = "llvm.x86.avx512.mask.expand.load.ps.512"]
    unsafe fn expandloadps_512(mem_addr: *const f32, a: f32x16, mask: u16) -> f32x16;
    #[link_name = "llvm.x86.avx512.mask.expand.load.pd.512"]
    unsafe fn expandloadpd_512(mem_addr: *const f64, a: f64x8, mask: u8) -> f64x8;
43361
43362}
43363
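// The tests below exercise the plain, writemask (`_mask_`), and zeromask (`_maskz_`) forms of each intrinsic.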
43364#[cfg(test)]
43365mod tests {
43366
43367 use stdarch_test::simd_test;
43368
43369 use crate::core_arch::x86::*;
43370 use crate::hint::black_box;
43371 use crate::mem::{self};
43372
43373 #[simd_test(enable = "avx512f")]
43374 unsafe fn test_mm512_abs_epi32() {
43375 #[rustfmt::skip]
43376 let a = _mm512_setr_epi32(
43377 0, 1, -1, i32::MAX,
43378 i32::MIN, 100, -100, -32,
43379 0, 1, -1, i32::MAX,
43380 i32::MIN, 100, -100, -32,
43381 );
43382 let r = _mm512_abs_epi32(a);
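        // The absolute value of i32::MIN wraps back to i32::MIN, written below as i32::MAX.wrapping_add(1).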
43383 #[rustfmt::skip]
43384 let e = _mm512_setr_epi32(
43385 0, 1, 1, i32::MAX,
43386 i32::MAX.wrapping_add(1), 100, 100, 32,
43387 0, 1, 1, i32::MAX,
43388 i32::MAX.wrapping_add(1), 100, 100, 32,
43389 );
43390 assert_eq_m512i(r, e);
43391 }
43392
43393 #[simd_test(enable = "avx512f")]
43394 unsafe fn test_mm512_mask_abs_epi32() {
43395 #[rustfmt::skip]
43396 let a = _mm512_setr_epi32(
43397 0, 1, -1, i32::MAX,
43398 i32::MIN, 100, -100, -32,
43399 0, 1, -1, i32::MAX,
43400 i32::MIN, 100, -100, -32,
43401 );
43402 let r = _mm512_mask_abs_epi32(a, 0, a);
43403 assert_eq_m512i(r, a);
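        // The low mask bits correspond to the first elements in `_mm512_setr_epi32` order, so
        // 0b00000000_11111111 updates lanes 0..8 and copies lanes 8..16 from `src`.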
43404 let r = _mm512_mask_abs_epi32(a, 0b00000000_11111111, a);
43405 #[rustfmt::skip]
43406 let e = _mm512_setr_epi32(
43407 0, 1, 1, i32::MAX,
43408 i32::MAX.wrapping_add(1), 100, 100, 32,
43409 0, 1, -1, i32::MAX,
43410 i32::MIN, 100, -100, -32,
43411 );
43412 assert_eq_m512i(r, e);
43413 }
43414
43415 #[simd_test(enable = "avx512f")]
43416 unsafe fn test_mm512_maskz_abs_epi32() {
43417 #[rustfmt::skip]
43418 let a = _mm512_setr_epi32(
43419 0, 1, -1, i32::MAX,
43420 i32::MIN, 100, -100, -32,
43421 0, 1, -1, i32::MAX,
43422 i32::MIN, 100, -100, -32,
43423 );
43424 let r = _mm512_maskz_abs_epi32(0, a);
43425 assert_eq_m512i(r, _mm512_setzero_si512());
43426 let r = _mm512_maskz_abs_epi32(0b00000000_11111111, a);
43427 #[rustfmt::skip]
43428 let e = _mm512_setr_epi32(
43429 0, 1, 1, i32::MAX,
43430 i32::MAX.wrapping_add(1), 100, 100, 32,
43431 0, 0, 0, 0,
43432 0, 0, 0, 0,
43433 );
43434 assert_eq_m512i(r, e);
43435 }
43436
43437 #[simd_test(enable = "avx512f,avx512vl")]
43438 unsafe fn test_mm256_mask_abs_epi32() {
43439 #[rustfmt::skip]
43440 let a = _mm256_setr_epi32(
43441 0, 1, -1, i32::MAX,
43442 i32::MIN, 100, -100, -32,
43443 );
43444 let r = _mm256_mask_abs_epi32(a, 0, a);
43445 assert_eq_m256i(r, a);
43446 let r = _mm256_mask_abs_epi32(a, 0b00001111, a);
43447 #[rustfmt::skip]
43448 let e = _mm256_setr_epi32(
43449 0, 1, 1, i32::MAX,
43450 i32::MAX.wrapping_add(1), 100, -100, -32,
43451 );
43452 assert_eq_m256i(r, e);
43453 }
43454
43455 #[simd_test(enable = "avx512f,avx512vl")]
43456 unsafe fn test_mm256_maskz_abs_epi32() {
43457 #[rustfmt::skip]
43458 let a = _mm256_setr_epi32(
43459 0, 1, -1, i32::MAX,
43460 i32::MIN, 100, -100, -32,
43461 );
43462 let r = _mm256_maskz_abs_epi32(0, a);
43463 assert_eq_m256i(r, _mm256_setzero_si256());
43464 let r = _mm256_maskz_abs_epi32(0b00001111, a);
43465 #[rustfmt::skip]
43466 let e = _mm256_setr_epi32(
43467 0, 1, 1, i32::MAX,
43468 0, 0, 0, 0,
43469 );
43470 assert_eq_m256i(r, e);
43471 }
43472
43473 #[simd_test(enable = "avx512f,avx512vl")]
43474 unsafe fn test_mm_mask_abs_epi32() {
43475 let a = _mm_setr_epi32(i32::MIN, 100, -100, -32);
43476 let r = _mm_mask_abs_epi32(a, 0, a);
43477 assert_eq_m128i(r, a);
43478 let r = _mm_mask_abs_epi32(a, 0b00001111, a);
43479 let e = _mm_setr_epi32(i32::MAX.wrapping_add(1), 100, 100, 32);
43480 assert_eq_m128i(r, e);
43481 }
43482
43483 #[simd_test(enable = "avx512f,avx512vl")]
43484 unsafe fn test_mm_maskz_abs_epi32() {
43485 let a = _mm_setr_epi32(i32::MIN, 100, -100, -32);
43486 let r = _mm_maskz_abs_epi32(0, a);
43487 assert_eq_m128i(r, _mm_setzero_si128());
43488 let r = _mm_maskz_abs_epi32(0b00001111, a);
43489 let e = _mm_setr_epi32(i32::MAX.wrapping_add(1), 100, 100, 32);
43490 assert_eq_m128i(r, e);
43491 }
43492
43493 #[simd_test(enable = "avx512f")]
43494 unsafe fn test_mm512_abs_ps() {
43495 #[rustfmt::skip]
43496 let a = _mm512_setr_ps(
43497 0., 1., -1., f32::MAX,
43498 f32::MIN, 100., -100., -32.,
43499 0., 1., -1., f32::MAX,
43500 f32::MIN, 100., -100., -32.,
43501 );
43502 let r = _mm512_abs_ps(a);
43503 #[rustfmt::skip]
43504 let e = _mm512_setr_ps(
43505 0., 1., 1., f32::MAX,
43506 f32::MAX, 100., 100., 32.,
43507 0., 1., 1., f32::MAX,
43508 f32::MAX, 100., 100., 32.,
43509 );
43510 assert_eq_m512(r, e);
43511 }
43512
43513 #[simd_test(enable = "avx512f")]
43514 unsafe fn test_mm512_mask_abs_ps() {
43515 #[rustfmt::skip]
43516 let a = _mm512_setr_ps(
43517 0., 1., -1., f32::MAX,
43518 f32::MIN, 100., -100., -32.,
43519 0., 1., -1., f32::MAX,
43520 f32::MIN, 100., -100., -32.,
43521 );
43522 let r = _mm512_mask_abs_ps(a, 0, a);
43523 assert_eq_m512(r, a);
43524 let r = _mm512_mask_abs_ps(a, 0b00000000_11111111, a);
43525 #[rustfmt::skip]
43526 let e = _mm512_setr_ps(
43527 0., 1., 1., f32::MAX,
43528 f32::MAX, 100., 100., 32.,
43529 0., 1., -1., f32::MAX,
43530 f32::MIN, 100., -100., -32.,
43531 );
43532 assert_eq_m512(r, e);
43533 }
43534
43535 #[simd_test(enable = "avx512f")]
43536 unsafe fn test_mm512_mask_mov_epi32() {
43537 let src = _mm512_set1_epi32(1);
43538 let a = _mm512_set1_epi32(2);
43539 let r = _mm512_mask_mov_epi32(src, 0, a);
43540 assert_eq_m512i(r, src);
43541 let r = _mm512_mask_mov_epi32(src, 0b11111111_11111111, a);
43542 assert_eq_m512i(r, a);
43543 }
43544
43545 #[simd_test(enable = "avx512f")]
43546 unsafe fn test_mm512_maskz_mov_epi32() {
43547 let a = _mm512_set1_epi32(2);
43548 let r = _mm512_maskz_mov_epi32(0, a);
43549 assert_eq_m512i(r, _mm512_setzero_si512());
43550 let r = _mm512_maskz_mov_epi32(0b11111111_11111111, a);
43551 assert_eq_m512i(r, a);
43552 }
43553
43554 #[simd_test(enable = "avx512f,avx512vl")]
43555 unsafe fn test_mm256_mask_mov_epi32() {
43556 let src = _mm256_set1_epi32(1);
43557 let a = _mm256_set1_epi32(2);
43558 let r = _mm256_mask_mov_epi32(src, 0, a);
43559 assert_eq_m256i(r, src);
43560 let r = _mm256_mask_mov_epi32(src, 0b11111111, a);
43561 assert_eq_m256i(r, a);
43562 }
43563
43564 #[simd_test(enable = "avx512f,avx512vl")]
43565 unsafe fn test_mm256_maskz_mov_epi32() {
43566 let a = _mm256_set1_epi32(2);
43567 let r = _mm256_maskz_mov_epi32(0, a);
43568 assert_eq_m256i(r, _mm256_setzero_si256());
43569 let r = _mm256_maskz_mov_epi32(0b11111111, a);
43570 assert_eq_m256i(r, a);
43571 }
43572
43573 #[simd_test(enable = "avx512f,avx512vl")]
43574 unsafe fn test_mm_mask_mov_epi32() {
43575 let src = _mm_set1_epi32(1);
43576 let a = _mm_set1_epi32(2);
43577 let r = _mm_mask_mov_epi32(src, 0, a);
43578 assert_eq_m128i(r, src);
43579 let r = _mm_mask_mov_epi32(src, 0b00001111, a);
43580 assert_eq_m128i(r, a);
43581 }
43582
43583 #[simd_test(enable = "avx512f,avx512vl")]
43584 unsafe fn test_mm_maskz_mov_epi32() {
43585 let a = _mm_set1_epi32(2);
43586 let r = _mm_maskz_mov_epi32(0, a);
43587 assert_eq_m128i(r, _mm_setzero_si128());
43588 let r = _mm_maskz_mov_epi32(0b00001111, a);
43589 assert_eq_m128i(r, a);
43590 }
43591
43592 #[simd_test(enable = "avx512f")]
43593 unsafe fn test_mm512_mask_mov_ps() {
43594 let src = _mm512_set1_ps(1.);
43595 let a = _mm512_set1_ps(2.);
43596 let r = _mm512_mask_mov_ps(src, 0, a);
43597 assert_eq_m512(r, src);
43598 let r = _mm512_mask_mov_ps(src, 0b11111111_11111111, a);
43599 assert_eq_m512(r, a);
43600 }
43601
43602 #[simd_test(enable = "avx512f")]
43603 unsafe fn test_mm512_maskz_mov_ps() {
43604 let a = _mm512_set1_ps(2.);
43605 let r = _mm512_maskz_mov_ps(0, a);
43606 assert_eq_m512(r, _mm512_setzero_ps());
43607 let r = _mm512_maskz_mov_ps(0b11111111_11111111, a);
43608 assert_eq_m512(r, a);
43609 }
43610
43611 #[simd_test(enable = "avx512f,avx512vl")]
43612 unsafe fn test_mm256_mask_mov_ps() {
43613 let src = _mm256_set1_ps(1.);
43614 let a = _mm256_set1_ps(2.);
43615 let r = _mm256_mask_mov_ps(src, 0, a);
43616 assert_eq_m256(r, src);
43617 let r = _mm256_mask_mov_ps(src, 0b11111111, a);
43618 assert_eq_m256(r, a);
43619 }
43620
43621 #[simd_test(enable = "avx512f,avx512vl")]
43622 unsafe fn test_mm256_maskz_mov_ps() {
43623 let a = _mm256_set1_ps(2.);
43624 let r = _mm256_maskz_mov_ps(0, a);
43625 assert_eq_m256(r, _mm256_setzero_ps());
43626 let r = _mm256_maskz_mov_ps(0b11111111, a);
43627 assert_eq_m256(r, a);
43628 }
43629
43630 #[simd_test(enable = "avx512f,avx512vl")]
43631 unsafe fn test_mm_mask_mov_ps() {
43632 let src = _mm_set1_ps(1.);
43633 let a = _mm_set1_ps(2.);
43634 let r = _mm_mask_mov_ps(src, 0, a);
43635 assert_eq_m128(r, src);
43636 let r = _mm_mask_mov_ps(src, 0b00001111, a);
43637 assert_eq_m128(r, a);
43638 }
43639
43640 #[simd_test(enable = "avx512f,avx512vl")]
43641 unsafe fn test_mm_maskz_mov_ps() {
43642 let a = _mm_set1_ps(2.);
43643 let r = _mm_maskz_mov_ps(0, a);
43644 assert_eq_m128(r, _mm_setzero_ps());
43645 let r = _mm_maskz_mov_ps(0b00001111, a);
43646 assert_eq_m128(r, a);
43647 }
43648
43649 #[simd_test(enable = "avx512f")]
43650 unsafe fn test_mm512_add_epi32() {
43651 #[rustfmt::skip]
43652 let a = _mm512_setr_epi32(
43653 0, 1, -1, i32::MAX,
43654 i32::MIN, 100, -100, -32,
43655 0, 1, -1, i32::MAX,
43656 i32::MIN, 100, -100, -32,
43657 );
43658 let b = _mm512_set1_epi32(1);
43659 let r = _mm512_add_epi32(a, b);
43660 #[rustfmt::skip]
43661 let e = _mm512_setr_epi32(
43662 1, 2, 0, i32::MIN,
43663 i32::MIN + 1, 101, -99, -31,
43664 1, 2, 0, i32::MIN,
43665 i32::MIN + 1, 101, -99, -31,
43666 );
43667 assert_eq_m512i(r, e);
43668 }
43669
43670 #[simd_test(enable = "avx512f")]
43671 unsafe fn test_mm512_mask_add_epi32() {
43672 #[rustfmt::skip]
43673 let a = _mm512_setr_epi32(
43674 0, 1, -1, i32::MAX,
43675 i32::MIN, 100, -100, -32,
43676 0, 1, -1, i32::MAX,
43677 i32::MIN, 100, -100, -32,
43678 );
43679 let b = _mm512_set1_epi32(1);
43680 let r = _mm512_mask_add_epi32(a, 0, a, b);
43681 assert_eq_m512i(r, a);
43682 let r = _mm512_mask_add_epi32(a, 0b00000000_11111111, a, b);
43683 #[rustfmt::skip]
43684 let e = _mm512_setr_epi32(
43685 1, 2, 0, i32::MIN,
43686 i32::MIN + 1, 101, -99, -31,
43687 0, 1, -1, i32::MAX,
43688 i32::MIN, 100, -100, -32,
43689 );
43690 assert_eq_m512i(r, e);
43691 }
43692
43693 #[simd_test(enable = "avx512f")]
43694 unsafe fn test_mm512_maskz_add_epi32() {
43695 #[rustfmt::skip]
43696 let a = _mm512_setr_epi32(
43697 0, 1, -1, i32::MAX,
43698 i32::MIN, 100, -100, -32,
43699 0, 1, -1, i32::MAX,
43700 i32::MIN, 100, -100, -32,
43701 );
43702 let b = _mm512_set1_epi32(1);
43703 let r = _mm512_maskz_add_epi32(0, a, b);
43704 assert_eq_m512i(r, _mm512_setzero_si512());
43705 let r = _mm512_maskz_add_epi32(0b00000000_11111111, a, b);
43706 #[rustfmt::skip]
43707 let e = _mm512_setr_epi32(
43708 1, 2, 0, i32::MIN,
43709 i32::MIN + 1, 101, -99, -31,
43710 0, 0, 0, 0,
43711 0, 0, 0, 0,
43712 );
43713 assert_eq_m512i(r, e);
43714 }
43715
43716 #[simd_test(enable = "avx512f,avx512vl")]
43717 unsafe fn test_mm256_mask_add_epi32() {
43718 let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
43719 let b = _mm256_set1_epi32(1);
43720 let r = _mm256_mask_add_epi32(a, 0, a, b);
43721 assert_eq_m256i(r, a);
43722 let r = _mm256_mask_add_epi32(a, 0b11111111, a, b);
43723 let e = _mm256_set_epi32(1, 2, 0, i32::MIN, i32::MIN + 1, 101, -99, -31);
43724 assert_eq_m256i(r, e);
43725 }
43726
43727 #[simd_test(enable = "avx512f,avx512vl")]
43728 unsafe fn test_mm256_maskz_add_epi32() {
43729 let a = _mm256_setr_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
43730 let b = _mm256_set1_epi32(1);
43731 let r = _mm256_maskz_add_epi32(0, a, b);
43732 assert_eq_m256i(r, _mm256_setzero_si256());
43733 let r = _mm256_maskz_add_epi32(0b11111111, a, b);
43734 let e = _mm256_setr_epi32(1, 2, 0, i32::MIN, i32::MIN + 1, 101, -99, -31);
43735 assert_eq_m256i(r, e);
43736 }
43737
43738 #[simd_test(enable = "avx512f,avx512vl")]
43739 unsafe fn test_mm_mask_add_epi32() {
43740 let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
43741 let b = _mm_set1_epi32(1);
43742 let r = _mm_mask_add_epi32(a, 0, a, b);
43743 assert_eq_m128i(r, a);
43744 let r = _mm_mask_add_epi32(a, 0b00001111, a, b);
43745 let e = _mm_set_epi32(2, 0, i32::MIN, i32::MIN + 1);
43746 assert_eq_m128i(r, e);
43747 }
43748
43749 #[simd_test(enable = "avx512f,avx512vl")]
43750 unsafe fn test_mm_maskz_add_epi32() {
43751 let a = _mm_setr_epi32(1, -1, i32::MAX, i32::MIN);
43752 let b = _mm_set1_epi32(1);
43753 let r = _mm_maskz_add_epi32(0, a, b);
43754 assert_eq_m128i(r, _mm_setzero_si128());
43755 let r = _mm_maskz_add_epi32(0b00001111, a, b);
43756 let e = _mm_setr_epi32(2, 0, i32::MIN, i32::MIN + 1);
43757 assert_eq_m128i(r, e);
43758 }
43759
43760 #[simd_test(enable = "avx512f")]
43761 unsafe fn test_mm512_add_ps() {
43762 #[rustfmt::skip]
43763 let a = _mm512_setr_ps(
43764 0., 1., -1., f32::MAX,
43765 f32::MIN, 100., -100., -32.,
43766 0., 1., -1., f32::MAX,
43767 f32::MIN, 100., -100., -32.,
43768 );
43769 let b = _mm512_set1_ps(1.);
43770 let r = _mm512_add_ps(a, b);
43771 #[rustfmt::skip]
43772 let e = _mm512_setr_ps(
43773 1., 2., 0., f32::MAX,
43774 f32::MIN + 1., 101., -99., -31.,
43775 1., 2., 0., f32::MAX,
43776 f32::MIN + 1., 101., -99., -31.,
43777 );
43778 assert_eq_m512(r, e);
43779 }
43780
43781 #[simd_test(enable = "avx512f")]
43782 unsafe fn test_mm512_mask_add_ps() {
43783 #[rustfmt::skip]
43784 let a = _mm512_setr_ps(
43785 0., 1., -1., f32::MAX,
43786 f32::MIN, 100., -100., -32.,
43787 0., 1., -1., f32::MAX,
43788 f32::MIN, 100., -100., -32.,
43789 );
43790 let b = _mm512_set1_ps(1.);
43791 let r = _mm512_mask_add_ps(a, 0, a, b);
43792 assert_eq_m512(r, a);
43793 let r = _mm512_mask_add_ps(a, 0b00000000_11111111, a, b);
43794 #[rustfmt::skip]
43795 let e = _mm512_setr_ps(
43796 1., 2., 0., f32::MAX,
43797 f32::MIN + 1., 101., -99., -31.,
43798 0., 1., -1., f32::MAX,
43799 f32::MIN, 100., -100., -32.,
43800 );
43801 assert_eq_m512(r, e);
43802 }
43803
43804 #[simd_test(enable = "avx512f")]
43805 unsafe fn test_mm512_maskz_add_ps() {
43806 #[rustfmt::skip]
43807 let a = _mm512_setr_ps(
43808 0., 1., -1., f32::MAX,
43809 f32::MIN, 100., -100., -32.,
43810 0., 1., -1., f32::MAX,
43811 f32::MIN, 100., -100., -32.,
43812 );
43813 let b = _mm512_set1_ps(1.);
43814 let r = _mm512_maskz_add_ps(0, a, b);
43815 assert_eq_m512(r, _mm512_setzero_ps());
43816 let r = _mm512_maskz_add_ps(0b00000000_11111111, a, b);
43817 #[rustfmt::skip]
43818 let e = _mm512_setr_ps(
43819 1., 2., 0., f32::MAX,
43820 f32::MIN + 1., 101., -99., -31.,
43821 0., 0., 0., 0.,
43822 0., 0., 0., 0.,
43823 );
43824 assert_eq_m512(r, e);
43825 }
43826
43827 #[simd_test(enable = "avx512f,avx512vl")]
43828 unsafe fn test_mm256_mask_add_ps() {
43829 let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
43830 let b = _mm256_set1_ps(1.);
43831 let r = _mm256_mask_add_ps(a, 0, a, b);
43832 assert_eq_m256(r, a);
43833 let r = _mm256_mask_add_ps(a, 0b11111111, a, b);
43834 let e = _mm256_set_ps(1., 2., 0., f32::MAX, f32::MIN + 1., 101., -99., -31.);
43835 assert_eq_m256(r, e);
43836 }
43837
43838 #[simd_test(enable = "avx512f,avx512vl")]
43839 unsafe fn test_mm256_maskz_add_ps() {
43840 let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
43841 let b = _mm256_set1_ps(1.);
43842 let r = _mm256_maskz_add_ps(0, a, b);
43843 assert_eq_m256(r, _mm256_setzero_ps());
43844 let r = _mm256_maskz_add_ps(0b11111111, a, b);
43845 let e = _mm256_set_ps(1., 2., 0., f32::MAX, f32::MIN + 1., 101., -99., -31.);
43846 assert_eq_m256(r, e);
43847 }
43848
43849 #[simd_test(enable = "avx512f,avx512vl")]
43850 unsafe fn test_mm_mask_add_ps() {
43851 let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
43852 let b = _mm_set1_ps(1.);
43853 let r = _mm_mask_add_ps(a, 0, a, b);
43854 assert_eq_m128(r, a);
43855 let r = _mm_mask_add_ps(a, 0b00001111, a, b);
43856 let e = _mm_set_ps(2., 0., f32::MAX, f32::MIN + 1.);
43857 assert_eq_m128(r, e);
43858 }
43859
43860 #[simd_test(enable = "avx512f,avx512vl")]
43861 unsafe fn test_mm_maskz_add_ps() {
43862 let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
43863 let b = _mm_set1_ps(1.);
43864 let r = _mm_maskz_add_ps(0, a, b);
43865 assert_eq_m128(r, _mm_setzero_ps());
43866 let r = _mm_maskz_add_ps(0b00001111, a, b);
43867 let e = _mm_set_ps(2., 0., f32::MAX, f32::MIN + 1.);
43868 assert_eq_m128(r, e);
43869 }
43870
43871 #[simd_test(enable = "avx512f")]
43872 unsafe fn test_mm512_sub_epi32() {
43873 #[rustfmt::skip]
43874 let a = _mm512_setr_epi32(
43875 0, 1, -1, i32::MAX,
43876 i32::MIN, 100, -100, -32,
43877 0, 1, -1, i32::MAX,
43878 i32::MIN, 100, -100, -32,
43879 );
43880 let b = _mm512_set1_epi32(1);
43881 let r = _mm512_sub_epi32(a, b);
43882 #[rustfmt::skip]
43883 let e = _mm512_setr_epi32(
43884 -1, 0, -2, i32::MAX - 1,
43885 i32::MAX, 99, -101, -33,
43886 -1, 0, -2, i32::MAX - 1,
43887 i32::MAX, 99, -101, -33,
43888 );
43889 assert_eq_m512i(r, e);
43890 }
43891
43892 #[simd_test(enable = "avx512f")]
43893 unsafe fn test_mm512_mask_sub_epi32() {
43894 #[rustfmt::skip]
43895 let a = _mm512_setr_epi32(
43896 0, 1, -1, i32::MAX,
43897 i32::MIN, 100, -100, -32,
43898 0, 1, -1, i32::MAX,
43899 i32::MIN, 100, -100, -32,
43900 );
43901 let b = _mm512_set1_epi32(1);
43902 let r = _mm512_mask_sub_epi32(a, 0, a, b);
43903 assert_eq_m512i(r, a);
43904 let r = _mm512_mask_sub_epi32(a, 0b00000000_11111111, a, b);
43905 #[rustfmt::skip]
43906 let e = _mm512_setr_epi32(
43907 -1, 0, -2, i32::MAX - 1,
43908 i32::MAX, 99, -101, -33,
43909 0, 1, -1, i32::MAX,
43910 i32::MIN, 100, -100, -32,
43911 );
43912 assert_eq_m512i(r, e);
43913 }
43914
43915 #[simd_test(enable = "avx512f")]
43916 unsafe fn test_mm512_maskz_sub_epi32() {
43917 #[rustfmt::skip]
43918 let a = _mm512_setr_epi32(
43919 0, 1, -1, i32::MAX,
43920 i32::MIN, 100, -100, -32,
43921 0, 1, -1, i32::MAX,
43922 i32::MIN, 100, -100, -32,
43923 );
43924 let b = _mm512_set1_epi32(1);
43925 let r = _mm512_maskz_sub_epi32(0, a, b);
43926 assert_eq_m512i(r, _mm512_setzero_si512());
43927 let r = _mm512_maskz_sub_epi32(0b00000000_11111111, a, b);
43928 #[rustfmt::skip]
43929 let e = _mm512_setr_epi32(
43930 -1, 0, -2, i32::MAX - 1,
43931 i32::MAX, 99, -101, -33,
43932 0, 0, 0, 0,
43933 0, 0, 0, 0,
43934 );
43935 assert_eq_m512i(r, e);
43936 }
43937
43938 #[simd_test(enable = "avx512f,avx512vl")]
43939 unsafe fn test_mm256_mask_sub_epi32() {
43940 let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
43941 let b = _mm256_set1_epi32(1);
43942 let r = _mm256_mask_sub_epi32(a, 0, a, b);
43943 assert_eq_m256i(r, a);
43944 let r = _mm256_mask_sub_epi32(a, 0b11111111, a, b);
43945 let e = _mm256_set_epi32(-1, 0, -2, i32::MAX - 1, i32::MAX, 99, -101, -33);
43946 assert_eq_m256i(r, e);
43947 }
43948
43949 #[simd_test(enable = "avx512f,avx512vl")]
43950 unsafe fn test_mm256_maskz_sub_epi32() {
43951 let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
43952 let b = _mm256_set1_epi32(1);
43953 let r = _mm256_maskz_sub_epi32(0, a, b);
43954 assert_eq_m256i(r, _mm256_setzero_si256());
43955 let r = _mm256_maskz_sub_epi32(0b11111111, a, b);
43956 let e = _mm256_set_epi32(-1, 0, -2, i32::MAX - 1, i32::MAX, 99, -101, -33);
43957 assert_eq_m256i(r, e);
43958 }
43959
43960 #[simd_test(enable = "avx512f,avx512vl")]
43961 unsafe fn test_mm_mask_sub_epi32() {
43962 let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
43963 let b = _mm_set1_epi32(1);
43964 let r = _mm_mask_sub_epi32(a, 0, a, b);
43965 assert_eq_m128i(r, a);
43966 let r = _mm_mask_sub_epi32(a, 0b00001111, a, b);
43967 let e = _mm_set_epi32(0, -2, i32::MAX - 1, i32::MAX);
43968 assert_eq_m128i(r, e);
43969 }
43970
43971 #[simd_test(enable = "avx512f,avx512vl")]
43972 unsafe fn test_mm_maskz_sub_epi32() {
43973 let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
43974 let b = _mm_set1_epi32(1);
43975 let r = _mm_maskz_sub_epi32(0, a, b);
43976 assert_eq_m128i(r, _mm_setzero_si128());
43977 let r = _mm_maskz_sub_epi32(0b00001111, a, b);
43978 let e = _mm_set_epi32(0, -2, i32::MAX - 1, i32::MAX);
43979 assert_eq_m128i(r, e);
43980 }
43981
43982 #[simd_test(enable = "avx512f")]
43983 unsafe fn test_mm512_sub_ps() {
43984 #[rustfmt::skip]
43985 let a = _mm512_setr_ps(
43986 0., 1., -1., f32::MAX,
43987 f32::MIN, 100., -100., -32.,
43988 0., 1., -1., f32::MAX,
43989 f32::MIN, 100., -100., -32.,
43990 );
43991 let b = _mm512_set1_ps(1.);
43992 let r = _mm512_sub_ps(a, b);
43993 #[rustfmt::skip]
43994 let e = _mm512_setr_ps(
43995 -1., 0., -2., f32::MAX - 1.,
43996 f32::MIN, 99., -101., -33.,
43997 -1., 0., -2., f32::MAX - 1.,
43998 f32::MIN, 99., -101., -33.,
43999 );
44000 assert_eq_m512(r, e);
44001 }
44002
44003 #[simd_test(enable = "avx512f")]
44004 unsafe fn test_mm512_mask_sub_ps() {
44005 #[rustfmt::skip]
44006 let a = _mm512_setr_ps(
44007 0., 1., -1., f32::MAX,
44008 f32::MIN, 100., -100., -32.,
44009 0., 1., -1., f32::MAX,
44010 f32::MIN, 100., -100., -32.,
44011 );
44012 let b = _mm512_set1_ps(1.);
44013 let r = _mm512_mask_sub_ps(a, 0, a, b);
44014 assert_eq_m512(r, a);
44015 let r = _mm512_mask_sub_ps(a, 0b00000000_11111111, a, b);
44016 #[rustfmt::skip]
44017 let e = _mm512_setr_ps(
44018 -1., 0., -2., f32::MAX - 1.,
44019 f32::MIN, 99., -101., -33.,
44020 0., 1., -1., f32::MAX,
44021 f32::MIN, 100., -100., -32.,
44022 );
44023 assert_eq_m512(r, e);
44024 }
44025
44026 #[simd_test(enable = "avx512f")]
44027 unsafe fn test_mm512_maskz_sub_ps() {
44028 #[rustfmt::skip]
44029 let a = _mm512_setr_ps(
44030 0., 1., -1., f32::MAX,
44031 f32::MIN, 100., -100., -32.,
44032 0., 1., -1., f32::MAX,
44033 f32::MIN, 100., -100., -32.,
44034 );
44035 let b = _mm512_set1_ps(1.);
44036 let r = _mm512_maskz_sub_ps(0, a, b);
44037 assert_eq_m512(r, _mm512_setzero_ps());
44038 let r = _mm512_maskz_sub_ps(0b00000000_11111111, a, b);
44039 #[rustfmt::skip]
44040 let e = _mm512_setr_ps(
44041 -1., 0., -2., f32::MAX - 1.,
44042 f32::MIN, 99., -101., -33.,
44043 0., 0., 0., 0.,
44044 0., 0., 0., 0.,
44045 );
44046 assert_eq_m512(r, e);
44047 }
44048
44049 #[simd_test(enable = "avx512f,avx512vl")]
44050 unsafe fn test_mm256_mask_sub_ps() {
44051 let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
44052 let b = _mm256_set1_ps(1.);
44053 let r = _mm256_mask_sub_ps(a, 0, a, b);
44054 assert_eq_m256(r, a);
44055 let r = _mm256_mask_sub_ps(a, 0b11111111, a, b);
44056 let e = _mm256_set_ps(-1., 0., -2., f32::MAX - 1., f32::MIN, 99., -101., -33.);
44057 assert_eq_m256(r, e);
44058 }
44059
44060 #[simd_test(enable = "avx512f,avx512vl")]
44061 unsafe fn test_mm256_maskz_sub_ps() {
44062 let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
44063 let b = _mm256_set1_ps(1.);
44064 let r = _mm256_maskz_sub_ps(0, a, b);
44065 assert_eq_m256(r, _mm256_setzero_ps());
44066 let r = _mm256_maskz_sub_ps(0b11111111, a, b);
44067 let e = _mm256_set_ps(-1., 0., -2., f32::MAX - 1., f32::MIN, 99., -101., -33.);
44068 assert_eq_m256(r, e);
44069 }
44070
44071 #[simd_test(enable = "avx512f,avx512vl")]
44072 unsafe fn test_mm_mask_sub_ps() {
44073 let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
44074 let b = _mm_set1_ps(1.);
44075 let r = _mm_mask_sub_ps(a, 0, a, b);
44076 assert_eq_m128(r, a);
44077 let r = _mm_mask_sub_ps(a, 0b00001111, a, b);
44078 let e = _mm_set_ps(0., -2., f32::MAX - 1., f32::MIN);
44079 assert_eq_m128(r, e);
44080 }
44081
44082 #[simd_test(enable = "avx512f,avx512vl")]
44083 unsafe fn test_mm_maskz_sub_ps() {
44084 let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
44085 let b = _mm_set1_ps(1.);
44086 let r = _mm_maskz_sub_ps(0, a, b);
44087 assert_eq_m128(r, _mm_setzero_ps());
44088 let r = _mm_maskz_sub_ps(0b00001111, a, b);
44089 let e = _mm_set_ps(0., -2., f32::MAX - 1., f32::MIN);
44090 assert_eq_m128(r, e);
44091 }
44092
44093 #[simd_test(enable = "avx512f")]
44094 unsafe fn test_mm512_mullo_epi32() {
44095 #[rustfmt::skip]
44096 let a = _mm512_setr_epi32(
44097 0, 1, -1, i32::MAX,
44098 i32::MIN, 100, -100, -32,
44099 0, 1, -1, i32::MAX,
44100 i32::MIN, 100, -100, -32,
44101 );
44102 let b = _mm512_set1_epi32(2);
44103 let r = _mm512_mullo_epi32(a, b);
44104 let e = _mm512_setr_epi32(
44105 0, 2, -2, -2, 0, 200, -200, -64, 0, 2, -2, -2, 0, 200, -200, -64,
44106 );
44107 assert_eq_m512i(r, e);
44108 }
44109
44110 #[simd_test(enable = "avx512f")]
44111 unsafe fn test_mm512_mask_mullo_epi32() {
44112 #[rustfmt::skip]
44113 let a = _mm512_setr_epi32(
44114 0, 1, -1, i32::MAX,
44115 i32::MIN, 100, -100, -32,
44116 0, 1, -1, i32::MAX,
44117 i32::MIN, 100, -100, -32,
44118 );
44119 let b = _mm512_set1_epi32(2);
44120 let r = _mm512_mask_mullo_epi32(a, 0, a, b);
44121 assert_eq_m512i(r, a);
44122 let r = _mm512_mask_mullo_epi32(a, 0b00000000_11111111, a, b);
44123 #[rustfmt::skip]
44124 let e = _mm512_setr_epi32(
44125 0, 2, -2, -2,
44126 0, 200, -200, -64,
44127 0, 1, -1, i32::MAX,
44128 i32::MIN, 100, -100, -32,
44129 );
44130 assert_eq_m512i(r, e);
44131 }
44132
44133 #[simd_test(enable = "avx512f")]
44134 unsafe fn test_mm512_maskz_mullo_epi32() {
44135 #[rustfmt::skip]
44136 let a = _mm512_setr_epi32(
44137 0, 1, -1, i32::MAX,
44138 i32::MIN, 100, -100, -32,
44139 0, 1, -1, i32::MAX,
44140 i32::MIN, 100, -100, -32,
44141 );
44142 let b = _mm512_set1_epi32(2);
44143 let r = _mm512_maskz_mullo_epi32(0, a, b);
44144 assert_eq_m512i(r, _mm512_setzero_si512());
44145 let r = _mm512_maskz_mullo_epi32(0b00000000_11111111, a, b);
44146 let e = _mm512_setr_epi32(0, 2, -2, -2, 0, 200, -200, -64, 0, 0, 0, 0, 0, 0, 0, 0);
44147 assert_eq_m512i(r, e);
44148 }
44149
44150 #[simd_test(enable = "avx512f,avx512vl")]
44151 unsafe fn test_mm256_mask_mullo_epi32() {
44152 let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
44153 let b = _mm256_set1_epi32(2);
44154 let r = _mm256_mask_mullo_epi32(a, 0, a, b);
44155 assert_eq_m256i(r, a);
44156 let r = _mm256_mask_mullo_epi32(a, 0b11111111, a, b);
44157 let e = _mm256_set_epi32(0, 2, -2, -2, 0, 200, -200, -64);
44158 assert_eq_m256i(r, e);
44159 }
44160
44161 #[simd_test(enable = "avx512f,avx512vl")]
44162 unsafe fn test_mm256_maskz_mullo_epi32() {
44163 let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
44164 let b = _mm256_set1_epi32(2);
44165 let r = _mm256_maskz_mullo_epi32(0, a, b);
44166 assert_eq_m256i(r, _mm256_setzero_si256());
44167 let r = _mm256_maskz_mullo_epi32(0b11111111, a, b);
44168 let e = _mm256_set_epi32(0, 2, -2, -2, 0, 200, -200, -64);
44169 assert_eq_m256i(r, e);
44170 }
44171
44172 #[simd_test(enable = "avx512f,avx512vl")]
44173 unsafe fn test_mm_mask_mullo_epi32() {
44174 let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
44175 let b = _mm_set1_epi32(2);
44176 let r = _mm_mask_mullo_epi32(a, 0, a, b);
44177 assert_eq_m128i(r, a);
44178 let r = _mm_mask_mullo_epi32(a, 0b00001111, a, b);
44179 let e = _mm_set_epi32(2, -2, -2, 0);
44180 assert_eq_m128i(r, e);
44181 }
44182
44183 #[simd_test(enable = "avx512f,avx512vl")]
44184 unsafe fn test_mm_maskz_mullo_epi32() {
44185 let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
44186 let b = _mm_set1_epi32(2);
44187 let r = _mm_maskz_mullo_epi32(0, a, b);
44188 assert_eq_m128i(r, _mm_setzero_si128());
44189 let r = _mm_maskz_mullo_epi32(0b00001111, a, b);
44190 let e = _mm_set_epi32(2, -2, -2, 0);
44191 assert_eq_m128i(r, e);
44192 }
44193
44194 #[simd_test(enable = "avx512f")]
44195 unsafe fn test_mm512_mul_ps() {
44196 #[rustfmt::skip]
44197 let a = _mm512_setr_ps(
44198 0., 1., -1., f32::MAX,
44199 f32::MIN, 100., -100., -32.,
44200 0., 1., -1., f32::MAX,
44201 f32::MIN, 100., -100., -32.,
44202 );
44203 let b = _mm512_set1_ps(2.);
44204 let r = _mm512_mul_ps(a, b);
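        // f32::MAX * 2. overflows to INFINITY and f32::MIN * 2. to NEG_INFINITY.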
44205 #[rustfmt::skip]
44206 let e = _mm512_setr_ps(
44207 0., 2., -2., f32::INFINITY,
44208 f32::NEG_INFINITY, 200., -200., -64.,
44209 0., 2., -2., f32::INFINITY,
            f32::NEG_INFINITY, 200., -200., -64.,
44212 );
44213 assert_eq_m512(r, e);
44214 }
44215
44216 #[simd_test(enable = "avx512f")]
44217 unsafe fn test_mm512_mask_mul_ps() {
44218 #[rustfmt::skip]
44219 let a = _mm512_setr_ps(
44220 0., 1., -1., f32::MAX,
44221 f32::MIN, 100., -100., -32.,
44222 0., 1., -1., f32::MAX,
44223 f32::MIN, 100., -100., -32.,
44224 );
44225 let b = _mm512_set1_ps(2.);
44226 let r = _mm512_mask_mul_ps(a, 0, a, b);
44227 assert_eq_m512(r, a);
44228 let r = _mm512_mask_mul_ps(a, 0b00000000_11111111, a, b);
44229 #[rustfmt::skip]
44230 let e = _mm512_setr_ps(
44231 0., 2., -2., f32::INFINITY,
44232 f32::NEG_INFINITY, 200., -200., -64.,
44233 0., 1., -1., f32::MAX,
44234 f32::MIN, 100., -100., -32.,
44235 );
44236 assert_eq_m512(r, e);
44237 }
44238
44239 #[simd_test(enable = "avx512f")]
44240 unsafe fn test_mm512_maskz_mul_ps() {
44241 #[rustfmt::skip]
44242 let a = _mm512_setr_ps(
44243 0., 1., -1., f32::MAX,
44244 f32::MIN, 100., -100., -32.,
44245 0., 1., -1., f32::MAX,
44246 f32::MIN, 100., -100., -32.,
44247 );
44248 let b = _mm512_set1_ps(2.);
44249 let r = _mm512_maskz_mul_ps(0, a, b);
44250 assert_eq_m512(r, _mm512_setzero_ps());
44251 let r = _mm512_maskz_mul_ps(0b00000000_11111111, a, b);
44252 #[rustfmt::skip]
44253 let e = _mm512_setr_ps(
44254 0., 2., -2., f32::INFINITY,
44255 f32::NEG_INFINITY, 200., -200., -64.,
44256 0., 0., 0., 0.,
44257 0., 0., 0., 0.,
44258 );
44259 assert_eq_m512(r, e);
44260 }
44261
44262 #[simd_test(enable = "avx512f,avx512vl")]
44263 unsafe fn test_mm256_mask_mul_ps() {
44264 let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
44265 let b = _mm256_set1_ps(2.);
44266 let r = _mm256_mask_mul_ps(a, 0, a, b);
44267 assert_eq_m256(r, a);
44268 let r = _mm256_mask_mul_ps(a, 0b11111111, a, b);
44269 #[rustfmt::skip]
44270 let e = _mm256_set_ps(
44271 0., 2., -2., f32::INFINITY,
44272 f32::NEG_INFINITY, 200., -200., -64.,
44273 );
44274 assert_eq_m256(r, e);
44275 }
44276
44277 #[simd_test(enable = "avx512f,avx512vl")]
44278 unsafe fn test_mm256_maskz_mul_ps() {
44279 let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
44280 let b = _mm256_set1_ps(2.);
44281 let r = _mm256_maskz_mul_ps(0, a, b);
44282 assert_eq_m256(r, _mm256_setzero_ps());
44283 let r = _mm256_maskz_mul_ps(0b11111111, a, b);
44284 #[rustfmt::skip]
44285 let e = _mm256_set_ps(
44286 0., 2., -2., f32::INFINITY,
44287 f32::NEG_INFINITY, 200., -200., -64.,
44288 );
44289 assert_eq_m256(r, e);
44290 }
44291
44292 #[simd_test(enable = "avx512f,avx512vl")]
44293 unsafe fn test_mm_mask_mul_ps() {
44294 let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
44295 let b = _mm_set1_ps(2.);
44296 let r = _mm_mask_mul_ps(a, 0, a, b);
44297 assert_eq_m128(r, a);
44298 let r = _mm_mask_mul_ps(a, 0b00001111, a, b);
44299 let e = _mm_set_ps(2., -2., f32::INFINITY, f32::NEG_INFINITY);
44300 assert_eq_m128(r, e);
44301 }
44302
44303 #[simd_test(enable = "avx512f,avx512vl")]
44304 unsafe fn test_mm_maskz_mul_ps() {
44305 let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
44306 let b = _mm_set1_ps(2.);
44307 let r = _mm_maskz_mul_ps(0, a, b);
44308 assert_eq_m128(r, _mm_setzero_ps());
44309 let r = _mm_maskz_mul_ps(0b00001111, a, b);
44310 let e = _mm_set_ps(2., -2., f32::INFINITY, f32::NEG_INFINITY);
44311 assert_eq_m128(r, e);
44312 }
44313
44314 #[simd_test(enable = "avx512f")]
44315 unsafe fn test_mm512_div_ps() {
44316 let a = _mm512_setr_ps(
44317 0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32.,
44318 );
44319 let b = _mm512_setr_ps(
44320 2., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2.,
44321 );
44322 let r = _mm512_div_ps(a, b);
44323 #[rustfmt::skip]
44324 let e = _mm512_setr_ps(
44325 0., 0.5, -0.5, -1.,
44326 50., f32::INFINITY, -50., -16.,
44327 0., 0.5, -0.5, 500.,
44328 f32::NEG_INFINITY, 50., -50., -16.,
44329 );
        assert_eq_m512(r, e); // 100./0. == INFINITY and -131./0. == NEG_INFINITY
44331 }
44332
44333 #[simd_test(enable = "avx512f")]
44334 unsafe fn test_mm512_mask_div_ps() {
44335 let a = _mm512_setr_ps(
44336 0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32.,
44337 );
44338 let b = _mm512_setr_ps(
44339 2., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2.,
44340 );
44341 let r = _mm512_mask_div_ps(a, 0, a, b);
44342 assert_eq_m512(r, a);
44343 let r = _mm512_mask_div_ps(a, 0b00000000_11111111, a, b);
44344 #[rustfmt::skip]
44345 let e = _mm512_setr_ps(
44346 0., 0.5, -0.5, -1.,
44347 50., f32::INFINITY, -50., -16.,
44348 0., 1., -1., 1000.,
44349 -131., 100., -100., -32.,
44350 );
44351 assert_eq_m512(r, e);
44352 }
44353
44354 #[simd_test(enable = "avx512f")]
44355 unsafe fn test_mm512_maskz_div_ps() {
44356 let a = _mm512_setr_ps(
44357 0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32.,
44358 );
44359 let b = _mm512_setr_ps(
44360 2., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2.,
44361 );
44362 let r = _mm512_maskz_div_ps(0, a, b);
44363 assert_eq_m512(r, _mm512_setzero_ps());
44364 let r = _mm512_maskz_div_ps(0b00000000_11111111, a, b);
44365 #[rustfmt::skip]
44366 let e = _mm512_setr_ps(
44367 0., 0.5, -0.5, -1.,
44368 50., f32::INFINITY, -50., -16.,
44369 0., 0., 0., 0.,
44370 0., 0., 0., 0.,
44371 );
44372 assert_eq_m512(r, e);
44373 }
44374
44375 #[simd_test(enable = "avx512f,avx512vl")]
44376 unsafe fn test_mm256_mask_div_ps() {
44377 let a = _mm256_set_ps(0., 1., -1., -2., 100., 100., -100., -32.);
44378 let b = _mm256_set_ps(2., 2., 2., 2., 2., 0., 2., 2.);
44379 let r = _mm256_mask_div_ps(a, 0, a, b);
44380 assert_eq_m256(r, a);
44381 let r = _mm256_mask_div_ps(a, 0b11111111, a, b);
44382 let e = _mm256_set_ps(0., 0.5, -0.5, -1., 50., f32::INFINITY, -50., -16.);
44383 assert_eq_m256(r, e);
44384 }
44385
44386 #[simd_test(enable = "avx512f,avx512vl")]
44387 unsafe fn test_mm256_maskz_div_ps() {
44388 let a = _mm256_set_ps(0., 1., -1., -2., 100., 100., -100., -32.);
44389 let b = _mm256_set_ps(2., 2., 2., 2., 2., 0., 2., 2.);
44390 let r = _mm256_maskz_div_ps(0, a, b);
44391 assert_eq_m256(r, _mm256_setzero_ps());
44392 let r = _mm256_maskz_div_ps(0b11111111, a, b);
44393 let e = _mm256_set_ps(0., 0.5, -0.5, -1., 50., f32::INFINITY, -50., -16.);
44394 assert_eq_m256(r, e);
44395 }
44396
44397 #[simd_test(enable = "avx512f,avx512vl")]
44398 unsafe fn test_mm_mask_div_ps() {
44399 let a = _mm_set_ps(100., 100., -100., -32.);
44400 let b = _mm_set_ps(2., 0., 2., 2.);
44401 let r = _mm_mask_div_ps(a, 0, a, b);
44402 assert_eq_m128(r, a);
44403 let r = _mm_mask_div_ps(a, 0b00001111, a, b);
44404 let e = _mm_set_ps(50., f32::INFINITY, -50., -16.);
44405 assert_eq_m128(r, e);
44406 }
44407
44408 #[simd_test(enable = "avx512f,avx512vl")]
44409 unsafe fn test_mm_maskz_div_ps() {
44410 let a = _mm_set_ps(100., 100., -100., -32.);
44411 let b = _mm_set_ps(2., 0., 2., 2.);
44412 let r = _mm_maskz_div_ps(0, a, b);
44413 assert_eq_m128(r, _mm_setzero_ps());
44414 let r = _mm_maskz_div_ps(0b00001111, a, b);
44415 let e = _mm_set_ps(50., f32::INFINITY, -50., -16.);
44416 assert_eq_m128(r, e);
44417 }
44418
44419 #[simd_test(enable = "avx512f")]
44420 unsafe fn test_mm512_max_epi32() {
44421 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44422 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44423 let r = _mm512_max_epi32(a, b);
44424 let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
44425 assert_eq_m512i(r, e);
44426 }
44427
44428 #[simd_test(enable = "avx512f")]
44429 unsafe fn test_mm512_mask_max_epi32() {
44430 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44431 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44432 let r = _mm512_mask_max_epi32(a, 0, a, b);
44433 assert_eq_m512i(r, a);
44434 let r = _mm512_mask_max_epi32(a, 0b00000000_11111111, a, b);
44435 let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
44436 assert_eq_m512i(r, e);
44437 }
44438
44439 #[simd_test(enable = "avx512f")]
44440 unsafe fn test_mm512_maskz_max_epi32() {
44441 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44442 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44443 let r = _mm512_maskz_max_epi32(0, a, b);
44444 assert_eq_m512i(r, _mm512_setzero_si512());
44445 let r = _mm512_maskz_max_epi32(0b00000000_11111111, a, b);
44446 let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 0, 0, 0, 0, 0, 0, 0, 0);
44447 assert_eq_m512i(r, e);
44448 }
44449
44450 #[simd_test(enable = "avx512f,avx512vl")]
44451 unsafe fn test_mm256_mask_max_epi32() {
44452 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44453 let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44454 let r = _mm256_mask_max_epi32(a, 0, a, b);
44455 assert_eq_m256i(r, a);
44456 let r = _mm256_mask_max_epi32(a, 0b11111111, a, b);
44457 let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7);
44458 assert_eq_m256i(r, e);
44459 }
44460
44461 #[simd_test(enable = "avx512f,avx512vl")]
44462 unsafe fn test_mm256_maskz_max_epi32() {
44463 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44464 let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44465 let r = _mm256_maskz_max_epi32(0, a, b);
44466 assert_eq_m256i(r, _mm256_setzero_si256());
44467 let r = _mm256_maskz_max_epi32(0b11111111, a, b);
44468 let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7);
44469 assert_eq_m256i(r, e);
44470 }
44471
44472 #[simd_test(enable = "avx512f,avx512vl")]
44473 unsafe fn test_mm_mask_max_epi32() {
44474 let a = _mm_set_epi32(0, 1, 2, 3);
44475 let b = _mm_set_epi32(3, 2, 1, 0);
44476 let r = _mm_mask_max_epi32(a, 0, a, b);
44477 assert_eq_m128i(r, a);
44478 let r = _mm_mask_max_epi32(a, 0b00001111, a, b);
44479 let e = _mm_set_epi32(3, 2, 2, 3);
44480 assert_eq_m128i(r, e);
44481 }
44482
44483 #[simd_test(enable = "avx512f,avx512vl")]
44484 unsafe fn test_mm_maskz_max_epi32() {
44485 let a = _mm_set_epi32(0, 1, 2, 3);
44486 let b = _mm_set_epi32(3, 2, 1, 0);
44487 let r = _mm_maskz_max_epi32(0, a, b);
44488 assert_eq_m128i(r, _mm_setzero_si128());
44489 let r = _mm_maskz_max_epi32(0b00001111, a, b);
44490 let e = _mm_set_epi32(3, 2, 2, 3);
44491 assert_eq_m128i(r, e);
44492 }
44493
44494 #[simd_test(enable = "avx512f")]
44495 unsafe fn test_mm512_max_ps() {
44496 let a = _mm512_setr_ps(
44497 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44498 );
44499 let b = _mm512_setr_ps(
44500 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
44501 );
44502 let r = _mm512_max_ps(a, b);
44503 let e = _mm512_setr_ps(
44504 15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
44505 );
44506 assert_eq_m512(r, e);
44507 }
44508
44509 #[simd_test(enable = "avx512f")]
44510 unsafe fn test_mm512_mask_max_ps() {
44511 let a = _mm512_setr_ps(
44512 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44513 );
44514 let b = _mm512_setr_ps(
44515 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
44516 );
44517 let r = _mm512_mask_max_ps(a, 0, a, b);
44518 assert_eq_m512(r, a);
44519 let r = _mm512_mask_max_ps(a, 0b00000000_11111111, a, b);
44520 let e = _mm512_setr_ps(
44521 15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
44522 );
44523 assert_eq_m512(r, e);
44524 }
44525
44526 #[simd_test(enable = "avx512f")]
44527 unsafe fn test_mm512_maskz_max_ps() {
44528 let a = _mm512_setr_ps(
44529 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44530 );
44531 let b = _mm512_setr_ps(
44532 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
44533 );
44534 let r = _mm512_maskz_max_ps(0, a, b);
44535 assert_eq_m512(r, _mm512_setzero_ps());
44536 let r = _mm512_maskz_max_ps(0b00000000_11111111, a, b);
44537 let e = _mm512_setr_ps(
44538 15., 14., 13., 12., 11., 10., 9., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
44539 );
44540 assert_eq_m512(r, e);
44541 }
44542
44543 #[simd_test(enable = "avx512f,avx512vl")]
44544 unsafe fn test_mm256_mask_max_ps() {
44545 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44546 let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
44547 let r = _mm256_mask_max_ps(a, 0, a, b);
44548 assert_eq_m256(r, a);
44549 let r = _mm256_mask_max_ps(a, 0b11111111, a, b);
44550 let e = _mm256_set_ps(7., 6., 5., 4., 4., 5., 6., 7.);
44551 assert_eq_m256(r, e);
44552 }
44553
44554 #[simd_test(enable = "avx512f,avx512vl")]
44555 unsafe fn test_mm256_maskz_max_ps() {
44556 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44557 let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
44558 let r = _mm256_maskz_max_ps(0, a, b);
44559 assert_eq_m256(r, _mm256_setzero_ps());
44560 let r = _mm256_maskz_max_ps(0b11111111, a, b);
44561 let e = _mm256_set_ps(7., 6., 5., 4., 4., 5., 6., 7.);
44562 assert_eq_m256(r, e);
44563 }
44564
44565 #[simd_test(enable = "avx512f,avx512vl")]
44566 unsafe fn test_mm_mask_max_ps() {
44567 let a = _mm_set_ps(0., 1., 2., 3.);
44568 let b = _mm_set_ps(3., 2., 1., 0.);
44569 let r = _mm_mask_max_ps(a, 0, a, b);
44570 assert_eq_m128(r, a);
44571 let r = _mm_mask_max_ps(a, 0b00001111, a, b);
44572 let e = _mm_set_ps(3., 2., 2., 3.);
44573 assert_eq_m128(r, e);
44574 }
44575
44576 #[simd_test(enable = "avx512f,avx512vl")]
44577 unsafe fn test_mm_maskz_max_ps() {
44578 let a = _mm_set_ps(0., 1., 2., 3.);
44579 let b = _mm_set_ps(3., 2., 1., 0.);
44580 let r = _mm_maskz_max_ps(0, a, b);
44581 assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_max_ps(0b00001111, a, b);
44583 let e = _mm_set_ps(3., 2., 2., 3.);
44584 assert_eq_m128(r, e);
44585 }
44586
44587 #[simd_test(enable = "avx512f")]
44588 unsafe fn test_mm512_max_epu32() {
44589 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44590 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44591 let r = _mm512_max_epu32(a, b);
44592 let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
44593 assert_eq_m512i(r, e);
44594 }
44595
44596 #[simd_test(enable = "avx512f")]
44597 unsafe fn test_mm512_mask_max_epu32() {
44598 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44599 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44600 let r = _mm512_mask_max_epu32(a, 0, a, b);
44601 assert_eq_m512i(r, a);
44602 let r = _mm512_mask_max_epu32(a, 0b00000000_11111111, a, b);
44603 let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
44604 assert_eq_m512i(r, e);
44605 }
44606
44607 #[simd_test(enable = "avx512f")]
44608 unsafe fn test_mm512_maskz_max_epu32() {
44609 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44610 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44611 let r = _mm512_maskz_max_epu32(0, a, b);
44612 assert_eq_m512i(r, _mm512_setzero_si512());
44613 let r = _mm512_maskz_max_epu32(0b00000000_11111111, a, b);
44614 let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 0, 0, 0, 0, 0, 0, 0, 0);
44615 assert_eq_m512i(r, e);
44616 }
44617
44618 #[simd_test(enable = "avx512f,avx512vl")]
44619 unsafe fn test_mm256_mask_max_epu32() {
44620 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44621 let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44622 let r = _mm256_mask_max_epu32(a, 0, a, b);
44623 assert_eq_m256i(r, a);
44624 let r = _mm256_mask_max_epu32(a, 0b11111111, a, b);
44625 let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7);
44626 assert_eq_m256i(r, e);
44627 }
44628
44629 #[simd_test(enable = "avx512f,avx512vl")]
44630 unsafe fn test_mm256_maskz_max_epu32() {
44631 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44632 let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44633 let r = _mm256_maskz_max_epu32(0, a, b);
44634 assert_eq_m256i(r, _mm256_setzero_si256());
44635 let r = _mm256_maskz_max_epu32(0b11111111, a, b);
44636 let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7);
44637 assert_eq_m256i(r, e);
44638 }
44639
44640 #[simd_test(enable = "avx512f,avx512vl")]
44641 unsafe fn test_mm_mask_max_epu32() {
44642 let a = _mm_set_epi32(0, 1, 2, 3);
44643 let b = _mm_set_epi32(3, 2, 1, 0);
44644 let r = _mm_mask_max_epu32(a, 0, a, b);
44645 assert_eq_m128i(r, a);
44646 let r = _mm_mask_max_epu32(a, 0b00001111, a, b);
44647 let e = _mm_set_epi32(3, 2, 2, 3);
44648 assert_eq_m128i(r, e);
44649 }
44650
44651 #[simd_test(enable = "avx512f,avx512vl")]
44652 unsafe fn test_mm_maskz_max_epu32() {
44653 let a = _mm_set_epi32(0, 1, 2, 3);
44654 let b = _mm_set_epi32(3, 2, 1, 0);
44655 let r = _mm_maskz_max_epu32(0, a, b);
44656 assert_eq_m128i(r, _mm_setzero_si128());
44657 let r = _mm_maskz_max_epu32(0b00001111, a, b);
44658 let e = _mm_set_epi32(3, 2, 2, 3);
44659 assert_eq_m128i(r, e);
44660 }
44661
44662 #[simd_test(enable = "avx512f")]
44663 unsafe fn test_mm512_min_epi32() {
44664 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44665 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44666 let r = _mm512_min_epi32(a, b);
44667 let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
44668 assert_eq_m512i(r, e);
44669 }
44670
44671 #[simd_test(enable = "avx512f")]
44672 unsafe fn test_mm512_mask_min_epi32() {
44673 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44674 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44675 let r = _mm512_mask_min_epi32(a, 0, a, b);
44676 assert_eq_m512i(r, a);
44677 let r = _mm512_mask_min_epi32(a, 0b00000000_11111111, a, b);
44678 let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44679 assert_eq_m512i(r, e);
44680 }
44681
44682 #[simd_test(enable = "avx512f")]
44683 unsafe fn test_mm512_maskz_min_epi32() {
44684 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44685 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44686 let r = _mm512_maskz_min_epi32(0, a, b);
44687 assert_eq_m512i(r, _mm512_setzero_si512());
44688 let r = _mm512_maskz_min_epi32(0b00000000_11111111, a, b);
44689 let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0);
44690 assert_eq_m512i(r, e);
44691 }
44692
44693 #[simd_test(enable = "avx512f,avx512vl")]
44694 unsafe fn test_mm256_mask_min_epi32() {
44695 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44696 let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44697 let r = _mm256_mask_min_epi32(a, 0, a, b);
44698 assert_eq_m256i(r, a);
44699 let r = _mm256_mask_min_epi32(a, 0b11111111, a, b);
44700 let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0);
44701 assert_eq_m256i(r, e);
44702 }
44703
44704 #[simd_test(enable = "avx512f,avx512vl")]
44705 unsafe fn test_mm256_maskz_min_epi32() {
44706 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44707 let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44708 let r = _mm256_maskz_min_epi32(0, a, b);
44709 assert_eq_m256i(r, _mm256_setzero_si256());
44710 let r = _mm256_maskz_min_epi32(0b11111111, a, b);
44711 let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0);
44712 assert_eq_m256i(r, e);
44713 }
44714
44715 #[simd_test(enable = "avx512f,avx512vl")]
44716 unsafe fn test_mm_mask_min_epi32() {
44717 let a = _mm_set_epi32(0, 1, 2, 3);
44718 let b = _mm_set_epi32(3, 2, 1, 0);
44719 let r = _mm_mask_min_epi32(a, 0, a, b);
44720 assert_eq_m128i(r, a);
44721 let r = _mm_mask_min_epi32(a, 0b00001111, a, b);
44722 let e = _mm_set_epi32(0, 1, 1, 0);
44723 assert_eq_m128i(r, e);
44724 }
44725
44726 #[simd_test(enable = "avx512f,avx512vl")]
44727 unsafe fn test_mm_maskz_min_epi32() {
44728 let a = _mm_set_epi32(0, 1, 2, 3);
44729 let b = _mm_set_epi32(3, 2, 1, 0);
44730 let r = _mm_maskz_min_epi32(0, a, b);
44731 assert_eq_m128i(r, _mm_setzero_si128());
44732 let r = _mm_maskz_min_epi32(0b00001111, a, b);
44733 let e = _mm_set_epi32(0, 1, 1, 0);
44734 assert_eq_m128i(r, e);
44735 }
44736
44737 #[simd_test(enable = "avx512f")]
44738 unsafe fn test_mm512_min_ps() {
44739 let a = _mm512_setr_ps(
44740 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44741 );
44742 let b = _mm512_setr_ps(
44743 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
44744 );
44745 let r = _mm512_min_ps(a, b);
44746 let e = _mm512_setr_ps(
44747 0., 1., 2., 3., 4., 5., 6., 7., 7., 6., 5., 4., 3., 2., 1., 0.,
44748 );
44749 assert_eq_m512(r, e);
44750 }
44751
44752 #[simd_test(enable = "avx512f")]
44753 unsafe fn test_mm512_mask_min_ps() {
44754 let a = _mm512_setr_ps(
44755 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44756 );
44757 let b = _mm512_setr_ps(
44758 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
44759 );
44760 let r = _mm512_mask_min_ps(a, 0, a, b);
44761 assert_eq_m512(r, a);
44762 let r = _mm512_mask_min_ps(a, 0b00000000_11111111, a, b);
44763 let e = _mm512_setr_ps(
44764 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44765 );
44766 assert_eq_m512(r, e);
44767 }
44768
44769 #[simd_test(enable = "avx512f")]
44770 unsafe fn test_mm512_maskz_min_ps() {
44771 let a = _mm512_setr_ps(
44772 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44773 );
44774 let b = _mm512_setr_ps(
44775 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
44776 );
44777 let r = _mm512_maskz_min_ps(0, a, b);
44778 assert_eq_m512(r, _mm512_setzero_ps());
44779 let r = _mm512_maskz_min_ps(0b00000000_11111111, a, b);
44780 let e = _mm512_setr_ps(
44781 0., 1., 2., 3., 4., 5., 6., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
44782 );
44783 assert_eq_m512(r, e);
44784 }
44785
44786 #[simd_test(enable = "avx512f,avx512vl")]
44787 unsafe fn test_mm256_mask_min_ps() {
44788 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44789 let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
44790 let r = _mm256_mask_min_ps(a, 0, a, b);
44791 assert_eq_m256(r, a);
44792 let r = _mm256_mask_min_ps(a, 0b11111111, a, b);
44793 let e = _mm256_set_ps(0., 1., 2., 3., 3., 2., 1., 0.);
44794 assert_eq_m256(r, e);
44795 }
44796
44797 #[simd_test(enable = "avx512f,avx512vl")]
44798 unsafe fn test_mm256_maskz_min_ps() {
44799 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44800 let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
44801 let r = _mm256_maskz_min_ps(0, a, b);
44802 assert_eq_m256(r, _mm256_setzero_ps());
44803 let r = _mm256_maskz_min_ps(0b11111111, a, b);
44804 let e = _mm256_set_ps(0., 1., 2., 3., 3., 2., 1., 0.);
44805 assert_eq_m256(r, e);
44806 }
44807
44808 #[simd_test(enable = "avx512f,avx512vl")]
44809 unsafe fn test_mm_mask_min_ps() {
44810 let a = _mm_set_ps(0., 1., 2., 3.);
44811 let b = _mm_set_ps(3., 2., 1., 0.);
44812 let r = _mm_mask_min_ps(a, 0, a, b);
44813 assert_eq_m128(r, a);
44814 let r = _mm_mask_min_ps(a, 0b00001111, a, b);
44815 let e = _mm_set_ps(0., 1., 1., 0.);
44816 assert_eq_m128(r, e);
44817 }
44818
44819 #[simd_test(enable = "avx512f,avx512vl")]
44820 unsafe fn test_mm_maskz_min_ps() {
44821 let a = _mm_set_ps(0., 1., 2., 3.);
44822 let b = _mm_set_ps(3., 2., 1., 0.);
44823 let r = _mm_maskz_min_ps(0, a, b);
44824 assert_eq_m128(r, _mm_setzero_ps());
44825 let r = _mm_maskz_min_ps(0b00001111, a, b);
44826 let e = _mm_set_ps(0., 1., 1., 0.);
44827 assert_eq_m128(r, e);
44828 }
44829
44830 #[simd_test(enable = "avx512f")]
44831 unsafe fn test_mm512_min_epu32() {
44832 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44833 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44834 let r = _mm512_min_epu32(a, b);
44835 let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
44836 assert_eq_m512i(r, e);
44837 }
44838
44839 #[simd_test(enable = "avx512f")]
44840 unsafe fn test_mm512_mask_min_epu32() {
44841 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44842 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44843 let r = _mm512_mask_min_epu32(a, 0, a, b);
44844 assert_eq_m512i(r, a);
44845 let r = _mm512_mask_min_epu32(a, 0b00000000_11111111, a, b);
44846 let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44847 assert_eq_m512i(r, e);
44848 }
44849
44850 #[simd_test(enable = "avx512f")]
44851 unsafe fn test_mm512_maskz_min_epu32() {
44852 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44853 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44854 let r = _mm512_maskz_min_epu32(0, a, b);
44855 assert_eq_m512i(r, _mm512_setzero_si512());
44856 let r = _mm512_maskz_min_epu32(0b00000000_11111111, a, b);
44857 let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0);
44858 assert_eq_m512i(r, e);
44859 }
44860
44861 #[simd_test(enable = "avx512f,avx512vl")]
44862 unsafe fn test_mm256_mask_min_epu32() {
44863 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44864 let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44865 let r = _mm256_mask_min_epu32(a, 0, a, b);
44866 assert_eq_m256i(r, a);
44867 let r = _mm256_mask_min_epu32(a, 0b11111111, a, b);
44868 let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0);
44869 assert_eq_m256i(r, e);
44870 }
44871
44872 #[simd_test(enable = "avx512f,avx512vl")]
44873 unsafe fn test_mm256_maskz_min_epu32() {
44874 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44875 let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44876 let r = _mm256_maskz_min_epu32(0, a, b);
44877 assert_eq_m256i(r, _mm256_setzero_si256());
44878 let r = _mm256_maskz_min_epu32(0b11111111, a, b);
44879 let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0);
44880 assert_eq_m256i(r, e);
44881 }
44882
44883 #[simd_test(enable = "avx512f,avx512vl")]
44884 unsafe fn test_mm_mask_min_epu32() {
44885 let a = _mm_set_epi32(0, 1, 2, 3);
44886 let b = _mm_set_epi32(3, 2, 1, 0);
44887 let r = _mm_mask_min_epu32(a, 0, a, b);
44888 assert_eq_m128i(r, a);
44889 let r = _mm_mask_min_epu32(a, 0b00001111, a, b);
44890 let e = _mm_set_epi32(0, 1, 1, 0);
44891 assert_eq_m128i(r, e);
44892 }
44893
44894 #[simd_test(enable = "avx512f,avx512vl")]
44895 unsafe fn test_mm_maskz_min_epu32() {
44896 let a = _mm_set_epi32(0, 1, 2, 3);
44897 let b = _mm_set_epi32(3, 2, 1, 0);
44898 let r = _mm_maskz_min_epu32(0, a, b);
44899 assert_eq_m128i(r, _mm_setzero_si128());
44900 let r = _mm_maskz_min_epu32(0b00001111, a, b);
44901 let e = _mm_set_epi32(0, 1, 1, 0);
44902 assert_eq_m128i(r, e);
44903 }
44904
44905 #[simd_test(enable = "avx512f")]
44906 unsafe fn test_mm512_sqrt_ps() {
44907 let a = _mm512_setr_ps(
44908 0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225.,
44909 );
44910 let r = _mm512_sqrt_ps(a);
44911 let e = _mm512_setr_ps(
44912 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44913 );
44914 assert_eq_m512(r, e);
44915 }
44916
44917 #[simd_test(enable = "avx512f")]
44918 unsafe fn test_mm512_mask_sqrt_ps() {
44919 let a = _mm512_setr_ps(
44920 0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225.,
44921 );
44922 let r = _mm512_mask_sqrt_ps(a, 0, a);
44923 assert_eq_m512(r, a);
44924 let r = _mm512_mask_sqrt_ps(a, 0b00000000_11111111, a);
44925 let e = _mm512_setr_ps(
44926 0., 1., 2., 3., 4., 5., 6., 7., 64., 81., 100., 121., 144., 169., 196., 225.,
44927 );
44928 assert_eq_m512(r, e);
44929 }
44930
44931 #[simd_test(enable = "avx512f")]
44932 unsafe fn test_mm512_maskz_sqrt_ps() {
44933 let a = _mm512_setr_ps(
44934 0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225.,
44935 );
44936 let r = _mm512_maskz_sqrt_ps(0, a);
44937 assert_eq_m512(r, _mm512_setzero_ps());
44938 let r = _mm512_maskz_sqrt_ps(0b00000000_11111111, a);
44939 let e = _mm512_setr_ps(
44940 0., 1., 2., 3., 4., 5., 6., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
44941 );
44942 assert_eq_m512(r, e);
44943 }
44944
44945 #[simd_test(enable = "avx512f,avx512vl")]
44946 unsafe fn test_mm256_mask_sqrt_ps() {
44947 let a = _mm256_set_ps(0., 1., 4., 9., 16., 25., 36., 49.);
44948 let r = _mm256_mask_sqrt_ps(a, 0, a);
44949 assert_eq_m256(r, a);
44950 let r = _mm256_mask_sqrt_ps(a, 0b11111111, a);
44951 let e = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44952 assert_eq_m256(r, e);
44953 }
44954
44955 #[simd_test(enable = "avx512f,avx512vl")]
44956 unsafe fn test_mm256_maskz_sqrt_ps() {
44957 let a = _mm256_set_ps(0., 1., 4., 9., 16., 25., 36., 49.);
44958 let r = _mm256_maskz_sqrt_ps(0, a);
44959 assert_eq_m256(r, _mm256_setzero_ps());
44960 let r = _mm256_maskz_sqrt_ps(0b11111111, a);
44961 let e = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44962 assert_eq_m256(r, e);
44963 }
44964
44965 #[simd_test(enable = "avx512f,avx512vl")]
44966 unsafe fn test_mm_mask_sqrt_ps() {
44967 let a = _mm_set_ps(0., 1., 4., 9.);
44968 let r = _mm_mask_sqrt_ps(a, 0, a);
44969 assert_eq_m128(r, a);
44970 let r = _mm_mask_sqrt_ps(a, 0b00001111, a);
44971 let e = _mm_set_ps(0., 1., 2., 3.);
44972 assert_eq_m128(r, e);
44973 }
44974
44975 #[simd_test(enable = "avx512f,avx512vl")]
44976 unsafe fn test_mm_maskz_sqrt_ps() {
44977 let a = _mm_set_ps(0., 1., 4., 9.);
44978 let r = _mm_maskz_sqrt_ps(0, a);
44979 assert_eq_m128(r, _mm_setzero_ps());
44980 let r = _mm_maskz_sqrt_ps(0b00001111, a);
44981 let e = _mm_set_ps(0., 1., 2., 3.);
44982 assert_eq_m128(r, e);
44983 }
44984
44985 #[simd_test(enable = "avx512f")]
44986 unsafe fn test_mm512_fmadd_ps() {
44987 let a = _mm512_set1_ps(1.);
44988 let b = _mm512_setr_ps(
44989 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44990 );
44991 let c = _mm512_set1_ps(1.);
44992 let r = _mm512_fmadd_ps(a, b, c);
44993 let e = _mm512_setr_ps(
44994 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
44995 );
44996 assert_eq_m512(r, e);
44997 }
44998
44999 #[simd_test(enable = "avx512f")]
45000 unsafe fn test_mm512_mask_fmadd_ps() {
45001 let a = _mm512_set1_ps(1.);
45002 let b = _mm512_setr_ps(
45003 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45004 );
45005 let c = _mm512_set1_ps(1.);
45006 let r = _mm512_mask_fmadd_ps(a, 0, b, c);
45007 assert_eq_m512(r, a);
45008 let r = _mm512_mask_fmadd_ps(a, 0b00000000_11111111, b, c);
45009 let e = _mm512_setr_ps(
45010 1., 2., 3., 4., 5., 6., 7., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
45011 );
45012 assert_eq_m512(r, e);
45013 }
45014
45015 #[simd_test(enable = "avx512f")]
45016 unsafe fn test_mm512_maskz_fmadd_ps() {
45017 let a = _mm512_set1_ps(1.);
45018 let b = _mm512_setr_ps(
45019 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45020 );
45021 let c = _mm512_set1_ps(1.);
45022 let r = _mm512_maskz_fmadd_ps(0, a, b, c);
45023 assert_eq_m512(r, _mm512_setzero_ps());
45024 let r = _mm512_maskz_fmadd_ps(0b00000000_11111111, a, b, c);
45025 let e = _mm512_setr_ps(
45026 1., 2., 3., 4., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
45027 );
45028 assert_eq_m512(r, e);
45029 }
45030
45031 #[simd_test(enable = "avx512f")]
45032 unsafe fn test_mm512_mask3_fmadd_ps() {
45033 let a = _mm512_set1_ps(1.);
45034 let b = _mm512_setr_ps(
45035 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45036 );
45037 let c = _mm512_set1_ps(2.);
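        // The mask3 variants take the writemask as the last argument and copy c (the addend)
        // into lanes whose mask bit is not set.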
45038 let r = _mm512_mask3_fmadd_ps(a, b, c, 0);
45039 assert_eq_m512(r, c);
45040 let r = _mm512_mask3_fmadd_ps(a, b, c, 0b00000000_11111111);
45041 let e = _mm512_setr_ps(
45042 2., 3., 4., 5., 6., 7., 8., 9., 2., 2., 2., 2., 2., 2., 2., 2.,
45043 );
45044 assert_eq_m512(r, e);
45045 }
45046
45047 #[simd_test(enable = "avx512f,avx512vl")]
45048 unsafe fn test_mm256_mask_fmadd_ps() {
45049 let a = _mm256_set1_ps(1.);
45050 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45051 let c = _mm256_set1_ps(1.);
45052 let r = _mm256_mask_fmadd_ps(a, 0, b, c);
45053 assert_eq_m256(r, a);
45054 let r = _mm256_mask_fmadd_ps(a, 0b11111111, b, c);
45055 let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
45056 assert_eq_m256(r, e);
45057 }
45058
45059 #[simd_test(enable = "avx512f,avx512vl")]
45060 unsafe fn test_mm256_maskz_fmadd_ps() {
45061 let a = _mm256_set1_ps(1.);
45062 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45063 let c = _mm256_set1_ps(1.);
45064 let r = _mm256_maskz_fmadd_ps(0, a, b, c);
45065 assert_eq_m256(r, _mm256_setzero_ps());
45066 let r = _mm256_maskz_fmadd_ps(0b11111111, a, b, c);
45067 let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
45068 assert_eq_m256(r, e);
45069 }
45070
45071 #[simd_test(enable = "avx512f,avx512vl")]
45072 unsafe fn test_mm256_mask3_fmadd_ps() {
45073 let a = _mm256_set1_ps(1.);
45074 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45075 let c = _mm256_set1_ps(1.);
45076 let r = _mm256_mask3_fmadd_ps(a, b, c, 0);
45077 assert_eq_m256(r, c);
45078 let r = _mm256_mask3_fmadd_ps(a, b, c, 0b11111111);
45079 let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
45080 assert_eq_m256(r, e);
45081 }
45082
45083 #[simd_test(enable = "avx512f,avx512vl")]
45084 unsafe fn test_mm_mask_fmadd_ps() {
45085 let a = _mm_set1_ps(1.);
45086 let b = _mm_set_ps(0., 1., 2., 3.);
45087 let c = _mm_set1_ps(1.);
45088 let r = _mm_mask_fmadd_ps(a, 0, b, c);
45089 assert_eq_m128(r, a);
45090 let r = _mm_mask_fmadd_ps(a, 0b00001111, b, c);
45091 let e = _mm_set_ps(1., 2., 3., 4.);
45092 assert_eq_m128(r, e);
45093 }
45094
45095 #[simd_test(enable = "avx512f,avx512vl")]
45096 unsafe fn test_mm_maskz_fmadd_ps() {
45097 let a = _mm_set1_ps(1.);
45098 let b = _mm_set_ps(0., 1., 2., 3.);
45099 let c = _mm_set1_ps(1.);
45100 let r = _mm_maskz_fmadd_ps(0, a, b, c);
45101 assert_eq_m128(r, _mm_setzero_ps());
45102 let r = _mm_maskz_fmadd_ps(0b00001111, a, b, c);
45103 let e = _mm_set_ps(1., 2., 3., 4.);
45104 assert_eq_m128(r, e);
45105 }
45106
45107 #[simd_test(enable = "avx512f,avx512vl")]
45108 unsafe fn test_mm_mask3_fmadd_ps() {
45109 let a = _mm_set1_ps(1.);
45110 let b = _mm_set_ps(0., 1., 2., 3.);
45111 let c = _mm_set1_ps(1.);
45112 let r = _mm_mask3_fmadd_ps(a, b, c, 0);
45113 assert_eq_m128(r, c);
45114 let r = _mm_mask3_fmadd_ps(a, b, c, 0b00001111);
45115 let e = _mm_set_ps(1., 2., 3., 4.);
45116 assert_eq_m128(r, e);
45117 }
45118
45119 #[simd_test(enable = "avx512f")]
45120 unsafe fn test_mm512_fmsub_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_set1_ps(1.);
45130 let r = _mm512_fmsub_ps(a, b, c);
45131 let e = _mm512_setr_ps(
45132 -1., 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14.,
45133 );
45134 assert_eq_m512(r, e);
45135 }
45136
45137 #[simd_test(enable = "avx512f")]
45138 unsafe fn test_mm512_mask_fmsub_ps() {
45139 let a = _mm512_set1_ps(1.);
45140 let b = _mm512_setr_ps(
45141 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45142 );
45143 let c = _mm512_set1_ps(1.);
45144 let r = _mm512_mask_fmsub_ps(a, 0, b, c);
45145 assert_eq_m512(r, a);
45146 let r = _mm512_mask_fmsub_ps(a, 0b00000000_11111111, b, c);
45147 let e = _mm512_setr_ps(
45148 -1., 0., 1., 2., 3., 4., 5., 6., 1., 1., 1., 1., 1., 1., 1., 1.,
45149 );
45150 assert_eq_m512(r, e);
45151 }
45152
45153 #[simd_test(enable = "avx512f")]
45154 unsafe fn test_mm512_maskz_fmsub_ps() {
45155 let a = _mm512_set1_ps(1.);
45156 let b = _mm512_setr_ps(
45157 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45158 );
45159 let c = _mm512_set1_ps(1.);
45160 let r = _mm512_maskz_fmsub_ps(0, a, b, c);
45161 assert_eq_m512(r, _mm512_setzero_ps());
45162 let r = _mm512_maskz_fmsub_ps(0b00000000_11111111, a, b, c);
45163 let e = _mm512_setr_ps(
45164 -1., 0., 1., 2., 3., 4., 5., 6., 0., 0., 0., 0., 0., 0., 0., 0.,
45165 );
45166 assert_eq_m512(r, e);
45167 }
45168
45169 #[simd_test(enable = "avx512f")]
45170 unsafe fn test_mm512_mask3_fmsub_ps() {
45171 let a = _mm512_set1_ps(1.);
45172 let b = _mm512_setr_ps(
45173 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45174 );
45175 let c = _mm512_setr_ps(
45176 1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
45177 );
45178 let r = _mm512_mask3_fmsub_ps(a, b, c, 0);
45179 assert_eq_m512(r, c);
45180 let r = _mm512_mask3_fmsub_ps(a, b, c, 0b00000000_11111111);
45181 let e = _mm512_setr_ps(
45182 -1., 0., 1., 2., 3., 4., 5., 6., 2., 2., 2., 2., 2., 2., 2., 2.,
45183 );
45184 assert_eq_m512(r, e);
45185 }
45186
45187 #[simd_test(enable = "avx512f,avx512vl")]
45188 unsafe fn test_mm256_mask_fmsub_ps() {
45189 let a = _mm256_set1_ps(1.);
45190 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45191 let c = _mm256_set1_ps(1.);
45192 let r = _mm256_mask_fmsub_ps(a, 0, b, c);
45193 assert_eq_m256(r, a);
45194 let r = _mm256_mask_fmsub_ps(a, 0b11111111, b, c);
45195 let e = _mm256_set_ps(-1., 0., 1., 2., 3., 4., 5., 6.);
45196 assert_eq_m256(r, e);
45197 }
45198
45199 #[simd_test(enable = "avx512f,avx512vl")]
45200 unsafe fn test_mm256_maskz_fmsub_ps() {
45201 let a = _mm256_set1_ps(1.);
45202 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45203 let c = _mm256_set1_ps(1.);
45204 let r = _mm256_maskz_fmsub_ps(0, a, b, c);
45205 assert_eq_m256(r, _mm256_setzero_ps());
45206 let r = _mm256_maskz_fmsub_ps(0b11111111, a, b, c);
45207 let e = _mm256_set_ps(-1., 0., 1., 2., 3., 4., 5., 6.);
45208 assert_eq_m256(r, e);
45209 }
45210
45211 #[simd_test(enable = "avx512f,avx512vl")]
45212 unsafe fn test_mm256_mask3_fmsub_ps() {
45213 let a = _mm256_set1_ps(1.);
45214 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45215 let c = _mm256_set1_ps(1.);
45216 let r = _mm256_mask3_fmsub_ps(a, b, c, 0);
45217 assert_eq_m256(r, c);
45218 let r = _mm256_mask3_fmsub_ps(a, b, c, 0b11111111);
45219 let e = _mm256_set_ps(-1., 0., 1., 2., 3., 4., 5., 6.);
45220 assert_eq_m256(r, e);
45221 }
45222
45223 #[simd_test(enable = "avx512f,avx512vl")]
45224 unsafe fn test_mm_mask_fmsub_ps() {
45225 let a = _mm_set1_ps(1.);
45226 let b = _mm_set_ps(0., 1., 2., 3.);
45227 let c = _mm_set1_ps(1.);
45228 let r = _mm_mask_fmsub_ps(a, 0, b, c);
45229 assert_eq_m128(r, a);
45230 let r = _mm_mask_fmsub_ps(a, 0b00001111, b, c);
45231 let e = _mm_set_ps(-1., 0., 1., 2.);
45232 assert_eq_m128(r, e);
45233 }
45234
45235 #[simd_test(enable = "avx512f,avx512vl")]
45236 unsafe fn test_mm_maskz_fmsub_ps() {
45237 let a = _mm_set1_ps(1.);
45238 let b = _mm_set_ps(0., 1., 2., 3.);
45239 let c = _mm_set1_ps(1.);
45240 let r = _mm_maskz_fmsub_ps(0, a, b, c);
45241 assert_eq_m128(r, _mm_setzero_ps());
45242 let r = _mm_maskz_fmsub_ps(0b00001111, a, b, c);
45243 let e = _mm_set_ps(-1., 0., 1., 2.);
45244 assert_eq_m128(r, e);
45245 }
45246
45247 #[simd_test(enable = "avx512f,avx512vl")]
45248 unsafe fn test_mm_mask3_fmsub_ps() {
45249 let a = _mm_set1_ps(1.);
45250 let b = _mm_set_ps(0., 1., 2., 3.);
45251 let c = _mm_set1_ps(1.);
45252 let r = _mm_mask3_fmsub_ps(a, b, c, 0);
45253 assert_eq_m128(r, c);
45254 let r = _mm_mask3_fmsub_ps(a, b, c, 0b00001111);
45255 let e = _mm_set_ps(-1., 0., 1., 2.);
45256 assert_eq_m128(r, e);
45257 }
45258
45259 #[simd_test(enable = "avx512f")]
45260 unsafe fn test_mm512_fmaddsub_ps() {
45261 let a = _mm512_set1_ps(1.);
45262 let b = _mm512_setr_ps(
45263 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45264 );
45265 let c = _mm512_set1_ps(1.);
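        // fmaddsub alternates the sign of the addend per lane: even-indexed lanes compute
        // a * b - c, odd-indexed lanes compute a * b + c.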
45266 let r = _mm512_fmaddsub_ps(a, b, c);
45267 let e = _mm512_setr_ps(
45268 -1., 2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16.,
45269 );
45270 assert_eq_m512(r, e);
45271 }
45272
45273 #[simd_test(enable = "avx512f")]
45274 unsafe fn test_mm512_mask_fmaddsub_ps() {
45275 let a = _mm512_set1_ps(1.);
45276 let b = _mm512_setr_ps(
45277 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45278 );
45279 let c = _mm512_set1_ps(1.);
45280 let r = _mm512_mask_fmaddsub_ps(a, 0, b, c);
45281 assert_eq_m512(r, a);
45282 let r = _mm512_mask_fmaddsub_ps(a, 0b00000000_11111111, b, c);
45283 let e = _mm512_setr_ps(
45284 -1., 2., 1., 4., 3., 6., 5., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
45285 );
45286 assert_eq_m512(r, e);
45287 }
45288
45289 #[simd_test(enable = "avx512f")]
45290 unsafe fn test_mm512_maskz_fmaddsub_ps() {
45291 let a = _mm512_set1_ps(1.);
45292 let b = _mm512_setr_ps(
45293 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45294 );
45295 let c = _mm512_set1_ps(1.);
45296 let r = _mm512_maskz_fmaddsub_ps(0, a, b, c);
45297 assert_eq_m512(r, _mm512_setzero_ps());
45298 let r = _mm512_maskz_fmaddsub_ps(0b00000000_11111111, a, b, c);
45299 let e = _mm512_setr_ps(
45300 -1., 2., 1., 4., 3., 6., 5., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
45301 );
45302 assert_eq_m512(r, e);
45303 }
45304
45305 #[simd_test(enable = "avx512f")]
45306 unsafe fn test_mm512_mask3_fmaddsub_ps() {
45307 let a = _mm512_set1_ps(1.);
45308 let b = _mm512_setr_ps(
45309 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45310 );
45311 let c = _mm512_setr_ps(
45312 1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
45313 );
45314 let r = _mm512_mask3_fmaddsub_ps(a, b, c, 0);
45315 assert_eq_m512(r, c);
45316 let r = _mm512_mask3_fmaddsub_ps(a, b, c, 0b00000000_11111111);
45317 let e = _mm512_setr_ps(
45318 -1., 2., 1., 4., 3., 6., 5., 8., 2., 2., 2., 2., 2., 2., 2., 2.,
45319 );
45320 assert_eq_m512(r, e);
45321 }
45322
45323 #[simd_test(enable = "avx512f,avx512vl")]
45324 unsafe fn test_mm256_mask_fmaddsub_ps() {
45325 let a = _mm256_set1_ps(1.);
45326 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45327 let c = _mm256_set1_ps(1.);
45328 let r = _mm256_mask_fmaddsub_ps(a, 0, b, c);
45329 assert_eq_m256(r, a);
45330 let r = _mm256_mask_fmaddsub_ps(a, 0b11111111, b, c);
45331 let e = _mm256_set_ps(1., 0., 3., 2., 5., 4., 7., 6.);
45332 assert_eq_m256(r, e);
45333 }
45334
45335 #[simd_test(enable = "avx512f,avx512vl")]
45336 unsafe fn test_mm256_maskz_fmaddsub_ps() {
45337 let a = _mm256_set1_ps(1.);
45338 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45339 let c = _mm256_set1_ps(1.);
45340 let r = _mm256_maskz_fmaddsub_ps(0, a, b, c);
45341 assert_eq_m256(r, _mm256_setzero_ps());
45342 let r = _mm256_maskz_fmaddsub_ps(0b11111111, a, b, c);
45343 let e = _mm256_set_ps(1., 0., 3., 2., 5., 4., 7., 6.);
45344 assert_eq_m256(r, e);
45345 }
45346
45347 #[simd_test(enable = "avx512f,avx512vl")]
45348 unsafe fn test_mm256_mask3_fmaddsub_ps() {
45349 let a = _mm256_set1_ps(1.);
45350 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45351 let c = _mm256_set1_ps(1.);
45352 let r = _mm256_mask3_fmaddsub_ps(a, b, c, 0);
45353 assert_eq_m256(r, c);
45354 let r = _mm256_mask3_fmaddsub_ps(a, b, c, 0b11111111);
45355 let e = _mm256_set_ps(1., 0., 3., 2., 5., 4., 7., 6.);
45356 assert_eq_m256(r, e);
45357 }
45358
45359 #[simd_test(enable = "avx512f,avx512vl")]
45360 unsafe fn test_mm_mask_fmaddsub_ps() {
45361 let a = _mm_set1_ps(1.);
45362 let b = _mm_set_ps(0., 1., 2., 3.);
45363 let c = _mm_set1_ps(1.);
45364 let r = _mm_mask_fmaddsub_ps(a, 0, b, c);
45365 assert_eq_m128(r, a);
45366 let r = _mm_mask_fmaddsub_ps(a, 0b00001111, b, c);
45367 let e = _mm_set_ps(1., 0., 3., 2.);
45368 assert_eq_m128(r, e);
45369 }
45370
45371 #[simd_test(enable = "avx512f,avx512vl")]
45372 unsafe fn test_mm_maskz_fmaddsub_ps() {
45373 let a = _mm_set1_ps(1.);
45374 let b = _mm_set_ps(0., 1., 2., 3.);
45375 let c = _mm_set1_ps(1.);
45376 let r = _mm_maskz_fmaddsub_ps(0, a, b, c);
45377 assert_eq_m128(r, _mm_setzero_ps());
45378 let r = _mm_maskz_fmaddsub_ps(0b00001111, a, b, c);
45379 let e = _mm_set_ps(1., 0., 3., 2.);
45380 assert_eq_m128(r, e);
45381 }
45382
45383 #[simd_test(enable = "avx512f,avx512vl")]
45384 unsafe fn test_mm_mask3_fmaddsub_ps() {
45385 let a = _mm_set1_ps(1.);
45386 let b = _mm_set_ps(0., 1., 2., 3.);
45387 let c = _mm_set1_ps(1.);
45388 let r = _mm_mask3_fmaddsub_ps(a, b, c, 0);
45389 assert_eq_m128(r, c);
45390 let r = _mm_mask3_fmaddsub_ps(a, b, c, 0b00001111);
45391 let e = _mm_set_ps(1., 0., 3., 2.);
45392 assert_eq_m128(r, e);
45393 }
45394
45395 #[simd_test(enable = "avx512f")]
45396 unsafe fn test_mm512_fmsubadd_ps() {
        let a = _mm512_set1_ps(1.);
        let b = _mm512_setr_ps(
            0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
        );
        let c = _mm512_set1_ps(1.);
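        // fmsubadd is the mirror of fmaddsub: even-indexed lanes compute a * b + c,
        // odd-indexed lanes compute a * b - c.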
45406 let r = _mm512_fmsubadd_ps(a, b, c);
45407 let e = _mm512_setr_ps(
45408 1., 0., 3., 2., 5., 4., 7., 6., 9., 8., 11., 10., 13., 12., 15., 14.,
45409 );
45410 assert_eq_m512(r, e);
45411 }
45412
45413 #[simd_test(enable = "avx512f")]
45414 unsafe fn test_mm512_mask_fmsubadd_ps() {
45415 let a = _mm512_set1_ps(1.);
45416 let b = _mm512_setr_ps(
45417 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45418 );
45419 let c = _mm512_set1_ps(1.);
45420 let r = _mm512_mask_fmsubadd_ps(a, 0, b, c);
45421 assert_eq_m512(r, a);
45422 let r = _mm512_mask_fmsubadd_ps(a, 0b00000000_11111111, b, c);
45423 let e = _mm512_setr_ps(
45424 1., 0., 3., 2., 5., 4., 7., 6., 1., 1., 1., 1., 1., 1., 1., 1.,
45425 );
45426 assert_eq_m512(r, e);
45427 }
45428
45429 #[simd_test(enable = "avx512f")]
45430 unsafe fn test_mm512_maskz_fmsubadd_ps() {
45431 let a = _mm512_set1_ps(1.);
45432 let b = _mm512_setr_ps(
45433 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45434 );
45435 let c = _mm512_set1_ps(1.);
45436 let r = _mm512_maskz_fmsubadd_ps(0, a, b, c);
45437 assert_eq_m512(r, _mm512_setzero_ps());
45438 let r = _mm512_maskz_fmsubadd_ps(0b00000000_11111111, a, b, c);
45439 let e = _mm512_setr_ps(
45440 1., 0., 3., 2., 5., 4., 7., 6., 0., 0., 0., 0., 0., 0., 0., 0.,
45441 );
45442 assert_eq_m512(r, e);
45443 }
45444
45445 #[simd_test(enable = "avx512f")]
45446 unsafe fn test_mm512_mask3_fmsubadd_ps() {
45447 let a = _mm512_set1_ps(1.);
45448 let b = _mm512_setr_ps(
45449 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45450 );
45451 let c = _mm512_setr_ps(
45452 1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
45453 );
45454 let r = _mm512_mask3_fmsubadd_ps(a, b, c, 0);
45455 assert_eq_m512(r, c);
45456 let r = _mm512_mask3_fmsubadd_ps(a, b, c, 0b00000000_11111111);
45457 let e = _mm512_setr_ps(
45458 1., 0., 3., 2., 5., 4., 7., 6., 2., 2., 2., 2., 2., 2., 2., 2.,
45459 );
45460 assert_eq_m512(r, e);
45461 }
45462
45463 #[simd_test(enable = "avx512f,avx512vl")]
45464 unsafe fn test_mm256_mask_fmsubadd_ps() {
45465 let a = _mm256_set1_ps(1.);
45466 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45467 let c = _mm256_set1_ps(1.);
45468 let r = _mm256_mask_fmsubadd_ps(a, 0, b, c);
45469 assert_eq_m256(r, a);
45470 let r = _mm256_mask_fmsubadd_ps(a, 0b11111111, b, c);
45471 let e = _mm256_set_ps(-1., 2., 1., 4., 3., 6., 5., 8.);
45472 assert_eq_m256(r, e);
45473 }
45474
45475 #[simd_test(enable = "avx512f,avx512vl")]
45476 unsafe fn test_mm256_maskz_fmsubadd_ps() {
45477 let a = _mm256_set1_ps(1.);
45478 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45479 let c = _mm256_set1_ps(1.);
45480 let r = _mm256_maskz_fmsubadd_ps(0, a, b, c);
45481 assert_eq_m256(r, _mm256_setzero_ps());
45482 let r = _mm256_maskz_fmsubadd_ps(0b11111111, a, b, c);
45483 let e = _mm256_set_ps(-1., 2., 1., 4., 3., 6., 5., 8.);
45484 assert_eq_m256(r, e);
45485 }
45486
45487 #[simd_test(enable = "avx512f,avx512vl")]
45488 unsafe fn test_mm256_mask3_fmsubadd_ps() {
45489 let a = _mm256_set1_ps(1.);
45490 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45491 let c = _mm256_set1_ps(1.);
45492 let r = _mm256_mask3_fmsubadd_ps(a, b, c, 0);
45493 assert_eq_m256(r, c);
45494 let r = _mm256_mask3_fmsubadd_ps(a, b, c, 0b11111111);
45495 let e = _mm256_set_ps(-1., 2., 1., 4., 3., 6., 5., 8.);
45496 assert_eq_m256(r, e);
45497 }
45498
45499 #[simd_test(enable = "avx512f,avx512vl")]
45500 unsafe fn test_mm_mask_fmsubadd_ps() {
45501 let a = _mm_set1_ps(1.);
45502 let b = _mm_set_ps(0., 1., 2., 3.);
45503 let c = _mm_set1_ps(1.);
45504 let r = _mm_mask_fmsubadd_ps(a, 0, b, c);
45505 assert_eq_m128(r, a);
45506 let r = _mm_mask_fmsubadd_ps(a, 0b00001111, b, c);
45507 let e = _mm_set_ps(-1., 2., 1., 4.);
45508 assert_eq_m128(r, e);
45509 }
45510
45511 #[simd_test(enable = "avx512f,avx512vl")]
45512 unsafe fn test_mm_maskz_fmsubadd_ps() {
45513 let a = _mm_set1_ps(1.);
45514 let b = _mm_set_ps(0., 1., 2., 3.);
45515 let c = _mm_set1_ps(1.);
45516 let r = _mm_maskz_fmsubadd_ps(0, a, b, c);
45517 assert_eq_m128(r, _mm_setzero_ps());
45518 let r = _mm_maskz_fmsubadd_ps(0b00001111, a, b, c);
45519 let e = _mm_set_ps(-1., 2., 1., 4.);
45520 assert_eq_m128(r, e);
45521 }
45522
45523 #[simd_test(enable = "avx512f,avx512vl")]
45524 unsafe fn test_mm_mask3_fmsubadd_ps() {
45525 let a = _mm_set1_ps(1.);
45526 let b = _mm_set_ps(0., 1., 2., 3.);
45527 let c = _mm_set1_ps(1.);
45528 let r = _mm_mask3_fmsubadd_ps(a, b, c, 0);
45529 assert_eq_m128(r, c);
45530 let r = _mm_mask3_fmsubadd_ps(a, b, c, 0b00001111);
45531 let e = _mm_set_ps(-1., 2., 1., 4.);
45532 assert_eq_m128(r, e);
45533 }
45534
45535 #[simd_test(enable = "avx512f")]
45536 unsafe fn test_mm512_fnmadd_ps() {
45537 let a = _mm512_set1_ps(1.);
45538 let b = _mm512_setr_ps(
45539 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45540 );
45541 let c = _mm512_set1_ps(1.);
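        // fnmadd negates the product, not the addend: each lane computes -(a * b) + c.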
45542 let r = _mm512_fnmadd_ps(a, b, c);
45543 let e = _mm512_setr_ps(
45544 1., 0., -1., -2., -3., -4., -5., -6., -7., -8., -9., -10., -11., -12., -13., -14.,
45545 );
45546 assert_eq_m512(r, e);
45547 }
45548
45549 #[simd_test(enable = "avx512f")]
45550 unsafe fn test_mm512_mask_fnmadd_ps() {
45551 let a = _mm512_set1_ps(1.);
45552 let b = _mm512_setr_ps(
45553 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45554 );
45555 let c = _mm512_set1_ps(1.);
45556 let r = _mm512_mask_fnmadd_ps(a, 0, b, c);
45557 assert_eq_m512(r, a);
45558 let r = _mm512_mask_fnmadd_ps(a, 0b00000000_11111111, b, c);
45559 let e = _mm512_setr_ps(
45560 1., 0., -1., -2., -3., -4., -5., -6., 1., 1., 1., 1., 1., 1., 1., 1.,
45561 );
45562 assert_eq_m512(r, e);
45563 }
45564
45565 #[simd_test(enable = "avx512f")]
45566 unsafe fn test_mm512_maskz_fnmadd_ps() {
45567 let a = _mm512_set1_ps(1.);
45568 let b = _mm512_setr_ps(
45569 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45570 );
45571 let c = _mm512_set1_ps(1.);
45572 let r = _mm512_maskz_fnmadd_ps(0, a, b, c);
45573 assert_eq_m512(r, _mm512_setzero_ps());
45574 let r = _mm512_maskz_fnmadd_ps(0b00000000_11111111, a, b, c);
45575 let e = _mm512_setr_ps(
45576 1., 0., -1., -2., -3., -4., -5., -6., 0., 0., 0., 0., 0., 0., 0., 0.,
45577 );
45578 assert_eq_m512(r, e);
45579 }
45580
45581 #[simd_test(enable = "avx512f")]
45582 unsafe fn test_mm512_mask3_fnmadd_ps() {
45583 let a = _mm512_set1_ps(1.);
45584 let b = _mm512_setr_ps(
45585 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45586 );
45587 let c = _mm512_setr_ps(
45588 1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
45589 );
45590 let r = _mm512_mask3_fnmadd_ps(a, b, c, 0);
45591 assert_eq_m512(r, c);
45592 let r = _mm512_mask3_fnmadd_ps(a, b, c, 0b00000000_11111111);
45593 let e = _mm512_setr_ps(
45594 1., 0., -1., -2., -3., -4., -5., -6., 2., 2., 2., 2., 2., 2., 2., 2.,
45595 );
45596 assert_eq_m512(r, e);
45597 }
45598
45599 #[simd_test(enable = "avx512f,avx512vl")]
45600 unsafe fn test_mm256_mask_fnmadd_ps() {
45601 let a = _mm256_set1_ps(1.);
45602 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45603 let c = _mm256_set1_ps(1.);
45604 let r = _mm256_mask_fnmadd_ps(a, 0, b, c);
45605 assert_eq_m256(r, a);
45606 let r = _mm256_mask_fnmadd_ps(a, 0b11111111, b, c);
45607 let e = _mm256_set_ps(1., 0., -1., -2., -3., -4., -5., -6.);
45608 assert_eq_m256(r, e);
45609 }
45610
45611 #[simd_test(enable = "avx512f,avx512vl")]
45612 unsafe fn test_mm256_maskz_fnmadd_ps() {
45613 let a = _mm256_set1_ps(1.);
45614 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45615 let c = _mm256_set1_ps(1.);
45616 let r = _mm256_maskz_fnmadd_ps(0, a, b, c);
45617 assert_eq_m256(r, _mm256_setzero_ps());
45618 let r = _mm256_maskz_fnmadd_ps(0b11111111, a, b, c);
45619 let e = _mm256_set_ps(1., 0., -1., -2., -3., -4., -5., -6.);
45620 assert_eq_m256(r, e);
45621 }
45622
45623 #[simd_test(enable = "avx512f,avx512vl")]
45624 unsafe fn test_mm256_mask3_fnmadd_ps() {
45625 let a = _mm256_set1_ps(1.);
45626 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45627 let c = _mm256_set1_ps(1.);
45628 let r = _mm256_mask3_fnmadd_ps(a, b, c, 0);
45629 assert_eq_m256(r, c);
45630 let r = _mm256_mask3_fnmadd_ps(a, b, c, 0b11111111);
45631 let e = _mm256_set_ps(1., 0., -1., -2., -3., -4., -5., -6.);
45632 assert_eq_m256(r, e);
45633 }
45634
45635 #[simd_test(enable = "avx512f,avx512vl")]
45636 unsafe fn test_mm_mask_fnmadd_ps() {
45637 let a = _mm_set1_ps(1.);
45638 let b = _mm_set_ps(0., 1., 2., 3.);
45639 let c = _mm_set1_ps(1.);
45640 let r = _mm_mask_fnmadd_ps(a, 0, b, c);
45641 assert_eq_m128(r, a);
45642 let r = _mm_mask_fnmadd_ps(a, 0b00001111, b, c);
45643 let e = _mm_set_ps(1., 0., -1., -2.);
45644 assert_eq_m128(r, e);
45645 }
45646
45647 #[simd_test(enable = "avx512f,avx512vl")]
45648 unsafe fn test_mm_maskz_fnmadd_ps() {
45649 let a = _mm_set1_ps(1.);
45650 let b = _mm_set_ps(0., 1., 2., 3.);
45651 let c = _mm_set1_ps(1.);
45652 let r = _mm_maskz_fnmadd_ps(0, a, b, c);
45653 assert_eq_m128(r, _mm_setzero_ps());
45654 let r = _mm_maskz_fnmadd_ps(0b00001111, a, b, c);
45655 let e = _mm_set_ps(1., 0., -1., -2.);
45656 assert_eq_m128(r, e);
45657 }
45658
45659 #[simd_test(enable = "avx512f,avx512vl")]
45660 unsafe fn test_mm_mask3_fnmadd_ps() {
45661 let a = _mm_set1_ps(1.);
45662 let b = _mm_set_ps(0., 1., 2., 3.);
45663 let c = _mm_set1_ps(1.);
45664 let r = _mm_mask3_fnmadd_ps(a, b, c, 0);
45665 assert_eq_m128(r, c);
45666 let r = _mm_mask3_fnmadd_ps(a, b, c, 0b00001111);
45667 let e = _mm_set_ps(1., 0., -1., -2.);
45668 assert_eq_m128(r, e);
45669 }
45670
45671 #[simd_test(enable = "avx512f")]
45672 unsafe fn test_mm512_fnmsub_ps() {
45673 let a = _mm512_set1_ps(1.);
45674 let b = _mm512_setr_ps(
45675 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45676 );
45677 let c = _mm512_set1_ps(1.);
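        // fnmsub negates the product and subtracts the addend: each lane computes -(a * b) - c.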
45678 let r = _mm512_fnmsub_ps(a, b, c);
45679 let e = _mm512_setr_ps(
45680 -1., -2., -3., -4., -5., -6., -7., -8., -9., -10., -11., -12., -13., -14., -15., -16.,
45681 );
45682 assert_eq_m512(r, e);
45683 }
45684
45685 #[simd_test(enable = "avx512f")]
45686 unsafe fn test_mm512_mask_fnmsub_ps() {
45687 let a = _mm512_set1_ps(1.);
45688 let b = _mm512_setr_ps(
45689 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45690 );
45691 let c = _mm512_set1_ps(1.);
45692 let r = _mm512_mask_fnmsub_ps(a, 0, b, c);
45693 assert_eq_m512(r, a);
45694 let r = _mm512_mask_fnmsub_ps(a, 0b00000000_11111111, b, c);
45695 let e = _mm512_setr_ps(
45696 -1., -2., -3., -4., -5., -6., -7., -8., 1., 1., 1., 1., 1., 1., 1., 1.,
45697 );
45698 assert_eq_m512(r, e);
45699 }
45700
45701 #[simd_test(enable = "avx512f")]
45702 unsafe fn test_mm512_maskz_fnmsub_ps() {
45703 let a = _mm512_set1_ps(1.);
45704 let b = _mm512_setr_ps(
45705 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45706 );
45707 let c = _mm512_set1_ps(1.);
45708 let r = _mm512_maskz_fnmsub_ps(0, a, b, c);
45709 assert_eq_m512(r, _mm512_setzero_ps());
45710 let r = _mm512_maskz_fnmsub_ps(0b00000000_11111111, a, b, c);
45711 let e = _mm512_setr_ps(
45712 -1., -2., -3., -4., -5., -6., -7., -8., 0., 0., 0., 0., 0., 0., 0., 0.,
45713 );
45714 assert_eq_m512(r, e);
45715 }
45716
45717 #[simd_test(enable = "avx512f")]
45718 unsafe fn test_mm512_mask3_fnmsub_ps() {
45719 let a = _mm512_set1_ps(1.);
45720 let b = _mm512_setr_ps(
45721 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45722 );
45723 let c = _mm512_setr_ps(
45724 1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
45725 );
45726 let r = _mm512_mask3_fnmsub_ps(a, b, c, 0);
45727 assert_eq_m512(r, c);
45728 let r = _mm512_mask3_fnmsub_ps(a, b, c, 0b00000000_11111111);
45729 let e = _mm512_setr_ps(
45730 -1., -2., -3., -4., -5., -6., -7., -8., 2., 2., 2., 2., 2., 2., 2., 2.,
45731 );
45732 assert_eq_m512(r, e);
45733 }
45734
45735 #[simd_test(enable = "avx512f,avx512vl")]
45736 unsafe fn test_mm256_mask_fnmsub_ps() {
45737 let a = _mm256_set1_ps(1.);
45738 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45739 let c = _mm256_set1_ps(1.);
45740 let r = _mm256_mask_fnmsub_ps(a, 0, b, c);
45741 assert_eq_m256(r, a);
45742 let r = _mm256_mask_fnmsub_ps(a, 0b11111111, b, c);
45743 let e = _mm256_set_ps(-1., -2., -3., -4., -5., -6., -7., -8.);
45744 assert_eq_m256(r, e);
45745 }
45746
45747 #[simd_test(enable = "avx512f,avx512vl")]
45748 unsafe fn test_mm256_maskz_fnmsub_ps() {
45749 let a = _mm256_set1_ps(1.);
45750 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45751 let c = _mm256_set1_ps(1.);
45752 let r = _mm256_maskz_fnmsub_ps(0, a, b, c);
45753 assert_eq_m256(r, _mm256_setzero_ps());
45754 let r = _mm256_maskz_fnmsub_ps(0b11111111, a, b, c);
45755 let e = _mm256_set_ps(-1., -2., -3., -4., -5., -6., -7., -8.);
45756 assert_eq_m256(r, e);
45757 }
45758
45759 #[simd_test(enable = "avx512f,avx512vl")]
45760 unsafe fn test_mm256_mask3_fnmsub_ps() {
45761 let a = _mm256_set1_ps(1.);
45762 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45763 let c = _mm256_set1_ps(1.);
45764 let r = _mm256_mask3_fnmsub_ps(a, b, c, 0);
45765 assert_eq_m256(r, c);
45766 let r = _mm256_mask3_fnmsub_ps(a, b, c, 0b11111111);
45767 let e = _mm256_set_ps(-1., -2., -3., -4., -5., -6., -7., -8.);
45768 assert_eq_m256(r, e);
45769 }
45770
45771 #[simd_test(enable = "avx512f,avx512vl")]
45772 unsafe fn test_mm_mask_fnmsub_ps() {
45773 let a = _mm_set1_ps(1.);
45774 let b = _mm_set_ps(0., 1., 2., 3.);
45775 let c = _mm_set1_ps(1.);
45776 let r = _mm_mask_fnmsub_ps(a, 0, b, c);
45777 assert_eq_m128(r, a);
45778 let r = _mm_mask_fnmsub_ps(a, 0b00001111, b, c);
45779 let e = _mm_set_ps(-1., -2., -3., -4.);
45780 assert_eq_m128(r, e);
45781 }
45782
45783 #[simd_test(enable = "avx512f,avx512vl")]
45784 unsafe fn test_mm_maskz_fnmsub_ps() {
45785 let a = _mm_set1_ps(1.);
45786 let b = _mm_set_ps(0., 1., 2., 3.);
45787 let c = _mm_set1_ps(1.);
45788 let r = _mm_maskz_fnmsub_ps(0, a, b, c);
45789 assert_eq_m128(r, _mm_setzero_ps());
45790 let r = _mm_maskz_fnmsub_ps(0b00001111, a, b, c);
45791 let e = _mm_set_ps(-1., -2., -3., -4.);
45792 assert_eq_m128(r, e);
45793 }
45794
45795 #[simd_test(enable = "avx512f,avx512vl")]
45796 unsafe fn test_mm_mask3_fnmsub_ps() {
45797 let a = _mm_set1_ps(1.);
45798 let b = _mm_set_ps(0., 1., 2., 3.);
45799 let c = _mm_set1_ps(1.);
45800 let r = _mm_mask3_fnmsub_ps(a, b, c, 0);
45801 assert_eq_m128(r, c);
45802 let r = _mm_mask3_fnmsub_ps(a, b, c, 0b00001111);
45803 let e = _mm_set_ps(-1., -2., -3., -4.);
45804 assert_eq_m128(r, e);
45805 }
45806
45807 #[simd_test(enable = "avx512f")]
45808 unsafe fn test_mm512_rcp14_ps() {
45809 let a = _mm512_set1_ps(3.);
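        // rcp14 is an approximate reciprocal with a relative error of at most 2^-14,
        // hence 0.33333206 rather than an exact 1/3.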
45810 let r = _mm512_rcp14_ps(a);
45811 let e = _mm512_set1_ps(0.33333206);
45812 assert_eq_m512(r, e);
45813 }
45814
45815 #[simd_test(enable = "avx512f")]
45816 unsafe fn test_mm512_mask_rcp14_ps() {
45817 let a = _mm512_set1_ps(3.);
45818 let r = _mm512_mask_rcp14_ps(a, 0, a);
45819 assert_eq_m512(r, a);
45820 let r = _mm512_mask_rcp14_ps(a, 0b11111111_00000000, a);
45821 let e = _mm512_setr_ps(
45822 3., 3., 3., 3., 3., 3., 3., 3., 0.33333206, 0.33333206, 0.33333206, 0.33333206,
45823 0.33333206, 0.33333206, 0.33333206, 0.33333206,
45824 );
45825 assert_eq_m512(r, e);
45826 }
45827
45828 #[simd_test(enable = "avx512f")]
45829 unsafe fn test_mm512_maskz_rcp14_ps() {
45830 let a = _mm512_set1_ps(3.);
45831 let r = _mm512_maskz_rcp14_ps(0, a);
45832 assert_eq_m512(r, _mm512_setzero_ps());
45833 let r = _mm512_maskz_rcp14_ps(0b11111111_00000000, a);
45834 let e = _mm512_setr_ps(
45835 0., 0., 0., 0., 0., 0., 0., 0., 0.33333206, 0.33333206, 0.33333206, 0.33333206,
45836 0.33333206, 0.33333206, 0.33333206, 0.33333206,
45837 );
45838 assert_eq_m512(r, e);
45839 }
45840
45841 #[simd_test(enable = "avx512f,avx512vl")]
45842 unsafe fn test_mm256_rcp14_ps() {
45843 let a = _mm256_set1_ps(3.);
45844 let r = _mm256_rcp14_ps(a);
45845 let e = _mm256_set1_ps(0.33333206);
45846 assert_eq_m256(r, e);
45847 }
45848
45849 #[simd_test(enable = "avx512f,avx512vl")]
45850 unsafe fn test_mm256_mask_rcp14_ps() {
45851 let a = _mm256_set1_ps(3.);
45852 let r = _mm256_mask_rcp14_ps(a, 0, a);
45853 assert_eq_m256(r, a);
45854 let r = _mm256_mask_rcp14_ps(a, 0b11111111, a);
45855 let e = _mm256_set1_ps(0.33333206);
45856 assert_eq_m256(r, e);
45857 }
45858
45859 #[simd_test(enable = "avx512f,avx512vl")]
45860 unsafe fn test_mm256_maskz_rcp14_ps() {
45861 let a = _mm256_set1_ps(3.);
45862 let r = _mm256_maskz_rcp14_ps(0, a);
45863 assert_eq_m256(r, _mm256_setzero_ps());
45864 let r = _mm256_maskz_rcp14_ps(0b11111111, a);
45865 let e = _mm256_set1_ps(0.33333206);
45866 assert_eq_m256(r, e);
45867 }
45868
45869 #[simd_test(enable = "avx512f,avx512vl")]
45870 unsafe fn test_mm_rcp14_ps() {
45871 let a = _mm_set1_ps(3.);
45872 let r = _mm_rcp14_ps(a);
45873 let e = _mm_set1_ps(0.33333206);
45874 assert_eq_m128(r, e);
45875 }
45876
45877 #[simd_test(enable = "avx512f,avx512vl")]
45878 unsafe fn test_mm_mask_rcp14_ps() {
45879 let a = _mm_set1_ps(3.);
45880 let r = _mm_mask_rcp14_ps(a, 0, a);
45881 assert_eq_m128(r, a);
45882 let r = _mm_mask_rcp14_ps(a, 0b00001111, a);
45883 let e = _mm_set1_ps(0.33333206);
45884 assert_eq_m128(r, e);
45885 }
45886
45887 #[simd_test(enable = "avx512f,avx512vl")]
45888 unsafe fn test_mm_maskz_rcp14_ps() {
45889 let a = _mm_set1_ps(3.);
45890 let r = _mm_maskz_rcp14_ps(0, a);
45891 assert_eq_m128(r, _mm_setzero_ps());
45892 let r = _mm_maskz_rcp14_ps(0b00001111, a);
45893 let e = _mm_set1_ps(0.33333206);
45894 assert_eq_m128(r, e);
45895 }
45896
45897 #[simd_test(enable = "avx512f")]
45898 unsafe fn test_mm512_rsqrt14_ps() {
45899 let a = _mm512_set1_ps(3.);
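        // rsqrt14 is an approximate reciprocal square root with a relative error of at most
        // 2^-14, hence 0.5773392 rather than an exact 1/sqrt(3).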
45900 let r = _mm512_rsqrt14_ps(a);
45901 let e = _mm512_set1_ps(0.5773392);
45902 assert_eq_m512(r, e);
45903 }
45904
45905 #[simd_test(enable = "avx512f")]
45906 unsafe fn test_mm512_mask_rsqrt14_ps() {
45907 let a = _mm512_set1_ps(3.);
45908 let r = _mm512_mask_rsqrt14_ps(a, 0, a);
45909 assert_eq_m512(r, a);
45910 let r = _mm512_mask_rsqrt14_ps(a, 0b11111111_00000000, a);
45911 let e = _mm512_setr_ps(
45912 3., 3., 3., 3., 3., 3., 3., 3., 0.5773392, 0.5773392, 0.5773392, 0.5773392, 0.5773392,
45913 0.5773392, 0.5773392, 0.5773392,
45914 );
45915 assert_eq_m512(r, e);
45916 }
45917
45918 #[simd_test(enable = "avx512f")]
45919 unsafe fn test_mm512_maskz_rsqrt14_ps() {
45920 let a = _mm512_set1_ps(3.);
45921 let r = _mm512_maskz_rsqrt14_ps(0, a);
45922 assert_eq_m512(r, _mm512_setzero_ps());
45923 let r = _mm512_maskz_rsqrt14_ps(0b11111111_00000000, a);
45924 let e = _mm512_setr_ps(
45925 0., 0., 0., 0., 0., 0., 0., 0., 0.5773392, 0.5773392, 0.5773392, 0.5773392, 0.5773392,
45926 0.5773392, 0.5773392, 0.5773392,
45927 );
45928 assert_eq_m512(r, e);
45929 }
45930
45931 #[simd_test(enable = "avx512f,avx512vl")]
45932 unsafe fn test_mm256_rsqrt14_ps() {
45933 let a = _mm256_set1_ps(3.);
45934 let r = _mm256_rsqrt14_ps(a);
45935 let e = _mm256_set1_ps(0.5773392);
45936 assert_eq_m256(r, e);
45937 }
45938
45939 #[simd_test(enable = "avx512f,avx512vl")]
45940 unsafe fn test_mm256_mask_rsqrt14_ps() {
45941 let a = _mm256_set1_ps(3.);
45942 let r = _mm256_mask_rsqrt14_ps(a, 0, a);
45943 assert_eq_m256(r, a);
45944 let r = _mm256_mask_rsqrt14_ps(a, 0b11111111, a);
45945 let e = _mm256_set1_ps(0.5773392);
45946 assert_eq_m256(r, e);
45947 }
45948
45949 #[simd_test(enable = "avx512f,avx512vl")]
45950 unsafe fn test_mm256_maskz_rsqrt14_ps() {
45951 let a = _mm256_set1_ps(3.);
45952 let r = _mm256_maskz_rsqrt14_ps(0, a);
45953 assert_eq_m256(r, _mm256_setzero_ps());
45954 let r = _mm256_maskz_rsqrt14_ps(0b11111111, a);
45955 let e = _mm256_set1_ps(0.5773392);
45956 assert_eq_m256(r, e);
45957 }
45958
45959 #[simd_test(enable = "avx512f,avx512vl")]
45960 unsafe fn test_mm_rsqrt14_ps() {
45961 let a = _mm_set1_ps(3.);
45962 let r = _mm_rsqrt14_ps(a);
45963 let e = _mm_set1_ps(0.5773392);
45964 assert_eq_m128(r, e);
45965 }
45966
45967 #[simd_test(enable = "avx512f,avx512vl")]
45968 unsafe fn test_mm_mask_rsqrt14_ps() {
45969 let a = _mm_set1_ps(3.);
45970 let r = _mm_mask_rsqrt14_ps(a, 0, a);
45971 assert_eq_m128(r, a);
45972 let r = _mm_mask_rsqrt14_ps(a, 0b00001111, a);
45973 let e = _mm_set1_ps(0.5773392);
45974 assert_eq_m128(r, e);
45975 }
45976
45977 #[simd_test(enable = "avx512f,avx512vl")]
45978 unsafe fn test_mm_maskz_rsqrt14_ps() {
45979 let a = _mm_set1_ps(3.);
45980 let r = _mm_maskz_rsqrt14_ps(0, a);
45981 assert_eq_m128(r, _mm_setzero_ps());
45982 let r = _mm_maskz_rsqrt14_ps(0b00001111, a);
45983 let e = _mm_set1_ps(0.5773392);
45984 assert_eq_m128(r, e);
45985 }
45986
45987 #[simd_test(enable = "avx512f")]
45988 unsafe fn test_mm512_getexp_ps() {
45989 let a = _mm512_set1_ps(3.);
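        // getexp returns floor(log2(|a|)) as a float, so the exponent of 3.0 is 1.0.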
45990 let r = _mm512_getexp_ps(a);
45991 let e = _mm512_set1_ps(1.);
45992 assert_eq_m512(r, e);
45993 }
45994
45995 #[simd_test(enable = "avx512f")]
45996 unsafe fn test_mm512_mask_getexp_ps() {
45997 let a = _mm512_set1_ps(3.);
45998 let r = _mm512_mask_getexp_ps(a, 0, a);
45999 assert_eq_m512(r, a);
46000 let r = _mm512_mask_getexp_ps(a, 0b11111111_00000000, a);
46001 let e = _mm512_setr_ps(
46002 3., 3., 3., 3., 3., 3., 3., 3., 1., 1., 1., 1., 1., 1., 1., 1.,
46003 );
46004 assert_eq_m512(r, e);
46005 }
46006
46007 #[simd_test(enable = "avx512f")]
46008 unsafe fn test_mm512_maskz_getexp_ps() {
46009 let a = _mm512_set1_ps(3.);
46010 let r = _mm512_maskz_getexp_ps(0, a);
46011 assert_eq_m512(r, _mm512_setzero_ps());
46012 let r = _mm512_maskz_getexp_ps(0b11111111_00000000, a);
46013 let e = _mm512_setr_ps(
46014 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
46015 );
46016 assert_eq_m512(r, e);
46017 }
46018
46019 #[simd_test(enable = "avx512f,avx512vl")]
46020 unsafe fn test_mm256_getexp_ps() {
46021 let a = _mm256_set1_ps(3.);
46022 let r = _mm256_getexp_ps(a);
46023 let e = _mm256_set1_ps(1.);
46024 assert_eq_m256(r, e);
46025 }
46026
46027 #[simd_test(enable = "avx512f,avx512vl")]
46028 unsafe fn test_mm256_mask_getexp_ps() {
46029 let a = _mm256_set1_ps(3.);
46030 let r = _mm256_mask_getexp_ps(a, 0, a);
46031 assert_eq_m256(r, a);
46032 let r = _mm256_mask_getexp_ps(a, 0b11111111, a);
46033 let e = _mm256_set1_ps(1.);
46034 assert_eq_m256(r, e);
46035 }
46036
46037 #[simd_test(enable = "avx512f,avx512vl")]
46038 unsafe fn test_mm256_maskz_getexp_ps() {
46039 let a = _mm256_set1_ps(3.);
46040 let r = _mm256_maskz_getexp_ps(0, a);
46041 assert_eq_m256(r, _mm256_setzero_ps());
46042 let r = _mm256_maskz_getexp_ps(0b11111111, a);
46043 let e = _mm256_set1_ps(1.);
46044 assert_eq_m256(r, e);
46045 }
46046
46047 #[simd_test(enable = "avx512f,avx512vl")]
46048 unsafe fn test_mm_getexp_ps() {
46049 let a = _mm_set1_ps(3.);
46050 let r = _mm_getexp_ps(a);
46051 let e = _mm_set1_ps(1.);
46052 assert_eq_m128(r, e);
46053 }
46054
46055 #[simd_test(enable = "avx512f,avx512vl")]
46056 unsafe fn test_mm_mask_getexp_ps() {
46057 let a = _mm_set1_ps(3.);
46058 let r = _mm_mask_getexp_ps(a, 0, a);
46059 assert_eq_m128(r, a);
46060 let r = _mm_mask_getexp_ps(a, 0b00001111, a);
46061 let e = _mm_set1_ps(1.);
46062 assert_eq_m128(r, e);
46063 }
46064
46065 #[simd_test(enable = "avx512f,avx512vl")]
46066 unsafe fn test_mm_maskz_getexp_ps() {
46067 let a = _mm_set1_ps(3.);
46068 let r = _mm_maskz_getexp_ps(0, a);
46069 assert_eq_m128(r, _mm_setzero_ps());
46070 let r = _mm_maskz_getexp_ps(0b00001111, a);
46071 let e = _mm_set1_ps(1.);
46072 assert_eq_m128(r, e);
46073 }
46074
46075 #[simd_test(enable = "avx512f")]
46076 unsafe fn test_mm512_roundscale_ps() {
46077 let a = _mm512_set1_ps(1.1);
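        // imm8 = 0 selects no scaling (M = 0) and round-to-nearest, so 1.1 rounds to 1.0.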
46078 let r = _mm512_roundscale_ps::<0b00_00_00_00>(a);
46079 let e = _mm512_set1_ps(1.0);
46080 assert_eq_m512(r, e);
46081 }
46082
46083 #[simd_test(enable = "avx512f")]
46084 unsafe fn test_mm512_mask_roundscale_ps() {
46085 let a = _mm512_set1_ps(1.1);
46086 let r = _mm512_mask_roundscale_ps::<0b00_00_00_00>(a, 0, a);
46087 let e = _mm512_set1_ps(1.1);
46088 assert_eq_m512(r, e);
46089 let r = _mm512_mask_roundscale_ps::<0b00_00_00_00>(a, 0b11111111_11111111, a);
46090 let e = _mm512_set1_ps(1.0);
46091 assert_eq_m512(r, e);
46092 }
46093
46094 #[simd_test(enable = "avx512f")]
46095 unsafe fn test_mm512_maskz_roundscale_ps() {
46096 let a = _mm512_set1_ps(1.1);
46097 let r = _mm512_maskz_roundscale_ps::<0b00_00_00_00>(0, a);
46098 assert_eq_m512(r, _mm512_setzero_ps());
46099 let r = _mm512_maskz_roundscale_ps::<0b00_00_00_00>(0b11111111_11111111, a);
46100 let e = _mm512_set1_ps(1.0);
46101 assert_eq_m512(r, e);
46102 }
46103
46104 #[simd_test(enable = "avx512f,avx512vl")]
46105 unsafe fn test_mm256_roundscale_ps() {
46106 let a = _mm256_set1_ps(1.1);
46107 let r = _mm256_roundscale_ps::<0b00_00_00_00>(a);
46108 let e = _mm256_set1_ps(1.0);
46109 assert_eq_m256(r, e);
46110 }
46111
46112 #[simd_test(enable = "avx512f,avx512vl")]
46113 unsafe fn test_mm256_mask_roundscale_ps() {
46114 let a = _mm256_set1_ps(1.1);
46115 let r = _mm256_mask_roundscale_ps::<0b00_00_00_00>(a, 0, a);
46116 let e = _mm256_set1_ps(1.1);
46117 assert_eq_m256(r, e);
46118 let r = _mm256_mask_roundscale_ps::<0b00_00_00_00>(a, 0b11111111, a);
46119 let e = _mm256_set1_ps(1.0);
46120 assert_eq_m256(r, e);
46121 }
46122
46123 #[simd_test(enable = "avx512f,avx512vl")]
46124 unsafe fn test_mm256_maskz_roundscale_ps() {
46125 let a = _mm256_set1_ps(1.1);
46126 let r = _mm256_maskz_roundscale_ps::<0b00_00_00_00>(0, a);
46127 assert_eq_m256(r, _mm256_setzero_ps());
46128 let r = _mm256_maskz_roundscale_ps::<0b00_00_00_00>(0b11111111, a);
46129 let e = _mm256_set1_ps(1.0);
46130 assert_eq_m256(r, e);
46131 }
46132
46133 #[simd_test(enable = "avx512f,avx512vl")]
46134 unsafe fn test_mm_roundscale_ps() {
46135 let a = _mm_set1_ps(1.1);
46136 let r = _mm_roundscale_ps::<0b00_00_00_00>(a);
46137 let e = _mm_set1_ps(1.0);
46138 assert_eq_m128(r, e);
46139 }
46140
46141 #[simd_test(enable = "avx512f,avx512vl")]
46142 unsafe fn test_mm_mask_roundscale_ps() {
46143 let a = _mm_set1_ps(1.1);
46144 let r = _mm_mask_roundscale_ps::<0b00_00_00_00>(a, 0, a);
46145 let e = _mm_set1_ps(1.1);
46146 assert_eq_m128(r, e);
46147 let r = _mm_mask_roundscale_ps::<0b00_00_00_00>(a, 0b00001111, a);
46148 let e = _mm_set1_ps(1.0);
46149 assert_eq_m128(r, e);
46150 }
46151
46152 #[simd_test(enable = "avx512f,avx512vl")]
46153 unsafe fn test_mm_maskz_roundscale_ps() {
46154 let a = _mm_set1_ps(1.1);
46155 let r = _mm_maskz_roundscale_ps::<0b00_00_00_00>(0, a);
46156 assert_eq_m128(r, _mm_setzero_ps());
46157 let r = _mm_maskz_roundscale_ps::<0b00_00_00_00>(0b00001111, a);
46158 let e = _mm_set1_ps(1.0);
46159 assert_eq_m128(r, e);
46160 }
46161
46162 #[simd_test(enable = "avx512f")]
46163 unsafe fn test_mm512_scalef_ps() {
46164 let a = _mm512_set1_ps(1.);
46165 let b = _mm512_set1_ps(3.);
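        // scalef computes a * 2^floor(b) per lane: 1.0 * 2^3 = 8.0.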
46166 let r = _mm512_scalef_ps(a, b);
46167 let e = _mm512_set1_ps(8.);
46168 assert_eq_m512(r, e);
46169 }
46170
46171 #[simd_test(enable = "avx512f")]
46172 unsafe fn test_mm512_mask_scalef_ps() {
46173 let a = _mm512_set1_ps(1.);
46174 let b = _mm512_set1_ps(3.);
46175 let r = _mm512_mask_scalef_ps(a, 0, a, b);
46176 assert_eq_m512(r, a);
46177 let r = _mm512_mask_scalef_ps(a, 0b11111111_00000000, a, b);
46178 let e = _mm512_set_ps(
46179 8., 8., 8., 8., 8., 8., 8., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
46180 );
46181 assert_eq_m512(r, e);
46182 }
46183
46184 #[simd_test(enable = "avx512f")]
46185 unsafe fn test_mm512_maskz_scalef_ps() {
46186 let a = _mm512_set1_ps(1.);
46187 let b = _mm512_set1_ps(3.);
46188 let r = _mm512_maskz_scalef_ps(0, a, b);
46189 assert_eq_m512(r, _mm512_setzero_ps());
46190 let r = _mm512_maskz_scalef_ps(0b11111111_00000000, a, b);
46191 let e = _mm512_set_ps(
46192 8., 8., 8., 8., 8., 8., 8., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
46193 );
46194 assert_eq_m512(r, e);
46195 }
46196
46197 #[simd_test(enable = "avx512f,avx512vl")]
46198 unsafe fn test_mm256_scalef_ps() {
46199 let a = _mm256_set1_ps(1.);
46200 let b = _mm256_set1_ps(3.);
46201 let r = _mm256_scalef_ps(a, b);
46202 let e = _mm256_set1_ps(8.);
46203 assert_eq_m256(r, e);
46204 }
46205
46206 #[simd_test(enable = "avx512f,avx512vl")]
46207 unsafe fn test_mm256_mask_scalef_ps() {
46208 let a = _mm256_set1_ps(1.);
46209 let b = _mm256_set1_ps(3.);
46210 let r = _mm256_mask_scalef_ps(a, 0, a, b);
46211 assert_eq_m256(r, a);
46212 let r = _mm256_mask_scalef_ps(a, 0b11111111, a, b);
46213 let e = _mm256_set1_ps(8.);
46214 assert_eq_m256(r, e);
46215 }
46216
46217 #[simd_test(enable = "avx512f,avx512vl")]
46218 unsafe fn test_mm256_maskz_scalef_ps() {
46219 let a = _mm256_set1_ps(1.);
46220 let b = _mm256_set1_ps(3.);
46221 let r = _mm256_maskz_scalef_ps(0, a, b);
46222 assert_eq_m256(r, _mm256_setzero_ps());
46223 let r = _mm256_maskz_scalef_ps(0b11111111, a, b);
46224 let e = _mm256_set1_ps(8.);
46225 assert_eq_m256(r, e);
46226 }
46227
46228 #[simd_test(enable = "avx512f,avx512vl")]
46229 unsafe fn test_mm_scalef_ps() {
46230 let a = _mm_set1_ps(1.);
46231 let b = _mm_set1_ps(3.);
46232 let r = _mm_scalef_ps(a, b);
46233 let e = _mm_set1_ps(8.);
46234 assert_eq_m128(r, e);
46235 }
46236
46237 #[simd_test(enable = "avx512f,avx512vl")]
46238 unsafe fn test_mm_mask_scalef_ps() {
46239 let a = _mm_set1_ps(1.);
46240 let b = _mm_set1_ps(3.);
46241 let r = _mm_mask_scalef_ps(a, 0, a, b);
46242 assert_eq_m128(r, a);
46243 let r = _mm_mask_scalef_ps(a, 0b00001111, a, b);
46244 let e = _mm_set1_ps(8.);
46245 assert_eq_m128(r, e);
46246 }
46247
46248 #[simd_test(enable = "avx512f,avx512vl")]
46249 unsafe fn test_mm_maskz_scalef_ps() {
46250 let a = _mm_set1_ps(1.);
46251 let b = _mm_set1_ps(3.);
46252 let r = _mm_maskz_scalef_ps(0, a, b);
46253 assert_eq_m128(r, _mm_setzero_ps());
46254 let r = _mm_maskz_scalef_ps(0b00001111, a, b);
46255 let e = _mm_set1_ps(8.);
46256 assert_eq_m128(r, e);
46257 }
46258
46259 #[simd_test(enable = "avx512f")]
46260 unsafe fn test_mm512_fixupimm_ps() {
46261 let a = _mm512_set1_ps(f32::NAN);
46262 let b = _mm512_set1_ps(f32::MAX);
46263 let c = _mm512_set1_epi32(i32::MAX);
46265 let r = _mm512_fixupimm_ps::<5>(a, b, c);
46266 let e = _mm512_set1_ps(0.0);
46267 assert_eq_m512(r, e);
46268 }
46269
46270 #[simd_test(enable = "avx512f")]
46271 unsafe fn test_mm512_mask_fixupimm_ps() {
46272 #[rustfmt::skip]
46273 let a = _mm512_set_ps(
46274 f32::NAN, f32::NAN, f32::NAN, f32::NAN,
46275 f32::NAN, f32::NAN, f32::NAN, f32::NAN,
46276 1., 1., 1., 1.,
46277 1., 1., 1., 1.,
46278 );
46279 let b = _mm512_set1_ps(f32::MAX);
46280 let c = _mm512_set1_epi32(i32::MAX);
46281 let r = _mm512_mask_fixupimm_ps::<5>(a, 0b11111111_00000000, b, c);
46282 let e = _mm512_set_ps(
46283 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
46284 );
46285 assert_eq_m512(r, e);
46286 }
46287
46288 #[simd_test(enable = "avx512f")]
46289 unsafe fn test_mm512_maskz_fixupimm_ps() {
46290 #[rustfmt::skip]
46291 let a = _mm512_set_ps(
46292 f32::NAN, f32::NAN, f32::NAN, f32::NAN,
46293 f32::NAN, f32::NAN, f32::NAN, f32::NAN,
46294 1., 1., 1., 1.,
46295 1., 1., 1., 1.,
46296 );
46297 let b = _mm512_set1_ps(f32::MAX);
46298 let c = _mm512_set1_epi32(i32::MAX);
46299 let r = _mm512_maskz_fixupimm_ps::<5>(0b11111111_00000000, a, b, c);
46300 let e = _mm512_set_ps(
46301 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
46302 );
46303 assert_eq_m512(r, e);
46304 }
46305
46306 #[simd_test(enable = "avx512f,avx512vl")]
46307 unsafe fn test_mm256_fixupimm_ps() {
46308 let a = _mm256_set1_ps(f32::NAN);
46309 let b = _mm256_set1_ps(f32::MAX);
46310 let c = _mm256_set1_epi32(i32::MAX);
46311 let r = _mm256_fixupimm_ps::<5>(a, b, c);
46312 let e = _mm256_set1_ps(0.0);
46313 assert_eq_m256(r, e);
46314 }
46315
46316 #[simd_test(enable = "avx512f,avx512vl")]
46317 unsafe fn test_mm256_mask_fixupimm_ps() {
46318 let a = _mm256_set1_ps(f32::NAN);
46319 let b = _mm256_set1_ps(f32::MAX);
46320 let c = _mm256_set1_epi32(i32::MAX);
46321 let r = _mm256_mask_fixupimm_ps::<5>(a, 0b11111111, b, c);
46322 let e = _mm256_set1_ps(0.0);
46323 assert_eq_m256(r, e);
46324 }
46325
46326 #[simd_test(enable = "avx512f,avx512vl")]
46327 unsafe fn test_mm256_maskz_fixupimm_ps() {
46328 let a = _mm256_set1_ps(f32::NAN);
46329 let b = _mm256_set1_ps(f32::MAX);
46330 let c = _mm256_set1_epi32(i32::MAX);
46331 let r = _mm256_maskz_fixupimm_ps::<5>(0b11111111, a, b, c);
46332 let e = _mm256_set1_ps(0.0);
46333 assert_eq_m256(r, e);
46334 }
46335
46336 #[simd_test(enable = "avx512f,avx512vl")]
46337 unsafe fn test_mm_fixupimm_ps() {
46338 let a = _mm_set1_ps(f32::NAN);
46339 let b = _mm_set1_ps(f32::MAX);
46340 let c = _mm_set1_epi32(i32::MAX);
46341 let r = _mm_fixupimm_ps::<5>(a, b, c);
46342 let e = _mm_set1_ps(0.0);
46343 assert_eq_m128(r, e);
46344 }
46345
46346 #[simd_test(enable = "avx512f,avx512vl")]
46347 unsafe fn test_mm_mask_fixupimm_ps() {
46348 let a = _mm_set1_ps(f32::NAN);
46349 let b = _mm_set1_ps(f32::MAX);
46350 let c = _mm_set1_epi32(i32::MAX);
46351 let r = _mm_mask_fixupimm_ps::<5>(a, 0b00001111, b, c);
46352 let e = _mm_set1_ps(0.0);
46353 assert_eq_m128(r, e);
46354 }
46355
46356 #[simd_test(enable = "avx512f,avx512vl")]
46357 unsafe fn test_mm_maskz_fixupimm_ps() {
46358 let a = _mm_set1_ps(f32::NAN);
46359 let b = _mm_set1_ps(f32::MAX);
46360 let c = _mm_set1_epi32(i32::MAX);
46361 let r = _mm_maskz_fixupimm_ps::<5>(0b00001111, a, b, c);
46362 let e = _mm_set1_ps(0.0);
46363 assert_eq_m128(r, e);
46364 }
46365
46366 #[simd_test(enable = "avx512f")]
46367 unsafe fn test_mm512_ternarylogic_epi32() {
46368 let a = _mm512_set1_epi32(1 << 2);
46369 let b = _mm512_set1_epi32(1 << 1);
46370 let c = _mm512_set1_epi32(1 << 0);
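        // Bit i of the immediate is the truth-table output for the input bits
        // (a, b, c) = (i[2], i[1], i[0]); imm8 = 8 encodes !a & b & c, which none of these
        // disjoint single-bit inputs satisfy, so every lane is 0.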
46371 let r = _mm512_ternarylogic_epi32::<8>(a, b, c);
46372 let e = _mm512_set1_epi32(0);
46373 assert_eq_m512i(r, e);
46374 }
46375
46376 #[simd_test(enable = "avx512f")]
46377 unsafe fn test_mm512_mask_ternarylogic_epi32() {
46378 let src = _mm512_set1_epi32(1 << 2);
46379 let a = _mm512_set1_epi32(1 << 1);
46380 let b = _mm512_set1_epi32(1 << 0);
46381 let r = _mm512_mask_ternarylogic_epi32::<8>(src, 0, a, b);
46382 assert_eq_m512i(r, src);
46383 let r = _mm512_mask_ternarylogic_epi32::<8>(src, 0b11111111_11111111, a, b);
46384 let e = _mm512_set1_epi32(0);
46385 assert_eq_m512i(r, e);
46386 }
46387
46388 #[simd_test(enable = "avx512f")]
46389 unsafe fn test_mm512_maskz_ternarylogic_epi32() {
46390 let a = _mm512_set1_epi32(1 << 2);
46391 let b = _mm512_set1_epi32(1 << 1);
46392 let c = _mm512_set1_epi32(1 << 0);
46393 let r = _mm512_maskz_ternarylogic_epi32::<9>(0, a, b, c);
46394 assert_eq_m512i(r, _mm512_setzero_si512());
46395 let r = _mm512_maskz_ternarylogic_epi32::<8>(0b11111111_11111111, a, b, c);
46396 let e = _mm512_set1_epi32(0);
46397 assert_eq_m512i(r, e);
46398 }
46399
46400 #[simd_test(enable = "avx512f,avx512vl")]
46401 unsafe fn test_mm256_ternarylogic_epi32() {
46402 let a = _mm256_set1_epi32(1 << 2);
46403 let b = _mm256_set1_epi32(1 << 1);
46404 let c = _mm256_set1_epi32(1 << 0);
46405 let r = _mm256_ternarylogic_epi32::<8>(a, b, c);
46406 let e = _mm256_set1_epi32(0);
46407 assert_eq_m256i(r, e);
46408 }
46409
46410 #[simd_test(enable = "avx512f,avx512vl")]
46411 unsafe fn test_mm256_mask_ternarylogic_epi32() {
46412 let src = _mm256_set1_epi32(1 << 2);
46413 let a = _mm256_set1_epi32(1 << 1);
46414 let b = _mm256_set1_epi32(1 << 0);
46415 let r = _mm256_mask_ternarylogic_epi32::<8>(src, 0, a, b);
46416 assert_eq_m256i(r, src);
46417 let r = _mm256_mask_ternarylogic_epi32::<8>(src, 0b11111111, a, b);
46418 let e = _mm256_set1_epi32(0);
46419 assert_eq_m256i(r, e);
46420 }
46421
46422 #[simd_test(enable = "avx512f,avx512vl")]
46423 unsafe fn test_mm256_maskz_ternarylogic_epi32() {
46424 let a = _mm256_set1_epi32(1 << 2);
46425 let b = _mm256_set1_epi32(1 << 1);
46426 let c = _mm256_set1_epi32(1 << 0);
46427 let r = _mm256_maskz_ternarylogic_epi32::<9>(0, a, b, c);
46428 assert_eq_m256i(r, _mm256_setzero_si256());
46429 let r = _mm256_maskz_ternarylogic_epi32::<8>(0b11111111, a, b, c);
46430 let e = _mm256_set1_epi32(0);
46431 assert_eq_m256i(r, e);
46432 }
46433
46434 #[simd_test(enable = "avx512f,avx512vl")]
46435 unsafe fn test_mm_ternarylogic_epi32() {
46436 let a = _mm_set1_epi32(1 << 2);
46437 let b = _mm_set1_epi32(1 << 1);
46438 let c = _mm_set1_epi32(1 << 0);
46439 let r = _mm_ternarylogic_epi32::<8>(a, b, c);
46440 let e = _mm_set1_epi32(0);
46441 assert_eq_m128i(r, e);
46442 }
46443
46444 #[simd_test(enable = "avx512f,avx512vl")]
46445 unsafe fn test_mm_mask_ternarylogic_epi32() {
46446 let src = _mm_set1_epi32(1 << 2);
46447 let a = _mm_set1_epi32(1 << 1);
46448 let b = _mm_set1_epi32(1 << 0);
46449 let r = _mm_mask_ternarylogic_epi32::<8>(src, 0, a, b);
46450 assert_eq_m128i(r, src);
46451 let r = _mm_mask_ternarylogic_epi32::<8>(src, 0b00001111, a, b);
46452 let e = _mm_set1_epi32(0);
46453 assert_eq_m128i(r, e);
46454 }
46455
46456 #[simd_test(enable = "avx512f,avx512vl")]
46457 unsafe fn test_mm_maskz_ternarylogic_epi32() {
46458 let a = _mm_set1_epi32(1 << 2);
46459 let b = _mm_set1_epi32(1 << 1);
46460 let c = _mm_set1_epi32(1 << 0);
46461 let r = _mm_maskz_ternarylogic_epi32::<9>(0, a, b, c);
46462 assert_eq_m128i(r, _mm_setzero_si128());
46463 let r = _mm_maskz_ternarylogic_epi32::<8>(0b00001111, a, b, c);
46464 let e = _mm_set1_epi32(0);
46465 assert_eq_m128i(r, e);
46466 }
46467
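    // Note: getmant extracts the mantissa of |x| = m * 2^e, normalized to the requested
    // interval. 10.0 == 1.25 * 2^3, so both the [0.75, 1.5) and [1, 2) normalizations used in
    // these tests yield 1.25.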
46468 #[simd_test(enable = "avx512f")]
46469 unsafe fn test_mm512_getmant_ps() {
46470 let a = _mm512_set1_ps(10.);
46471 let r = _mm512_getmant_ps::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
46472 let e = _mm512_set1_ps(1.25);
46473 assert_eq_m512(r, e);
46474 }
46475
46476 #[simd_test(enable = "avx512f")]
46477 unsafe fn test_mm512_mask_getmant_ps() {
46478 let a = _mm512_set1_ps(10.);
46479 let r = _mm512_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a);
46480 assert_eq_m512(r, a);
46481 let r = _mm512_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(
46482 a,
46483 0b11111111_00000000,
46484 a,
46485 );
46486 let e = _mm512_setr_ps(
46487 10., 10., 10., 10., 10., 10., 10., 10., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
46488 );
46489 assert_eq_m512(r, e);
46490 }
46491
46492 #[simd_test(enable = "avx512f")]
46493 unsafe fn test_mm512_maskz_getmant_ps() {
46494 let a = _mm512_set1_ps(10.);
46495 let r = _mm512_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a);
46496 assert_eq_m512(r, _mm512_setzero_ps());
46497 let r =
46498 _mm512_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b11111111_00000000, a);
46499 let e = _mm512_setr_ps(
46500 0., 0., 0., 0., 0., 0., 0., 0., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
46501 );
46502 assert_eq_m512(r, e);
46503 }
46504
46505 #[simd_test(enable = "avx512f,avx512vl")]
46506 unsafe fn test_mm256_getmant_ps() {
46507 let a = _mm256_set1_ps(10.);
46508 let r = _mm256_getmant_ps::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
46509 let e = _mm256_set1_ps(1.25);
46510 assert_eq_m256(r, e);
46511 }
46512
46513 #[simd_test(enable = "avx512f,avx512vl")]
46514 unsafe fn test_mm256_mask_getmant_ps() {
46515 let a = _mm256_set1_ps(10.);
46516 let r = _mm256_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a);
46517 assert_eq_m256(r, a);
46518 let r = _mm256_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b11111111, a);
46519 let e = _mm256_set1_ps(1.25);
46520 assert_eq_m256(r, e);
46521 }
46522
46523 #[simd_test(enable = "avx512f,avx512vl")]
46524 unsafe fn test_mm256_maskz_getmant_ps() {
46525 let a = _mm256_set1_ps(10.);
46526 let r = _mm256_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a);
46527 assert_eq_m256(r, _mm256_setzero_ps());
46528 let r = _mm256_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b11111111, a);
46529 let e = _mm256_set1_ps(1.25);
46530 assert_eq_m256(r, e);
46531 }
46532
46533 #[simd_test(enable = "avx512f,avx512vl")]
46534 unsafe fn test_mm_getmant_ps() {
46535 let a = _mm_set1_ps(10.);
46536 let r = _mm_getmant_ps::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
46537 let e = _mm_set1_ps(1.25);
46538 assert_eq_m128(r, e);
46539 }
46540
46541 #[simd_test(enable = "avx512f,avx512vl")]
46542 unsafe fn test_mm_mask_getmant_ps() {
46543 let a = _mm_set1_ps(10.);
46544 let r = _mm_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a);
46545 assert_eq_m128(r, a);
46546 let r = _mm_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b00001111, a);
46547 let e = _mm_set1_ps(1.25);
46548 assert_eq_m128(r, e);
46549 }
46550
46551 #[simd_test(enable = "avx512f,avx512vl")]
46552 unsafe fn test_mm_maskz_getmant_ps() {
46553 let a = _mm_set1_ps(10.);
46554 let r = _mm_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a);
46555 assert_eq_m128(r, _mm_setzero_ps());
46556 let r = _mm_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b00001111, a);
46557 let e = _mm_set1_ps(1.25);
46558 assert_eq_m128(r, e);
46559 }
46560
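    // Note: the *_round_ps variants take an embedded rounding mode. In these tests it only
    // matters for the one inexact lane (0.00000007 + -1.0): round-to-nearest gives -0.99999994,
    // while round-toward-zero keeps the smaller-magnitude neighbour, written as -0.9999999.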
46561 #[simd_test(enable = "avx512f")]
46562 unsafe fn test_mm512_add_round_ps() {
46563 let a = _mm512_setr_ps(
46564 0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
46565 );
46566 let b = _mm512_set1_ps(-1.);
46567 let r = _mm512_add_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
46568 #[rustfmt::skip]
46569 let e = _mm512_setr_ps(
46570 -1., 0.5, 1., 2.5,
46571 3., 4.5, 5., 6.5,
46572 7., 8.5, 9., 10.5,
46573 11., 12.5, 13., -0.99999994,
46574 );
46575 assert_eq_m512(r, e);
46576 let r = _mm512_add_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
46577 let e = _mm512_setr_ps(
46578 -1., 0.5, 1., 2.5, 3., 4.5, 5., 6.5, 7., 8.5, 9., 10.5, 11., 12.5, 13., -0.9999999,
46579 );
46580 assert_eq_m512(r, e);
46581 }
46582
46583 #[simd_test(enable = "avx512f")]
46584 unsafe fn test_mm512_mask_add_round_ps() {
46585 let a = _mm512_setr_ps(
46586 0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
46587 );
46588 let b = _mm512_set1_ps(-1.);
46589 let r = _mm512_mask_add_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, 0, a, b);
46590 assert_eq_m512(r, a);
46591 let r = _mm512_mask_add_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46592 a,
46593 0b11111111_00000000,
46594 a,
46595 b,
46596 );
46597 #[rustfmt::skip]
46598 let e = _mm512_setr_ps(
46599 0., 1.5, 2., 3.5,
46600 4., 5.5, 6., 7.5,
46601 7., 8.5, 9., 10.5,
46602 11., 12.5, 13., -0.99999994,
46603 );
46604 assert_eq_m512(r, e);
46605 }
46606
46607 #[simd_test(enable = "avx512f")]
46608 unsafe fn test_mm512_maskz_add_round_ps() {
46609 let a = _mm512_setr_ps(
46610 0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
46611 );
46612 let b = _mm512_set1_ps(-1.);
46613 let r = _mm512_maskz_add_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
46614 assert_eq_m512(r, _mm512_setzero_ps());
46615 let r = _mm512_maskz_add_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46616 0b11111111_00000000,
46617 a,
46618 b,
46619 );
46620 #[rustfmt::skip]
46621 let e = _mm512_setr_ps(
46622 0., 0., 0., 0.,
46623 0., 0., 0., 0.,
46624 7., 8.5, 9., 10.5,
46625 11., 12.5, 13., -0.99999994,
46626 );
46627 assert_eq_m512(r, e);
46628 }
46629
46630 #[simd_test(enable = "avx512f")]
46631 unsafe fn test_mm512_sub_round_ps() {
46632 let a = _mm512_setr_ps(
46633 0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
46634 );
46635 let b = _mm512_set1_ps(1.);
46636 let r = _mm512_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
46637 #[rustfmt::skip]
46638 let e = _mm512_setr_ps(
46639 -1., 0.5, 1., 2.5,
46640 3., 4.5, 5., 6.5,
46641 7., 8.5, 9., 10.5,
46642 11., 12.5, 13., -0.99999994,
46643 );
46644 assert_eq_m512(r, e);
46645 let r = _mm512_sub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
46646 let e = _mm512_setr_ps(
46647 -1., 0.5, 1., 2.5, 3., 4.5, 5., 6.5, 7., 8.5, 9., 10.5, 11., 12.5, 13., -0.9999999,
46648 );
46649 assert_eq_m512(r, e);
46650 }
46651
46652 #[simd_test(enable = "avx512f")]
46653 unsafe fn test_mm512_mask_sub_round_ps() {
46654 let a = _mm512_setr_ps(
46655 0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
46656 );
46657 let b = _mm512_set1_ps(1.);
46658 let r = _mm512_mask_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46659 a, 0, a, b,
46660 );
46661 assert_eq_m512(r, a);
46662 let r = _mm512_mask_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46663 a,
46664 0b11111111_00000000,
46665 a,
46666 b,
46667 );
46668 #[rustfmt::skip]
46669 let e = _mm512_setr_ps(
46670 0., 1.5, 2., 3.5,
46671 4., 5.5, 6., 7.5,
46672 7., 8.5, 9., 10.5,
46673 11., 12.5, 13., -0.99999994,
46674 );
46675 assert_eq_m512(r, e);
46676 }
46677
46678 #[simd_test(enable = "avx512f")]
46679 unsafe fn test_mm512_maskz_sub_round_ps() {
46680 let a = _mm512_setr_ps(
46681 0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
46682 );
46683 let b = _mm512_set1_ps(1.);
46684 let r =
46685 _mm512_maskz_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
46686 assert_eq_m512(r, _mm512_setzero_ps());
46687 let r = _mm512_maskz_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46688 0b11111111_00000000,
46689 a,
46690 b,
46691 );
46692 #[rustfmt::skip]
46693 let e = _mm512_setr_ps(
46694 0., 0., 0., 0.,
46695 0., 0., 0., 0.,
46696 7., 8.5, 9., 10.5,
46697 11., 12.5, 13., -0.99999994,
46698 );
46699 assert_eq_m512(r, e);
46700 }
46701
46702 #[simd_test(enable = "avx512f")]
46703 unsafe fn test_mm512_mul_round_ps() {
46704 #[rustfmt::skip]
46705 let a = _mm512_setr_ps(
46706 0., 1.5, 2., 3.5,
46707 4., 5.5, 6., 7.5,
46708 8., 9.5, 10., 11.5,
46709 12., 13.5, 14., 0.00000000000000000000007,
46710 );
46711 let b = _mm512_set1_ps(0.1);
46712 let r = _mm512_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
46713 #[rustfmt::skip]
46714 let e = _mm512_setr_ps(
46715 0., 0.15, 0.2, 0.35,
46716 0.4, 0.55, 0.6, 0.75,
46717 0.8, 0.95, 1.0, 1.15,
46718 1.2, 1.35, 1.4, 0.000000000000000000000007000001,
46719 );
46720 assert_eq_m512(r, e);
46721 let r = _mm512_mul_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
46722 #[rustfmt::skip]
46723 let e = _mm512_setr_ps(
46724 0., 0.14999999, 0.2, 0.35,
46725 0.4, 0.54999995, 0.59999996, 0.75,
46726 0.8, 0.95, 1.0, 1.15,
46727 1.1999999, 1.3499999, 1.4, 0.000000000000000000000007,
46728 );
46729 assert_eq_m512(r, e);
46730 }
46731
46732 #[simd_test(enable = "avx512f")]
46733 unsafe fn test_mm512_mask_mul_round_ps() {
46734 #[rustfmt::skip]
46735 let a = _mm512_setr_ps(
46736 0., 1.5, 2., 3.5,
46737 4., 5.5, 6., 7.5,
46738 8., 9.5, 10., 11.5,
46739 12., 13.5, 14., 0.00000000000000000000007,
46740 );
46741 let b = _mm512_set1_ps(0.1);
46742 let r = _mm512_mask_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46743 a, 0, a, b,
46744 );
46745 assert_eq_m512(r, a);
46746 let r = _mm512_mask_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46747 a,
46748 0b11111111_00000000,
46749 a,
46750 b,
46751 );
46752 #[rustfmt::skip]
46753 let e = _mm512_setr_ps(
46754 0., 1.5, 2., 3.5,
46755 4., 5.5, 6., 7.5,
46756 0.8, 0.95, 1.0, 1.15,
46757 1.2, 1.35, 1.4, 0.000000000000000000000007000001,
46758 );
46759 assert_eq_m512(r, e);
46760 }
46761
46762 #[simd_test(enable = "avx512f")]
46763 unsafe fn test_mm512_maskz_mul_round_ps() {
46764 #[rustfmt::skip]
46765 let a = _mm512_setr_ps(
46766 0., 1.5, 2., 3.5,
46767 4., 5.5, 6., 7.5,
46768 8., 9.5, 10., 11.5,
46769 12., 13.5, 14., 0.00000000000000000000007,
46770 );
46771 let b = _mm512_set1_ps(0.1);
46772 let r =
46773 _mm512_maskz_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
46774 assert_eq_m512(r, _mm512_setzero_ps());
46775 let r = _mm512_maskz_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46776 0b11111111_00000000,
46777 a,
46778 b,
46779 );
46780 #[rustfmt::skip]
46781 let e = _mm512_setr_ps(
46782 0., 0., 0., 0.,
46783 0., 0., 0., 0.,
46784 0.8, 0.95, 1.0, 1.15,
46785 1.2, 1.35, 1.4, 0.000000000000000000000007000001,
46786 );
46787 assert_eq_m512(r, e);
46788 }
46789
46790 #[simd_test(enable = "avx512f")]
46791 unsafe fn test_mm512_div_round_ps() {
46792 let a = _mm512_set1_ps(1.);
46793 let b = _mm512_set1_ps(3.);
46794 let r = _mm512_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
46795 let e = _mm512_set1_ps(0.33333334);
46796 assert_eq_m512(r, e);
46797 let r = _mm512_div_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
46798 let e = _mm512_set1_ps(0.3333333);
46799 assert_eq_m512(r, e);
46800 }
46801
46802 #[simd_test(enable = "avx512f")]
46803 unsafe fn test_mm512_mask_div_round_ps() {
46804 let a = _mm512_set1_ps(1.);
46805 let b = _mm512_set1_ps(3.);
46806 let r = _mm512_mask_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46807 a, 0, a, b,
46808 );
46809 assert_eq_m512(r, a);
46810 let r = _mm512_mask_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46811 a,
46812 0b11111111_00000000,
46813 a,
46814 b,
46815 );
46816 let e = _mm512_setr_ps(
46817 1., 1., 1., 1., 1., 1., 1., 1., 0.33333334, 0.33333334, 0.33333334, 0.33333334,
46818 0.33333334, 0.33333334, 0.33333334, 0.33333334,
46819 );
46820 assert_eq_m512(r, e);
46821 }
46822
46823 #[simd_test(enable = "avx512f")]
46824 unsafe fn test_mm512_maskz_div_round_ps() {
46825 let a = _mm512_set1_ps(1.);
46826 let b = _mm512_set1_ps(3.);
46827 let r =
46828 _mm512_maskz_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
46829 assert_eq_m512(r, _mm512_setzero_ps());
46830 let r = _mm512_maskz_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46831 0b11111111_00000000,
46832 a,
46833 b,
46834 );
46835 let e = _mm512_setr_ps(
46836 0., 0., 0., 0., 0., 0., 0., 0., 0.33333334, 0.33333334, 0.33333334, 0.33333334,
46837 0.33333334, 0.33333334, 0.33333334, 0.33333334,
46838 );
46839 assert_eq_m512(r, e);
46840 }
46841
46842 #[simd_test(enable = "avx512f")]
46843 unsafe fn test_mm512_sqrt_round_ps() {
46844 let a = _mm512_set1_ps(3.);
46845 let r = _mm512_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
46846 let e = _mm512_set1_ps(1.7320508);
46847 assert_eq_m512(r, e);
46848 let r = _mm512_sqrt_round_ps::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a);
46849 let e = _mm512_set1_ps(1.7320509);
46850 assert_eq_m512(r, e);
46851 }
46852
46853 #[simd_test(enable = "avx512f")]
46854 unsafe fn test_mm512_mask_sqrt_round_ps() {
46855 let a = _mm512_set1_ps(3.);
46856 let r =
46857 _mm512_mask_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, 0, a);
46858 assert_eq_m512(r, a);
46859 let r = _mm512_mask_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46860 a,
46861 0b11111111_00000000,
46862 a,
46863 );
46864 let e = _mm512_setr_ps(
46865 3., 3., 3., 3., 3., 3., 3., 3., 1.7320508, 1.7320508, 1.7320508, 1.7320508, 1.7320508,
46866 1.7320508, 1.7320508, 1.7320508,
46867 );
46868 assert_eq_m512(r, e);
46869 }
46870
46871 #[simd_test(enable = "avx512f")]
46872 unsafe fn test_mm512_maskz_sqrt_round_ps() {
46873 let a = _mm512_set1_ps(3.);
46874 let r =
46875 _mm512_maskz_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a);
46876 assert_eq_m512(r, _mm512_setzero_ps());
46877 let r = _mm512_maskz_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46878 0b11111111_00000000,
46879 a,
46880 );
46881 let e = _mm512_setr_ps(
46882 0., 0., 0., 0., 0., 0., 0., 0., 1.7320508, 1.7320508, 1.7320508, 1.7320508, 1.7320508,
46883 1.7320508, 1.7320508, 1.7320508,
46884 );
46885 assert_eq_m512(r, e);
46886 }
46887
46888 #[simd_test(enable = "avx512f")]
46889 unsafe fn test_mm512_fmadd_round_ps() {
46890 let a = _mm512_set1_ps(0.00000007);
46891 let b = _mm512_set1_ps(1.);
46892 let c = _mm512_set1_ps(-1.);
46893 let r = _mm512_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
46894 let e = _mm512_set1_ps(-0.99999994);
46895 assert_eq_m512(r, e);
46896 let r = _mm512_fmadd_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
46897 let e = _mm512_set1_ps(-0.9999999);
46898 assert_eq_m512(r, e);
46899 }
46900
46901 #[simd_test(enable = "avx512f")]
46902 unsafe fn test_mm512_mask_fmadd_round_ps() {
46903 let a = _mm512_set1_ps(0.00000007);
46904 let b = _mm512_set1_ps(1.);
46905 let c = _mm512_set1_ps(-1.);
46906 let r = _mm512_mask_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46907 a, 0, b, c,
46908 );
46909 assert_eq_m512(r, a);
46910 let r = _mm512_mask_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46911 a,
46912 0b00000000_11111111,
46913 b,
46914 c,
46915 );
46916 #[rustfmt::skip]
46917 let e = _mm512_setr_ps(
46918 -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46919 -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46920 0.00000007, 0.00000007, 0.00000007, 0.00000007,
46921 0.00000007, 0.00000007, 0.00000007, 0.00000007,
46922 );
46923 assert_eq_m512(r, e);
46924 }
46925
46926 #[simd_test(enable = "avx512f")]
46927 unsafe fn test_mm512_maskz_fmadd_round_ps() {
46928 let a = _mm512_set1_ps(0.00000007);
46929 let b = _mm512_set1_ps(1.);
46930 let c = _mm512_set1_ps(-1.);
46931 let r = _mm512_maskz_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46932 0, a, b, c,
46933 );
46934 assert_eq_m512(r, _mm512_setzero_ps());
46935 #[rustfmt::skip]
46936 let r = _mm512_maskz_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46937 0b00000000_11111111,
46938 a,
46939 b,
46940 c,
46941 );
46942 #[rustfmt::skip]
46943 let e = _mm512_setr_ps(
46944 -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46945 -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46946 0., 0., 0., 0.,
46947 0., 0., 0., 0.,
46948 );
46949 assert_eq_m512(r, e);
46950 }
46951
46952 #[simd_test(enable = "avx512f")]
46953 unsafe fn test_mm512_mask3_fmadd_round_ps() {
46954 let a = _mm512_set1_ps(0.00000007);
46955 let b = _mm512_set1_ps(1.);
46956 let c = _mm512_set1_ps(-1.);
46957 let r = _mm512_mask3_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46958 a, b, c, 0,
46959 );
46960 assert_eq_m512(r, c);
46961 let r = _mm512_mask3_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46962 a,
46963 b,
46964 c,
46965 0b00000000_11111111,
46966 );
46967 #[rustfmt::skip]
46968 let e = _mm512_setr_ps(
46969 -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46970 -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46971 -1., -1., -1., -1.,
46972 -1., -1., -1., -1.,
46973 );
46974 assert_eq_m512(r, e);
46975 }
46976
46977 #[simd_test(enable = "avx512f")]
46978 unsafe fn test_mm512_fmsub_round_ps() {
46979 let a = _mm512_set1_ps(0.00000007);
46980 let b = _mm512_set1_ps(1.);
46981 let c = _mm512_set1_ps(1.);
46982 let r = _mm512_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
46983 let e = _mm512_set1_ps(-0.99999994);
46984 assert_eq_m512(r, e);
46985 let r = _mm512_fmsub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
46986 let e = _mm512_set1_ps(-0.9999999);
46987 assert_eq_m512(r, e);
46988 }
46989
46990 #[simd_test(enable = "avx512f")]
46991 unsafe fn test_mm512_mask_fmsub_round_ps() {
46992 let a = _mm512_set1_ps(0.00000007);
46993 let b = _mm512_set1_ps(1.);
46994 let c = _mm512_set1_ps(1.);
46995 let r = _mm512_mask_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46996 a, 0, b, c,
46997 );
46998 assert_eq_m512(r, a);
46999 let r = _mm512_mask_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47000 a,
47001 0b00000000_11111111,
47002 b,
47003 c,
47004 );
47005 #[rustfmt::skip]
47006 let e = _mm512_setr_ps(
47007 -0.99999994, -0.99999994, -0.99999994, -0.99999994,
47008 -0.99999994, -0.99999994, -0.99999994, -0.99999994,
47009 0.00000007, 0.00000007, 0.00000007, 0.00000007,
47010 0.00000007, 0.00000007, 0.00000007, 0.00000007,
47011 );
47012 assert_eq_m512(r, e);
47013 }
47014
47015 #[simd_test(enable = "avx512f")]
47016 unsafe fn test_mm512_maskz_fmsub_round_ps() {
47017 let a = _mm512_set1_ps(0.00000007);
47018 let b = _mm512_set1_ps(1.);
47019 let c = _mm512_set1_ps(1.);
47020 let r = _mm512_maskz_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47021 0, a, b, c,
47022 );
47023 assert_eq_m512(r, _mm512_setzero_ps());
47024 let r = _mm512_maskz_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47025 0b00000000_11111111,
47026 a,
47027 b,
47028 c,
47029 );
47030 #[rustfmt::skip]
47031 let e = _mm512_setr_ps(
47032 -0.99999994, -0.99999994, -0.99999994, -0.99999994,
47033 -0.99999994, -0.99999994, -0.99999994, -0.99999994,
47034 0., 0., 0., 0.,
47035 0., 0., 0., 0.,
47036 );
47037 assert_eq_m512(r, e);
47038 }
47039
47040 #[simd_test(enable = "avx512f")]
47041 unsafe fn test_mm512_mask3_fmsub_round_ps() {
47042 let a = _mm512_set1_ps(0.00000007);
47043 let b = _mm512_set1_ps(1.);
47044 let c = _mm512_set1_ps(1.);
47045 let r = _mm512_mask3_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47046 a, b, c, 0,
47047 );
47048 assert_eq_m512(r, c);
47049 let r = _mm512_mask3_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47050 a,
47051 b,
47052 c,
47053 0b00000000_11111111,
47054 );
47055 #[rustfmt::skip]
47056 let e = _mm512_setr_ps(
47057 -0.99999994, -0.99999994, -0.99999994, -0.99999994,
47058 -0.99999994, -0.99999994, -0.99999994, -0.99999994,
47059 1., 1., 1., 1.,
47060 1., 1., 1., 1.,
47061 );
47062 assert_eq_m512(r, e);
47063 }
47064
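    // Note: fmaddsub alternates per lane: even lanes compute a*b - c, odd lanes a*b + c, which
    // produces the alternating 1.0000001 / -0.99999994 pattern in the expected vectors below.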
47065 #[simd_test(enable = "avx512f")]
47066 unsafe fn test_mm512_fmaddsub_round_ps() {
47067 let a = _mm512_set1_ps(0.00000007);
47068 let b = _mm512_set1_ps(1.);
47069 let c = _mm512_set1_ps(-1.);
47070 let r =
47071 _mm512_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
47072 #[rustfmt::skip]
47073 let e = _mm512_setr_ps(
47074 1.0000001, -0.99999994, 1.0000001, -0.99999994,
47075 1.0000001, -0.99999994, 1.0000001, -0.99999994,
47076 1.0000001, -0.99999994, 1.0000001, -0.99999994,
47077 1.0000001, -0.99999994, 1.0000001, -0.99999994,
47078 );
47079 assert_eq_m512(r, e);
47080 let r = _mm512_fmaddsub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
47081 let e = _mm512_setr_ps(
47082 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1.,
47083 -0.9999999, 1., -0.9999999, 1., -0.9999999,
47084 );
47085 assert_eq_m512(r, e);
47086 }
47087
47088 #[simd_test(enable = "avx512f")]
47089 unsafe fn test_mm512_mask_fmaddsub_round_ps() {
47090 let a = _mm512_set1_ps(0.00000007);
47091 let b = _mm512_set1_ps(1.);
47092 let c = _mm512_set1_ps(-1.);
47093 let r = _mm512_mask_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47094 a, 0, b, c,
47095 );
47096 assert_eq_m512(r, a);
47097 let r = _mm512_mask_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47098 a,
47099 0b00000000_11111111,
47100 b,
47101 c,
47102 );
47103 #[rustfmt::skip]
47104 let e = _mm512_setr_ps(
47105 1.0000001, -0.99999994, 1.0000001, -0.99999994,
47106 1.0000001, -0.99999994, 1.0000001, -0.99999994,
47107 0.00000007, 0.00000007, 0.00000007, 0.00000007,
47108 0.00000007, 0.00000007, 0.00000007, 0.00000007,
47109 );
47110 assert_eq_m512(r, e);
47111 }
47112
47113 #[simd_test(enable = "avx512f")]
47114 unsafe fn test_mm512_maskz_fmaddsub_round_ps() {
47115 let a = _mm512_set1_ps(0.00000007);
47116 let b = _mm512_set1_ps(1.);
47117 let c = _mm512_set1_ps(-1.);
47118 let r = _mm512_maskz_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47119 0, a, b, c,
47120 );
47121 assert_eq_m512(r, _mm512_setzero_ps());
47122 let r = _mm512_maskz_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47123 0b00000000_11111111,
47124 a,
47125 b,
47126 c,
47127 );
47128 #[rustfmt::skip]
47129 let e = _mm512_setr_ps(
47130 1.0000001, -0.99999994, 1.0000001, -0.99999994,
47131 1.0000001, -0.99999994, 1.0000001, -0.99999994,
47132 0., 0., 0., 0.,
47133 0., 0., 0., 0.,
47134 );
47135 assert_eq_m512(r, e);
47136 }
47137
47138 #[simd_test(enable = "avx512f")]
47139 unsafe fn test_mm512_mask3_fmaddsub_round_ps() {
47140 let a = _mm512_set1_ps(0.00000007);
47141 let b = _mm512_set1_ps(1.);
47142 let c = _mm512_set1_ps(-1.);
47143 let r = _mm512_mask3_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47144 a, b, c, 0,
47145 );
47146 assert_eq_m512(r, c);
47147 let r = _mm512_mask3_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47148 a,
47149 b,
47150 c,
47151 0b00000000_11111111,
47152 );
47153 #[rustfmt::skip]
47154 let e = _mm512_setr_ps(
47155 1.0000001, -0.99999994, 1.0000001, -0.99999994,
47156 1.0000001, -0.99999994, 1.0000001, -0.99999994,
47157 -1., -1., -1., -1.,
47158 -1., -1., -1., -1.,
47159 );
47160 assert_eq_m512(r, e);
47161 }
47162
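    // Note: fmsubadd is the mirror of fmaddsub: even lanes compute a*b + c, odd lanes a*b - c.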
47163 #[simd_test(enable = "avx512f")]
47164 unsafe fn test_mm512_fmsubadd_round_ps() {
47165 let a = _mm512_set1_ps(0.00000007);
47166 let b = _mm512_set1_ps(1.);
47167 let c = _mm512_set1_ps(-1.);
47168 let r =
47169 _mm512_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
47170 #[rustfmt::skip]
47171 let e = _mm512_setr_ps(
47172 -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47173 -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47174 -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47175 -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47176 );
47177 assert_eq_m512(r, e);
47178 let r = _mm512_fmsubadd_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
47179 let e = _mm512_setr_ps(
47180 -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1.,
47181 -0.9999999, 1., -0.9999999, 1., -0.9999999, 1.,
47182 );
47183 assert_eq_m512(r, e);
47184 }
47185
47186 #[simd_test(enable = "avx512f")]
47187 unsafe fn test_mm512_mask_fmsubadd_round_ps() {
47188 let a = _mm512_set1_ps(0.00000007);
47189 let b = _mm512_set1_ps(1.);
47190 let c = _mm512_set1_ps(-1.);
47191 let r = _mm512_mask_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47192 a, 0, b, c,
47193 );
47194 assert_eq_m512(r, a);
47195 let r = _mm512_mask_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47196 a,
47197 0b00000000_11111111,
47198 b,
47199 c,
47200 );
47201 #[rustfmt::skip]
47202 let e = _mm512_setr_ps(
47203 -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47204 -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47205 0.00000007, 0.00000007, 0.00000007, 0.00000007,
47206 0.00000007, 0.00000007, 0.00000007, 0.00000007,
47207 );
47208 assert_eq_m512(r, e);
47209 }
47210
47211 #[simd_test(enable = "avx512f")]
47212 unsafe fn test_mm512_maskz_fmsubadd_round_ps() {
47213 let a = _mm512_set1_ps(0.00000007);
47214 let b = _mm512_set1_ps(1.);
47215 let c = _mm512_set1_ps(-1.);
47216 let r = _mm512_maskz_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47217 0, a, b, c,
47218 );
47219 assert_eq_m512(r, _mm512_setzero_ps());
47220 let r = _mm512_maskz_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47221 0b00000000_11111111,
47222 a,
47223 b,
47224 c,
47225 );
47226 #[rustfmt::skip]
47227 let e = _mm512_setr_ps(
47228 -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47229 -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47230 0., 0., 0., 0.,
47231 0., 0., 0., 0.,
47232 );
47233 assert_eq_m512(r, e);
47234 }
47235
47236 #[simd_test(enable = "avx512f")]
47237 unsafe fn test_mm512_mask3_fmsubadd_round_ps() {
47238 let a = _mm512_set1_ps(0.00000007);
47239 let b = _mm512_set1_ps(1.);
47240 let c = _mm512_set1_ps(-1.);
47241 let r = _mm512_mask3_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47242 a, b, c, 0,
47243 );
47244 assert_eq_m512(r, c);
47245 let r = _mm512_mask3_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47246 a,
47247 b,
47248 c,
47249 0b00000000_11111111,
47250 );
47251 #[rustfmt::skip]
47252 let e = _mm512_setr_ps(
47253 -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47254 -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47255 -1., -1., -1., -1.,
47256 -1., -1., -1., -1.,
47257 );
47258 assert_eq_m512(r, e);
47259 }
47260
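    // Note: fnmadd computes -(a*b) + c; -(0.00000007) + 1.0 rounds to 0.99999994 (nearest) or
    // 0.9999999 (toward zero).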
47261 #[simd_test(enable = "avx512f")]
47262 unsafe fn test_mm512_fnmadd_round_ps() {
47263 let a = _mm512_set1_ps(0.00000007);
47264 let b = _mm512_set1_ps(1.);
47265 let c = _mm512_set1_ps(1.);
47266 let r =
47267 _mm512_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
47268 let e = _mm512_set1_ps(0.99999994);
47269 assert_eq_m512(r, e);
47270 let r = _mm512_fnmadd_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
47271 let e = _mm512_set1_ps(0.9999999);
47272 assert_eq_m512(r, e);
47273 }
47274
47275 #[simd_test(enable = "avx512f")]
47276 unsafe fn test_mm512_mask_fnmadd_round_ps() {
47277 let a = _mm512_set1_ps(0.00000007);
47278 let b = _mm512_set1_ps(1.);
47279 let c = _mm512_set1_ps(1.);
47280 let r = _mm512_mask_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47281 a, 0, b, c,
47282 );
47283 assert_eq_m512(r, a);
47284 let r = _mm512_mask_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47285 a,
47286 0b00000000_11111111,
47287 b,
47288 c,
47289 );
47290 let e = _mm512_setr_ps(
47291 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
47292 0.99999994, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007,
47293 0.00000007, 0.00000007,
47294 );
47295 assert_eq_m512(r, e);
47296 }
47297
47298 #[simd_test(enable = "avx512f")]
47299 unsafe fn test_mm512_maskz_fnmadd_round_ps() {
47300 let a = _mm512_set1_ps(0.00000007);
47301 let b = _mm512_set1_ps(1.);
47302 let c = _mm512_set1_ps(1.);
47303 let r = _mm512_maskz_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47304 0, a, b, c,
47305 );
47306 assert_eq_m512(r, _mm512_setzero_ps());
47307 let r = _mm512_maskz_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47308 0b00000000_11111111,
47309 a,
47310 b,
47311 c,
47312 );
47313 let e = _mm512_setr_ps(
47314 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
47315 0.99999994, 0., 0., 0., 0., 0., 0., 0., 0.,
47316 );
47317 assert_eq_m512(r, e);
47318 }
47319
47320 #[simd_test(enable = "avx512f")]
47321 unsafe fn test_mm512_mask3_fnmadd_round_ps() {
47322 let a = _mm512_set1_ps(0.00000007);
47323 let b = _mm512_set1_ps(1.);
47324 let c = _mm512_set1_ps(1.);
47325 let r = _mm512_mask3_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47326 a, b, c, 0,
47327 );
47328 assert_eq_m512(r, c);
47329 let r = _mm512_mask3_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47330 a,
47331 b,
47332 c,
47333 0b00000000_11111111,
47334 );
47335 let e = _mm512_setr_ps(
47336 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
47337 0.99999994, 1., 1., 1., 1., 1., 1., 1., 1.,
47338 );
47339 assert_eq_m512(r, e);
47340 }
47341
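    // Note: fnmsub computes -(a*b) - c; with c == -1.0 the results match the fnmadd tests above.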
47342 #[simd_test(enable = "avx512f")]
47343 unsafe fn test_mm512_fnmsub_round_ps() {
47344 let a = _mm512_set1_ps(0.00000007);
47345 let b = _mm512_set1_ps(1.);
47346 let c = _mm512_set1_ps(-1.);
47347 let r =
47348 _mm512_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
47349 let e = _mm512_set1_ps(0.99999994);
47350 assert_eq_m512(r, e);
47351 let r = _mm512_fnmsub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
47352 let e = _mm512_set1_ps(0.9999999);
47353 assert_eq_m512(r, e);
47354 }
47355
47356 #[simd_test(enable = "avx512f")]
47357 unsafe fn test_mm512_mask_fnmsub_round_ps() {
47358 let a = _mm512_set1_ps(0.00000007);
47359 let b = _mm512_set1_ps(1.);
47360 let c = _mm512_set1_ps(-1.);
47361 let r = _mm512_mask_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47362 a, 0, b, c,
47363 );
47364 assert_eq_m512(r, a);
47365 let r = _mm512_mask_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47366 a,
47367 0b00000000_11111111,
47368 b,
47369 c,
47370 );
47371 let e = _mm512_setr_ps(
47372 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
47373 0.99999994, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007,
47374 0.00000007, 0.00000007,
47375 );
47376 assert_eq_m512(r, e);
47377 }
47378
47379 #[simd_test(enable = "avx512f")]
47380 unsafe fn test_mm512_maskz_fnmsub_round_ps() {
47381 let a = _mm512_set1_ps(0.00000007);
47382 let b = _mm512_set1_ps(1.);
47383 let c = _mm512_set1_ps(-1.);
47384 let r = _mm512_maskz_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47385 0, a, b, c,
47386 );
47387 assert_eq_m512(r, _mm512_setzero_ps());
47388 let r = _mm512_maskz_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47389 0b00000000_11111111,
47390 a,
47391 b,
47392 c,
47393 );
47394 let e = _mm512_setr_ps(
47395 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
47396 0.99999994, 0., 0., 0., 0., 0., 0., 0., 0.,
47397 );
47398 assert_eq_m512(r, e);
47399 }
47400
47401 #[simd_test(enable = "avx512f")]
47402 unsafe fn test_mm512_mask3_fnmsub_round_ps() {
47403 let a = _mm512_set1_ps(0.00000007);
47404 let b = _mm512_set1_ps(1.);
47405 let c = _mm512_set1_ps(-1.);
47406 let r = _mm512_mask3_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47407 a, b, c, 0,
47408 );
47409 assert_eq_m512(r, c);
47410 let r = _mm512_mask3_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47411 a,
47412 b,
47413 c,
47414 0b00000000_11111111,
47415 );
47416 let e = _mm512_setr_ps(
47417 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
47418 0.99999994, -1., -1., -1., -1., -1., -1., -1., -1.,
47419 );
47420 assert_eq_m512(r, e);
47421 }
47422
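    // Note: max/min are exact, so the rounding portion of the immediate is irrelevant here;
    // _MM_FROUND_CUR_DIRECTION simply satisfies the SAE parameter of the *_round variants.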
47423 #[simd_test(enable = "avx512f")]
47424 unsafe fn test_mm512_max_round_ps() {
47425 let a = _mm512_setr_ps(
47426 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47427 );
47428 let b = _mm512_setr_ps(
47429 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
47430 );
47431 let r = _mm512_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, b);
47432 let e = _mm512_setr_ps(
47433 15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
47434 );
47435 assert_eq_m512(r, e);
47436 }
47437
47438 #[simd_test(enable = "avx512f")]
47439 unsafe fn test_mm512_mask_max_round_ps() {
47440 let a = _mm512_setr_ps(
47441 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47442 );
47443 let b = _mm512_setr_ps(
47444 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
47445 );
47446 let r = _mm512_mask_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
47447 assert_eq_m512(r, a);
47448 let r = _mm512_mask_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0b00000000_11111111, a, b);
47449 let e = _mm512_setr_ps(
47450 15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
47451 );
47452 assert_eq_m512(r, e);
47453 }
47454
47455 #[simd_test(enable = "avx512f")]
47456 unsafe fn test_mm512_maskz_max_round_ps() {
47457 let a = _mm512_setr_ps(
47458 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47459 );
47460 let b = _mm512_setr_ps(
47461 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
47462 );
47463 let r = _mm512_maskz_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
47464 assert_eq_m512(r, _mm512_setzero_ps());
47465 let r = _mm512_maskz_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(0b00000000_11111111, a, b);
47466 let e = _mm512_setr_ps(
47467 15., 14., 13., 12., 11., 10., 9., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
47468 );
47469 assert_eq_m512(r, e);
47470 }
47471
47472 #[simd_test(enable = "avx512f")]
47473 unsafe fn test_mm512_min_round_ps() {
47474 let a = _mm512_setr_ps(
47475 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47476 );
47477 let b = _mm512_setr_ps(
47478 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
47479 );
47480 let r = _mm512_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, b);
47481 let e = _mm512_setr_ps(
47482 0., 1., 2., 3., 4., 5., 6., 7., 7., 6., 5., 4., 3., 2., 1., 0.,
47483 );
47484 assert_eq_m512(r, e);
47485 }
47486
47487 #[simd_test(enable = "avx512f")]
47488 unsafe fn test_mm512_mask_min_round_ps() {
47489 let a = _mm512_setr_ps(
47490 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47491 );
47492 let b = _mm512_setr_ps(
47493 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
47494 );
47495 let r = _mm512_mask_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
47496 assert_eq_m512(r, a);
47497 let r = _mm512_mask_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0b00000000_11111111, a, b);
47498 let e = _mm512_setr_ps(
47499 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47500 );
47501 assert_eq_m512(r, e);
47502 }
47503
47504 #[simd_test(enable = "avx512f")]
47505 unsafe fn test_mm512_maskz_min_round_ps() {
47506 let a = _mm512_setr_ps(
47507 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47508 );
47509 let b = _mm512_setr_ps(
47510 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
47511 );
47512 let r = _mm512_maskz_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
47513 assert_eq_m512(r, _mm512_setzero_ps());
47514 let r = _mm512_maskz_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(0b00000000_11111111, a, b);
47515 let e = _mm512_setr_ps(
47516 0., 1., 2., 3., 4., 5., 6., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
47517 );
47518 assert_eq_m512(r, e);
47519 }
47520
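    // Note: getexp returns the unbiased exponent, i.e. floor(log2(|a|)), as a float:
    // getexp(3.0) == 1.0.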
47521 #[simd_test(enable = "avx512f")]
47522 unsafe fn test_mm512_getexp_round_ps() {
47523 let a = _mm512_set1_ps(3.);
47524 let r = _mm512_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(a);
47525 let e = _mm512_set1_ps(1.);
47526 assert_eq_m512(r, e);
47527 }
47528
47529 #[simd_test(enable = "avx512f")]
47530 unsafe fn test_mm512_mask_getexp_round_ps() {
47531 let a = _mm512_set1_ps(3.);
47532 let r = _mm512_mask_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0, a);
47533 assert_eq_m512(r, a);
47534 let r = _mm512_mask_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111_00000000, a);
47535 let e = _mm512_setr_ps(
47536 3., 3., 3., 3., 3., 3., 3., 3., 1., 1., 1., 1., 1., 1., 1., 1.,
47537 );
47538 assert_eq_m512(r, e);
47539 }
47540
47541 #[simd_test(enable = "avx512f")]
47542 unsafe fn test_mm512_maskz_getexp_round_ps() {
47543 let a = _mm512_set1_ps(3.);
47544 let r = _mm512_maskz_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(0, a);
47545 assert_eq_m512(r, _mm512_setzero_ps());
47546 let r = _mm512_maskz_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(0b11111111_00000000, a);
47547 let e = _mm512_setr_ps(
47548 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
47549 );
47550 assert_eq_m512(r, e);
47551 }
47552
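    // Note: roundscale keeps IMM8[7:4] fractional bits; with IMM8 == 0 the value is rounded to
    // an integer, so 1.1 becomes 1.0.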
47553 #[simd_test(enable = "avx512f")]
47554 unsafe fn test_mm512_roundscale_round_ps() {
47555 let a = _mm512_set1_ps(1.1);
47556 let r = _mm512_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(a);
47557 let e = _mm512_set1_ps(1.0);
47558 assert_eq_m512(r, e);
47559 }
47560
47561 #[simd_test(enable = "avx512f")]
47562 unsafe fn test_mm512_mask_roundscale_round_ps() {
47563 let a = _mm512_set1_ps(1.1);
47564 let r = _mm512_mask_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(a, 0, a);
47565 let e = _mm512_set1_ps(1.1);
47566 assert_eq_m512(r, e);
47567 let r = _mm512_mask_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(
47568 a,
47569 0b11111111_11111111,
47570 a,
47571 );
47572 let e = _mm512_set1_ps(1.0);
47573 assert_eq_m512(r, e);
47574 }
47575
47576 #[simd_test(enable = "avx512f")]
47577 unsafe fn test_mm512_maskz_roundscale_round_ps() {
47578 let a = _mm512_set1_ps(1.1);
47579 let r = _mm512_maskz_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(0, a);
47580 assert_eq_m512(r, _mm512_setzero_ps());
47581 let r =
47582 _mm512_maskz_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(0b11111111_11111111, a);
47583 let e = _mm512_set1_ps(1.0);
47584 assert_eq_m512(r, e);
47585 }
47586
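    // Note: scalef computes a * 2^floor(b): 1.0 * 2^3 == 8.0.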
47587 #[simd_test(enable = "avx512f")]
47588 unsafe fn test_mm512_scalef_round_ps() {
47589 let a = _mm512_set1_ps(1.);
47590 let b = _mm512_set1_ps(3.);
47591 let r = _mm512_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
47592 let e = _mm512_set1_ps(8.);
47593 assert_eq_m512(r, e);
47594 }
47595
47596 #[simd_test(enable = "avx512f")]
47597 unsafe fn test_mm512_mask_scalef_round_ps() {
47598 let a = _mm512_set1_ps(1.);
47599 let b = _mm512_set1_ps(3.);
47600 let r = _mm512_mask_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47601 a, 0, a, b,
47602 );
47603 assert_eq_m512(r, a);
47604 let r = _mm512_mask_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47605 a,
47606 0b11111111_00000000,
47607 a,
47608 b,
47609 );
47610 let e = _mm512_set_ps(
47611 8., 8., 8., 8., 8., 8., 8., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
47612 );
47613 assert_eq_m512(r, e);
47614 }
47615
47616 #[simd_test(enable = "avx512f")]
47617 unsafe fn test_mm512_maskz_scalef_round_ps() {
47618 let a = _mm512_set1_ps(1.);
47619 let b = _mm512_set1_ps(3.);
47620 let r = _mm512_maskz_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47621 0, a, b,
47622 );
47623 assert_eq_m512(r, _mm512_setzero_ps());
47624 let r = _mm512_maskz_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47625 0b11111111_00000000,
47626 a,
47627 b,
47628 );
47629 let e = _mm512_set_ps(
47630 8., 8., 8., 8., 8., 8., 8., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
47631 );
47632 assert_eq_m512(r, e);
47633 }
47634
47635 #[simd_test(enable = "avx512f")]
47636 unsafe fn test_mm512_fixupimm_round_ps() {
47637 let a = _mm512_set1_ps(f32::NAN);
47638 let b = _mm512_set1_ps(f32::MAX);
47639 let c = _mm512_set1_epi32(i32::MAX);
47640 let r = _mm512_fixupimm_round_ps::<5, _MM_FROUND_CUR_DIRECTION>(a, b, c);
47641 let e = _mm512_set1_ps(0.0);
47642 assert_eq_m512(r, e);
47643 }
47644
47645 #[simd_test(enable = "avx512f")]
47646 unsafe fn test_mm512_mask_fixupimm_round_ps() {
47647 #[rustfmt::skip]
47648 let a = _mm512_set_ps(
47649 f32::NAN, f32::NAN, f32::NAN, f32::NAN,
47650 f32::NAN, f32::NAN, f32::NAN, f32::NAN,
47651 1., 1., 1., 1.,
47652 1., 1., 1., 1.,
47653 );
47654 let b = _mm512_set1_ps(f32::MAX);
47655 let c = _mm512_set1_epi32(i32::MAX);
47656 let r = _mm512_mask_fixupimm_round_ps::<5, _MM_FROUND_CUR_DIRECTION>(
47657 a,
47658 0b11111111_00000000,
47659 b,
47660 c,
47661 );
47662 let e = _mm512_set_ps(
47663 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
47664 );
47665 assert_eq_m512(r, e);
47666 }
47667
47668 #[simd_test(enable = "avx512f")]
47669 unsafe fn test_mm512_maskz_fixupimm_round_ps() {
47670 #[rustfmt::skip]
47671 let a = _mm512_set_ps(
47672 f32::NAN, f32::NAN, f32::NAN, f32::NAN,
47673 f32::NAN, f32::NAN, f32::NAN, f32::NAN,
47674 1., 1., 1., 1.,
47675 1., 1., 1., 1.,
47676 );
47677 let b = _mm512_set1_ps(f32::MAX);
47678 let c = _mm512_set1_epi32(i32::MAX);
47679 let r = _mm512_maskz_fixupimm_round_ps::<5, _MM_FROUND_CUR_DIRECTION>(
47680 0b11111111_00000000,
47681 a,
47682 b,
47683 c,
47684 );
47685 let e = _mm512_set_ps(
47686 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
47687 );
47688 assert_eq_m512(r, e);
47689 }
47690
47691 #[simd_test(enable = "avx512f")]
47692 unsafe fn test_mm512_getmant_round_ps() {
47693 let a = _mm512_set1_ps(10.);
47694 let r = _mm512_getmant_round_ps::<
47695 _MM_MANT_NORM_1_2,
47696 _MM_MANT_SIGN_SRC,
47697 _MM_FROUND_CUR_DIRECTION,
47698 >(a);
47699 let e = _mm512_set1_ps(1.25);
47700 assert_eq_m512(r, e);
47701 }
47702
47703 #[simd_test(enable = "avx512f")]
47704 unsafe fn test_mm512_mask_getmant_round_ps() {
47705 let a = _mm512_set1_ps(10.);
47706 let r = _mm512_mask_getmant_round_ps::<
47707 _MM_MANT_NORM_1_2,
47708 _MM_MANT_SIGN_SRC,
47709 _MM_FROUND_CUR_DIRECTION,
47710 >(a, 0, a);
47711 assert_eq_m512(r, a);
47712 let r = _mm512_mask_getmant_round_ps::<
47713 _MM_MANT_NORM_1_2,
47714 _MM_MANT_SIGN_SRC,
47715 _MM_FROUND_CUR_DIRECTION,
47716 >(a, 0b11111111_00000000, a);
47717 let e = _mm512_setr_ps(
47718 10., 10., 10., 10., 10., 10., 10., 10., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
47719 );
47720 assert_eq_m512(r, e);
47721 }
47722
47723 #[simd_test(enable = "avx512f")]
47724 unsafe fn test_mm512_maskz_getmant_round_ps() {
47725 let a = _mm512_set1_ps(10.);
47726 let r = _mm512_maskz_getmant_round_ps::<
47727 _MM_MANT_NORM_1_2,
47728 _MM_MANT_SIGN_SRC,
47729 _MM_FROUND_CUR_DIRECTION,
47730 >(0, a);
47731 assert_eq_m512(r, _mm512_setzero_ps());
47732 let r = _mm512_maskz_getmant_round_ps::<
47733 _MM_MANT_NORM_1_2,
47734 _MM_MANT_SIGN_SRC,
47735 _MM_FROUND_CUR_DIRECTION,
47736 >(0b11111111_00000000, a);
47737 let e = _mm512_setr_ps(
47738 0., 0., 0., 0., 0., 0., 0., 0., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
47739 );
47740 assert_eq_m512(r, e);
47741 }
47742
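    // Note: cvtps_epi32 uses the current MXCSR rounding mode (round-to-nearest-even by
    // default), hence -3.5 -> -4 and 13.5 -> 14 below.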
47743 #[simd_test(enable = "avx512f")]
47744 unsafe fn test_mm512_cvtps_epi32() {
47745 let a = _mm512_setr_ps(
47746 0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47747 );
47748 let r = _mm512_cvtps_epi32(a);
47749 let e = _mm512_setr_epi32(0, -1, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
47750 assert_eq_m512i(r, e);
47751 }
47752
47753 #[simd_test(enable = "avx512f")]
47754 unsafe fn test_mm512_mask_cvtps_epi32() {
47755 let a = _mm512_setr_ps(
47756 0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47757 );
47758 let src = _mm512_set1_epi32(0);
47759 let r = _mm512_mask_cvtps_epi32(src, 0, a);
47760 assert_eq_m512i(r, src);
47761 let r = _mm512_mask_cvtps_epi32(src, 0b00000000_11111111, a);
47762 let e = _mm512_setr_epi32(0, -1, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
47763 assert_eq_m512i(r, e);
47764 }
47765
47766 #[simd_test(enable = "avx512f")]
47767 unsafe fn test_mm512_maskz_cvtps_epi32() {
47768 let a = _mm512_setr_ps(
47769 0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47770 );
47771 let r = _mm512_maskz_cvtps_epi32(0, a);
47772 assert_eq_m512i(r, _mm512_setzero_si512());
47773 let r = _mm512_maskz_cvtps_epi32(0b00000000_11111111, a);
47774 let e = _mm512_setr_epi32(0, -1, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
47775 assert_eq_m512i(r, e);
47776 }
47777
47778 #[simd_test(enable = "avx512f,avx512vl")]
47779 unsafe fn test_mm256_mask_cvtps_epi32() {
47780 let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
47781 let src = _mm256_set1_epi32(0);
47782 let r = _mm256_mask_cvtps_epi32(src, 0, a);
47783 assert_eq_m256i(r, src);
47784 let r = _mm256_mask_cvtps_epi32(src, 0b11111111, a);
47785 let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
47786 assert_eq_m256i(r, e);
47787 }
47788
47789 #[simd_test(enable = "avx512f,avx512vl")]
47790 unsafe fn test_mm256_maskz_cvtps_epi32() {
47791 let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
47792 let r = _mm256_maskz_cvtps_epi32(0, a);
47793 assert_eq_m256i(r, _mm256_setzero_si256());
47794 let r = _mm256_maskz_cvtps_epi32(0b11111111, a);
47795 let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
47796 assert_eq_m256i(r, e);
47797 }
47798
47799 #[simd_test(enable = "avx512f,avx512vl")]
47800 unsafe fn test_mm_mask_cvtps_epi32() {
47801 let a = _mm_set_ps(12., 13.5, 14., 15.5);
47802 let src = _mm_set1_epi32(0);
47803 let r = _mm_mask_cvtps_epi32(src, 0, a);
47804 assert_eq_m128i(r, src);
47805 let r = _mm_mask_cvtps_epi32(src, 0b00001111, a);
47806 let e = _mm_set_epi32(12, 14, 14, 16);
47807 assert_eq_m128i(r, e);
47808 }
47809
47810 #[simd_test(enable = "avx512f,avx512vl")]
47811 unsafe fn test_mm_maskz_cvtps_epi32() {
47812 let a = _mm_set_ps(12., 13.5, 14., 15.5);
47813 let r = _mm_maskz_cvtps_epi32(0, a);
47814 assert_eq_m128i(r, _mm_setzero_si128());
47815 let r = _mm_maskz_cvtps_epi32(0b00001111, a);
47816 let e = _mm_set_epi32(12, 14, 14, 16);
47817 assert_eq_m128i(r, e);
47818 }
47819
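    // Note: for the unsigned conversion, negative inputs are out of range and produce the
    // all-ones integer indefinite value (0xFFFF_FFFF), which the signed expected vectors below
    // show as -1.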
47820 #[simd_test(enable = "avx512f")]
47821 unsafe fn test_mm512_cvtps_epu32() {
47822 let a = _mm512_setr_ps(
47823 0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47824 );
47825 let r = _mm512_cvtps_epu32(a);
47826 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 10, 10, 12, 12, 14, 14, 16);
47827 assert_eq_m512i(r, e);
47828 }
47829
47830 #[simd_test(enable = "avx512f")]
47831 unsafe fn test_mm512_mask_cvtps_epu32() {
47832 let a = _mm512_setr_ps(
47833 0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47834 );
47835 let src = _mm512_set1_epi32(0);
47836 let r = _mm512_mask_cvtps_epu32(src, 0, a);
47837 assert_eq_m512i(r, src);
47838 let r = _mm512_mask_cvtps_epu32(src, 0b00000000_11111111, a);
47839 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
47840 assert_eq_m512i(r, e);
47841 }
47842
47843 #[simd_test(enable = "avx512f")]
47844 unsafe fn test_mm512_maskz_cvtps_epu32() {
47845 let a = _mm512_setr_ps(
47846 0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47847 );
47848 let r = _mm512_maskz_cvtps_epu32(0, a);
47849 assert_eq_m512i(r, _mm512_setzero_si512());
47850 let r = _mm512_maskz_cvtps_epu32(0b00000000_11111111, a);
47851 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
47852 assert_eq_m512i(r, e);
47853 }
47854
47855 #[simd_test(enable = "avx512f,avx512vl")]
47856 unsafe fn test_mm256_cvtps_epu32() {
47857 let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
47858 let r = _mm256_cvtps_epu32(a);
47859 let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
47860 assert_eq_m256i(r, e);
47861 }
47862
47863 #[simd_test(enable = "avx512f,avx512vl")]
47864 unsafe fn test_mm256_mask_cvtps_epu32() {
47865 let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
47866 let src = _mm256_set1_epi32(0);
47867 let r = _mm256_mask_cvtps_epu32(src, 0, a);
47868 assert_eq_m256i(r, src);
47869 let r = _mm256_mask_cvtps_epu32(src, 0b11111111, a);
47870 let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
47871 assert_eq_m256i(r, e);
47872 }
47873
47874 #[simd_test(enable = "avx512f,avx512vl")]
47875 unsafe fn test_mm256_maskz_cvtps_epu32() {
47876 let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
47877 let r = _mm256_maskz_cvtps_epu32(0, a);
47878 assert_eq_m256i(r, _mm256_setzero_si256());
47879 let r = _mm256_maskz_cvtps_epu32(0b11111111, a);
47880 let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
47881 assert_eq_m256i(r, e);
47882 }
47883
47884 #[simd_test(enable = "avx512f,avx512vl")]
47885 unsafe fn test_mm_cvtps_epu32() {
47886 let a = _mm_set_ps(12., 13.5, 14., 15.5);
47887 let r = _mm_cvtps_epu32(a);
47888 let e = _mm_set_epi32(12, 14, 14, 16);
47889 assert_eq_m128i(r, e);
47890 }
47891
47892 #[simd_test(enable = "avx512f,avx512vl")]
47893 unsafe fn test_mm_mask_cvtps_epu32() {
47894 let a = _mm_set_ps(12., 13.5, 14., 15.5);
47895 let src = _mm_set1_epi32(0);
47896 let r = _mm_mask_cvtps_epu32(src, 0, a);
47897 assert_eq_m128i(r, src);
47898 let r = _mm_mask_cvtps_epu32(src, 0b00001111, a);
47899 let e = _mm_set_epi32(12, 14, 14, 16);
47900 assert_eq_m128i(r, e);
47901 }
47902
47903 #[simd_test(enable = "avx512f,avx512vl")]
47904 unsafe fn test_mm_maskz_cvtps_epu32() {
47905 let a = _mm_set_ps(12., 13.5, 14., 15.5);
47906 let r = _mm_maskz_cvtps_epu32(0, a);
47907 assert_eq_m128i(r, _mm_setzero_si128());
47908 let r = _mm_maskz_cvtps_epu32(0b00001111, a);
47909 let e = _mm_set_epi32(12, 14, 14, 16);
47910 assert_eq_m128i(r, e);
47911 }
47912
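    // Note: cvtepi8_epi32 sign-extends each of the 16 bytes to 32 bits; all inputs here are
    // non-negative, so the results match the zero-extending cvtepu8_epi32 tests further down.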
47913 #[simd_test(enable = "avx512f")]
47914 unsafe fn test_mm512_cvtepi8_epi32() {
47915 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47916 let r = _mm512_cvtepi8_epi32(a);
47917 let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47918 assert_eq_m512i(r, e);
47919 }
47920
47921 #[simd_test(enable = "avx512f")]
47922 unsafe fn test_mm512_mask_cvtepi8_epi32() {
47923 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47924 let src = _mm512_set1_epi32(-1);
47925 let r = _mm512_mask_cvtepi8_epi32(src, 0, a);
47926 assert_eq_m512i(r, src);
47927 let r = _mm512_mask_cvtepi8_epi32(src, 0b00000000_11111111, a);
47928 let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
47929 assert_eq_m512i(r, e);
47930 }
47931
47932 #[simd_test(enable = "avx512f")]
47933 unsafe fn test_mm512_maskz_cvtepi8_epi32() {
47934 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47935 let r = _mm512_maskz_cvtepi8_epi32(0, a);
47936 assert_eq_m512i(r, _mm512_setzero_si512());
47937 let r = _mm512_maskz_cvtepi8_epi32(0b00000000_11111111, a);
47938 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
47939 assert_eq_m512i(r, e);
47940 }
47941
47942 #[simd_test(enable = "avx512f,avx512vl")]
47943 unsafe fn test_mm256_mask_cvtepi8_epi32() {
47944 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47945 let src = _mm256_set1_epi32(-1);
47946 let r = _mm256_mask_cvtepi8_epi32(src, 0, a);
47947 assert_eq_m256i(r, src);
47948 let r = _mm256_mask_cvtepi8_epi32(src, 0b11111111, a);
47949 let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
47950 assert_eq_m256i(r, e);
47951 }
47952
47953 #[simd_test(enable = "avx512f,avx512vl")]
47954 unsafe fn test_mm256_maskz_cvtepi8_epi32() {
47955 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47956 let r = _mm256_maskz_cvtepi8_epi32(0, a);
47957 assert_eq_m256i(r, _mm256_setzero_si256());
47958 let r = _mm256_maskz_cvtepi8_epi32(0b11111111, a);
47959 let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
47960 assert_eq_m256i(r, e);
47961 }
47962
47963 #[simd_test(enable = "avx512f,avx512vl")]
47964 unsafe fn test_mm_mask_cvtepi8_epi32() {
47965 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47966 let src = _mm_set1_epi32(-1);
47967 let r = _mm_mask_cvtepi8_epi32(src, 0, a);
47968 assert_eq_m128i(r, src);
47969 let r = _mm_mask_cvtepi8_epi32(src, 0b00001111, a);
47970 let e = _mm_set_epi32(12, 13, 14, 15);
47971 assert_eq_m128i(r, e);
47972 }
47973
47974 #[simd_test(enable = "avx512f,avx512vl")]
47975 unsafe fn test_mm_maskz_cvtepi8_epi32() {
47976 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47977 let r = _mm_maskz_cvtepi8_epi32(0, a);
47978 assert_eq_m128i(r, _mm_setzero_si128());
47979 let r = _mm_maskz_cvtepi8_epi32(0b00001111, a);
47980 let e = _mm_set_epi32(12, 13, 14, 15);
47981 assert_eq_m128i(r, e);
47982 }
47983
47984 #[simd_test(enable = "avx512f")]
47985 unsafe fn test_mm512_cvtepu8_epi32() {
47986 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47987 let r = _mm512_cvtepu8_epi32(a);
47988 let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47989 assert_eq_m512i(r, e);
47990 }
47991
47992 #[simd_test(enable = "avx512f")]
47993 unsafe fn test_mm512_mask_cvtepu8_epi32() {
47994 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47995 let src = _mm512_set1_epi32(-1);
47996 let r = _mm512_mask_cvtepu8_epi32(src, 0, a);
47997 assert_eq_m512i(r, src);
47998 let r = _mm512_mask_cvtepu8_epi32(src, 0b00000000_11111111, a);
47999 let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
48000 assert_eq_m512i(r, e);
48001 }
48002
48003 #[simd_test(enable = "avx512f")]
48004 unsafe fn test_mm512_maskz_cvtepu8_epi32() {
48005 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48006 let r = _mm512_maskz_cvtepu8_epi32(0, a);
48007 assert_eq_m512i(r, _mm512_setzero_si512());
48008 let r = _mm512_maskz_cvtepu8_epi32(0b00000000_11111111, a);
48009 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
48010 assert_eq_m512i(r, e);
48011 }
48012
48013 #[simd_test(enable = "avx512f,avx512vl")]
48014 unsafe fn test_mm256_mask_cvtepu8_epi32() {
48015 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48016 let src = _mm256_set1_epi32(-1);
48017 let r = _mm256_mask_cvtepu8_epi32(src, 0, a);
48018 assert_eq_m256i(r, src);
48019 let r = _mm256_mask_cvtepu8_epi32(src, 0b11111111, a);
48020 let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
48021 assert_eq_m256i(r, e);
48022 }
48023
48024 #[simd_test(enable = "avx512f,avx512vl")]
48025 unsafe fn test_mm256_maskz_cvtepu8_epi32() {
48026 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48027 let r = _mm256_maskz_cvtepu8_epi32(0, a);
48028 assert_eq_m256i(r, _mm256_setzero_si256());
48029 let r = _mm256_maskz_cvtepu8_epi32(0b11111111, a);
48030 let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
48031 assert_eq_m256i(r, e);
48032 }
48033
48034 #[simd_test(enable = "avx512f,avx512vl")]
48035 unsafe fn test_mm_mask_cvtepu8_epi32() {
48036 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48037 let src = _mm_set1_epi32(-1);
48038 let r = _mm_mask_cvtepu8_epi32(src, 0, a);
48039 assert_eq_m128i(r, src);
48040 let r = _mm_mask_cvtepu8_epi32(src, 0b00001111, a);
48041 let e = _mm_set_epi32(12, 13, 14, 15);
48042 assert_eq_m128i(r, e);
48043 }
48044
48045 #[simd_test(enable = "avx512f,avx512vl")]
48046 unsafe fn test_mm_maskz_cvtepu8_epi32() {
48047 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48048 let r = _mm_maskz_cvtepu8_epi32(0, a);
48049 assert_eq_m128i(r, _mm_setzero_si128());
48050 let r = _mm_maskz_cvtepu8_epi32(0b00001111, a);
48051 let e = _mm_set_epi32(12, 13, 14, 15);
48052 assert_eq_m128i(r, e);
48053 }
48054
48055 #[simd_test(enable = "avx512f")]
48056 unsafe fn test_mm512_cvtepi16_epi32() {
48057 let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48058 let r = _mm512_cvtepi16_epi32(a);
48059 let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48060 assert_eq_m512i(r, e);
48061 }
48062
48063 #[simd_test(enable = "avx512f")]
48064 unsafe fn test_mm512_mask_cvtepi16_epi32() {
48065 let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48066 let src = _mm512_set1_epi32(-1);
48067 let r = _mm512_mask_cvtepi16_epi32(src, 0, a);
48068 assert_eq_m512i(r, src);
48069 let r = _mm512_mask_cvtepi16_epi32(src, 0b00000000_11111111, a);
48070 let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
48071 assert_eq_m512i(r, e);
48072 }
48073
48074 #[simd_test(enable = "avx512f")]
48075 unsafe fn test_mm512_maskz_cvtepi16_epi32() {
48076 let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48077 let r = _mm512_maskz_cvtepi16_epi32(0, a);
48078 assert_eq_m512i(r, _mm512_setzero_si512());
48079 let r = _mm512_maskz_cvtepi16_epi32(0b00000000_11111111, a);
48080 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
48081 assert_eq_m512i(r, e);
48082 }
48083
48084 #[simd_test(enable = "avx512f,avx512vl")]
48085 unsafe fn test_mm256_mask_cvtepi16_epi32() {
48086 let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48087 let src = _mm256_set1_epi32(-1);
48088 let r = _mm256_mask_cvtepi16_epi32(src, 0, a);
48089 assert_eq_m256i(r, src);
48090 let r = _mm256_mask_cvtepi16_epi32(src, 0b11111111, a);
48091 let e = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48092 assert_eq_m256i(r, e);
48093 }
48094
48095 #[simd_test(enable = "avx512f,avx512vl")]
48096 unsafe fn test_mm256_maskz_cvtepi16_epi32() {
48097 let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48098 let r = _mm256_maskz_cvtepi16_epi32(0, a);
48099 assert_eq_m256i(r, _mm256_setzero_si256());
48100 let r = _mm256_maskz_cvtepi16_epi32(0b11111111, a);
48101 let e = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48102 assert_eq_m256i(r, e);
48103 }
48104
48105 #[simd_test(enable = "avx512f,avx512vl")]
48106 unsafe fn test_mm_mask_cvtepi16_epi32() {
48107 let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48108 let src = _mm_set1_epi32(-1);
48109 let r = _mm_mask_cvtepi16_epi32(src, 0, a);
48110 assert_eq_m128i(r, src);
48111 let r = _mm_mask_cvtepi16_epi32(src, 0b00001111, a);
48112 let e = _mm_set_epi32(4, 5, 6, 7);
48113 assert_eq_m128i(r, e);
48114 }
48115
48116 #[simd_test(enable = "avx512f,avx512vl")]
48117 unsafe fn test_mm_maskz_cvtepi16_epi32() {
48118 let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48119 let r = _mm_maskz_cvtepi16_epi32(0, a);
48120 assert_eq_m128i(r, _mm_setzero_si128());
48121 let r = _mm_maskz_cvtepi16_epi32(0b00001111, a);
48122 let e = _mm_set_epi32(4, 5, 6, 7);
48123 assert_eq_m128i(r, e);
48124 }
48125
48126 #[simd_test(enable = "avx512f")]
48127 unsafe fn test_mm512_cvtepu16_epi32() {
48128 let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48129 let r = _mm512_cvtepu16_epi32(a);
48130 let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48131 assert_eq_m512i(r, e);
48132 }
48133
48134 #[simd_test(enable = "avx512f")]
48135 unsafe fn test_mm512_mask_cvtepu16_epi32() {
48136 let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48137 let src = _mm512_set1_epi32(-1);
48138 let r = _mm512_mask_cvtepu16_epi32(src, 0, a);
48139 assert_eq_m512i(r, src);
48140 let r = _mm512_mask_cvtepu16_epi32(src, 0b00000000_11111111, a);
48141 let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
48142 assert_eq_m512i(r, e);
48143 }
48144
48145 #[simd_test(enable = "avx512f")]
48146 unsafe fn test_mm512_maskz_cvtepu16_epi32() {
48147 let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48148 let r = _mm512_maskz_cvtepu16_epi32(0, a);
48149 assert_eq_m512i(r, _mm512_setzero_si512());
48150 let r = _mm512_maskz_cvtepu16_epi32(0b00000000_11111111, a);
48151 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
48152 assert_eq_m512i(r, e);
48153 }
48154
48155 #[simd_test(enable = "avx512f,avx512vl")]
48156 unsafe fn test_mm256_mask_cvtepu16_epi32() {
48157 let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
48158 let src = _mm256_set1_epi32(-1);
48159 let r = _mm256_mask_cvtepu16_epi32(src, 0, a);
48160 assert_eq_m256i(r, src);
48161 let r = _mm256_mask_cvtepu16_epi32(src, 0b11111111, a);
48162 let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
48163 assert_eq_m256i(r, e);
48164 }
48165
48166 #[simd_test(enable = "avx512f,avx512vl")]
48167 unsafe fn test_mm256_maskz_cvtepu16_epi32() {
48168 let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
48169 let r = _mm256_maskz_cvtepu16_epi32(0, a);
48170 assert_eq_m256i(r, _mm256_setzero_si256());
48171 let r = _mm256_maskz_cvtepu16_epi32(0b11111111, a);
48172 let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
48173 assert_eq_m256i(r, e);
48174 }
48175
48176 #[simd_test(enable = "avx512f,avx512vl")]
48177 unsafe fn test_mm_mask_cvtepu16_epi32() {
48178 let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
48179 let src = _mm_set1_epi32(-1);
48180 let r = _mm_mask_cvtepu16_epi32(src, 0, a);
48181 assert_eq_m128i(r, src);
48182 let r = _mm_mask_cvtepu16_epi32(src, 0b00001111, a);
48183 let e = _mm_set_epi32(12, 13, 14, 15);
48184 assert_eq_m128i(r, e);
48185 }
48186
48187 #[simd_test(enable = "avx512f,avx512vl")]
48188 unsafe fn test_mm_maskz_cvtepu16_epi32() {
48189 let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
48190 let r = _mm_maskz_cvtepu16_epi32(0, a);
48191 assert_eq_m128i(r, _mm_setzero_si128());
48192 let r = _mm_maskz_cvtepu16_epi32(0b00001111, a);
48193 let e = _mm_set_epi32(12, 13, 14, 15);
48194 assert_eq_m128i(r, e);
48195 }
48196
48197 #[simd_test(enable = "avx512f")]
48198 unsafe fn test_mm512_cvtepi32_ps() {
48199 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48200 let r = _mm512_cvtepi32_ps(a);
48201 let e = _mm512_set_ps(
48202 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
48203 );
48204 assert_eq_m512(r, e);
48205 }
48206
48207 #[simd_test(enable = "avx512f")]
48208 unsafe fn test_mm512_mask_cvtepi32_ps() {
48209 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48210 let src = _mm512_set1_ps(-1.);
48211 let r = _mm512_mask_cvtepi32_ps(src, 0, a);
48212 assert_eq_m512(r, src);
48213 let r = _mm512_mask_cvtepi32_ps(src, 0b00000000_11111111, a);
48214 let e = _mm512_set_ps(
48215 -1., -1., -1., -1., -1., -1., -1., -1., 8., 9., 10., 11., 12., 13., 14., 15.,
48216 );
48217 assert_eq_m512(r, e);
48218 }
48219
48220 #[simd_test(enable = "avx512f")]
48221 unsafe fn test_mm512_maskz_cvtepi32_ps() {
48222 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48223 let r = _mm512_maskz_cvtepi32_ps(0, a);
48224 assert_eq_m512(r, _mm512_setzero_ps());
48225 let r = _mm512_maskz_cvtepi32_ps(0b00000000_11111111, a);
48226 let e = _mm512_set_ps(
48227 0., 0., 0., 0., 0., 0., 0., 0., 8., 9., 10., 11., 12., 13., 14., 15.,
48228 );
48229 assert_eq_m512(r, e);
48230 }
48231
48232 #[simd_test(enable = "avx512f,avx512vl")]
48233 unsafe fn test_mm256_mask_cvtepi32_ps() {
48234 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
48235 let src = _mm256_set1_ps(-1.);
48236 let r = _mm256_mask_cvtepi32_ps(src, 0, a);
48237 assert_eq_m256(r, src);
48238 let r = _mm256_mask_cvtepi32_ps(src, 0b11111111, a);
48239 let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
48240 assert_eq_m256(r, e);
48241 }
48242
48243 #[simd_test(enable = "avx512f,avx512vl")]
48244 unsafe fn test_mm256_maskz_cvtepi32_ps() {
48245 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
48246 let r = _mm256_maskz_cvtepi32_ps(0, a);
48247 assert_eq_m256(r, _mm256_setzero_ps());
48248 let r = _mm256_maskz_cvtepi32_ps(0b11111111, a);
48249 let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
48250 assert_eq_m256(r, e);
48251 }
48252
48253 #[simd_test(enable = "avx512f,avx512vl")]
48254 unsafe fn test_mm_mask_cvtepi32_ps() {
48255 let a = _mm_set_epi32(1, 2, 3, 4);
48256 let src = _mm_set1_ps(-1.);
48257 let r = _mm_mask_cvtepi32_ps(src, 0, a);
48258 assert_eq_m128(r, src);
48259 let r = _mm_mask_cvtepi32_ps(src, 0b00001111, a);
48260 let e = _mm_set_ps(1., 2., 3., 4.);
48261 assert_eq_m128(r, e);
48262 }
48263
48264 #[simd_test(enable = "avx512f,avx512vl")]
48265 unsafe fn test_mm_maskz_cvtepi32_ps() {
48266 let a = _mm_set_epi32(1, 2, 3, 4);
48267 let r = _mm_maskz_cvtepi32_ps(0, a);
48268 assert_eq_m128(r, _mm_setzero_ps());
48269 let r = _mm_maskz_cvtepi32_ps(0b00001111, a);
48270 let e = _mm_set_ps(1., 2., 3., 4.);
48271 assert_eq_m128(r, e);
48272 }
48273
48274 #[simd_test(enable = "avx512f")]
48275 unsafe fn test_mm512_cvtepu32_ps() {
48276 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48277 let r = _mm512_cvtepu32_ps(a);
48278 let e = _mm512_set_ps(
48279 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
48280 );
48281 assert_eq_m512(r, e);
48282 }
48283
48284 #[simd_test(enable = "avx512f")]
48285 unsafe fn test_mm512_mask_cvtepu32_ps() {
48286 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48287 let src = _mm512_set1_ps(-1.);
48288 let r = _mm512_mask_cvtepu32_ps(src, 0, a);
48289 assert_eq_m512(r, src);
48290 let r = _mm512_mask_cvtepu32_ps(src, 0b00000000_11111111, a);
48291 let e = _mm512_set_ps(
48292 -1., -1., -1., -1., -1., -1., -1., -1., 8., 9., 10., 11., 12., 13., 14., 15.,
48293 );
48294 assert_eq_m512(r, e);
48295 }
48296
48297 #[simd_test(enable = "avx512f")]
48298 unsafe fn test_mm512_maskz_cvtepu32_ps() {
48299 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48300 let r = _mm512_maskz_cvtepu32_ps(0, a);
48301 assert_eq_m512(r, _mm512_setzero_ps());
48302 let r = _mm512_maskz_cvtepu32_ps(0b00000000_11111111, a);
48303 let e = _mm512_set_ps(
48304 0., 0., 0., 0., 0., 0., 0., 0., 8., 9., 10., 11., 12., 13., 14., 15.,
48305 );
48306 assert_eq_m512(r, e);
48307 }
48308
48309 #[simd_test(enable = "avx512f")]
48310 unsafe fn test_mm512_cvtepi32_epi16() {
48311 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48312 let r = _mm512_cvtepi32_epi16(a);
48313 let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48314 assert_eq_m256i(r, e);
48315 }
48316
48317 #[simd_test(enable = "avx512f")]
48318 unsafe fn test_mm512_mask_cvtepi32_epi16() {
48319 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48320 let src = _mm256_set1_epi16(-1);
48321 let r = _mm512_mask_cvtepi32_epi16(src, 0, a);
48322 assert_eq_m256i(r, src);
48323 let r = _mm512_mask_cvtepi32_epi16(src, 0b00000000_11111111, a);
48324 let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
48325 assert_eq_m256i(r, e);
48326 }
48327
48328 #[simd_test(enable = "avx512f")]
48329 unsafe fn test_mm512_maskz_cvtepi32_epi16() {
48330 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48331 let r = _mm512_maskz_cvtepi32_epi16(0, a);
48332 assert_eq_m256i(r, _mm256_setzero_si256());
48333 let r = _mm512_maskz_cvtepi32_epi16(0b00000000_11111111, a);
48334 let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
48335 assert_eq_m256i(r, e);
48336 }
48337
48338 #[simd_test(enable = "avx512f,avx512vl")]
48339 unsafe fn test_mm256_cvtepi32_epi16() {
48340 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48341 let r = _mm256_cvtepi32_epi16(a);
48342 let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48343 assert_eq_m128i(r, e);
48344 }
48345
48346 #[simd_test(enable = "avx512f,avx512vl")]
48347 unsafe fn test_mm256_mask_cvtepi32_epi16() {
48348 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48349 let src = _mm_set1_epi16(-1);
48350 let r = _mm256_mask_cvtepi32_epi16(src, 0, a);
48351 assert_eq_m128i(r, src);
48352 let r = _mm256_mask_cvtepi32_epi16(src, 0b11111111, a);
48353 let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48354 assert_eq_m128i(r, e);
48355 }
48356
48357 #[simd_test(enable = "avx512f,avx512vl")]
48358 unsafe fn test_mm256_maskz_cvtepi32_epi16() {
48359 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48360 let r = _mm256_maskz_cvtepi32_epi16(0, a);
48361 assert_eq_m128i(r, _mm_setzero_si128());
48362 let r = _mm256_maskz_cvtepi32_epi16(0b11111111, a);
48363 let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48364 assert_eq_m128i(r, e);
48365 }
48366
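    // When the narrowed result does not fill the 128-bit destination (four i32
    // down to four i16 here), the unused upper lanes of the result are zeroed,
    // which is why the expected vectors below contain leading zeros.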
48367 #[simd_test(enable = "avx512f,avx512vl")]
48368 unsafe fn test_mm_cvtepi32_epi16() {
48369 let a = _mm_set_epi32(4, 5, 6, 7);
48370 let r = _mm_cvtepi32_epi16(a);
48371 let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
48372 assert_eq_m128i(r, e);
48373 }
48374
48375 #[simd_test(enable = "avx512f,avx512vl")]
48376 unsafe fn test_mm_mask_cvtepi32_epi16() {
48377 let a = _mm_set_epi32(4, 5, 6, 7);
48378 let src = _mm_set1_epi16(0);
48379 let r = _mm_mask_cvtepi32_epi16(src, 0, a);
48380 assert_eq_m128i(r, src);
48381 let r = _mm_mask_cvtepi32_epi16(src, 0b00001111, a);
48382 let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
48383 assert_eq_m128i(r, e);
48384 }
48385
48386 #[simd_test(enable = "avx512f,avx512vl")]
48387 unsafe fn test_mm_maskz_cvtepi32_epi16() {
48388 let a = _mm_set_epi32(4, 5, 6, 7);
48389 let r = _mm_maskz_cvtepi32_epi16(0, a);
48390 assert_eq_m128i(r, _mm_setzero_si128());
48391 let r = _mm_maskz_cvtepi32_epi16(0b00001111, a);
48392 let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
48393 assert_eq_m128i(r, e);
48394 }
48395
48396 #[simd_test(enable = "avx512f")]
48397 unsafe fn test_mm512_cvtepi32_epi8() {
48398 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48399 let r = _mm512_cvtepi32_epi8(a);
48400 let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48401 assert_eq_m128i(r, e);
48402 }
48403
48404 #[simd_test(enable = "avx512f")]
48405 unsafe fn test_mm512_mask_cvtepi32_epi8() {
48406 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48407 let src = _mm_set1_epi8(-1);
48408 let r = _mm512_mask_cvtepi32_epi8(src, 0, a);
48409 assert_eq_m128i(r, src);
48410 let r = _mm512_mask_cvtepi32_epi8(src, 0b00000000_11111111, a);
48411 let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
48412 assert_eq_m128i(r, e);
48413 }
48414
48415 #[simd_test(enable = "avx512f")]
48416 unsafe fn test_mm512_maskz_cvtepi32_epi8() {
48417 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48418 let r = _mm512_maskz_cvtepi32_epi8(0, a);
48419 assert_eq_m128i(r, _mm_setzero_si128());
48420 let r = _mm512_maskz_cvtepi32_epi8(0b00000000_11111111, a);
48421 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
48422 assert_eq_m128i(r, e);
48423 }
48424
48425 #[simd_test(enable = "avx512f,avx512vl")]
48426 unsafe fn test_mm256_cvtepi32_epi8() {
48427 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48428 let r = _mm256_cvtepi32_epi8(a);
48429 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7);
48430 assert_eq_m128i(r, e);
48431 }
48432
48433 #[simd_test(enable = "avx512f,avx512vl")]
48434 unsafe fn test_mm256_mask_cvtepi32_epi8() {
48435 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48436 let src = _mm_set1_epi8(0);
48437 let r = _mm256_mask_cvtepi32_epi8(src, 0, a);
48438 assert_eq_m128i(r, src);
48439 let r = _mm256_mask_cvtepi32_epi8(src, 0b11111111, a);
48440 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7);
48441 assert_eq_m128i(r, e);
48442 }
48443
48444 #[simd_test(enable = "avx512f,avx512vl")]
48445 unsafe fn test_mm256_maskz_cvtepi32_epi8() {
48446 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48447 let r = _mm256_maskz_cvtepi32_epi8(0, a);
48448 assert_eq_m128i(r, _mm_setzero_si128());
48449 let r = _mm256_maskz_cvtepi32_epi8(0b11111111, a);
48450 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7);
48451 assert_eq_m128i(r, e);
48452 }
48453
48454 #[simd_test(enable = "avx512f,avx512vl")]
48455 unsafe fn test_mm_cvtepi32_epi8() {
48456 let a = _mm_set_epi32(4, 5, 6, 7);
48457 let r = _mm_cvtepi32_epi8(a);
48458 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, 7);
48459 assert_eq_m128i(r, e);
48460 }
48461
48462 #[simd_test(enable = "avx512f,avx512vl")]
48463 unsafe fn test_mm_mask_cvtepi32_epi8() {
48464 let a = _mm_set_epi32(4, 5, 6, 7);
48465 let src = _mm_set1_epi8(0);
48466 let r = _mm_mask_cvtepi32_epi8(src, 0, a);
48467 assert_eq_m128i(r, src);
48468 let r = _mm_mask_cvtepi32_epi8(src, 0b00001111, a);
48469 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, 7);
48470 assert_eq_m128i(r, e);
48471 }
48472
48473 #[simd_test(enable = "avx512f,avx512vl")]
48474 unsafe fn test_mm_maskz_cvtepi32_epi8() {
48475 let a = _mm_set_epi32(4, 5, 6, 7);
48476 let r = _mm_maskz_cvtepi32_epi8(0, a);
48477 assert_eq_m128i(r, _mm_setzero_si128());
48478 let r = _mm_maskz_cvtepi32_epi8(0b00001111, a);
48479 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, 7);
48480 assert_eq_m128i(r, e);
48481 }
48482
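    // cvtsepi32_epi16 narrows with *signed* saturation: i32::MIN / i32::MAX clamp
    // to i16::MIN / i16::MAX instead of being truncated.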
48483 #[simd_test(enable = "avx512f")]
48484 unsafe fn test_mm512_cvtsepi32_epi16() {
48485 #[rustfmt::skip]
48486 let a = _mm512_set_epi32(
48487 0, 1, 2, 3,
48488 4, 5, 6, 7,
48489 8, 9, 10, 11,
48490 12, 13, i32::MIN, i32::MAX,
48491 );
48492 let r = _mm512_cvtsepi32_epi16(a);
48493 #[rustfmt::skip]
48494 let e = _mm256_set_epi16(
48495 0, 1, 2, 3,
48496 4, 5, 6, 7,
48497 8, 9, 10, 11,
48498 12, 13, i16::MIN, i16::MAX,
48499 );
48500 assert_eq_m256i(r, e);
48501 }
48502
48503 #[simd_test(enable = "avx512f")]
48504 unsafe fn test_mm512_mask_cvtsepi32_epi16() {
48505 #[rustfmt::skip]
48506 let a = _mm512_set_epi32(
48507 0, 1, 2, 3,
48508 4, 5, 6, 7,
48509 8, 9, 10, 11,
48510 12, 13, i32::MIN, i32::MAX,
48511 );
48512 let src = _mm256_set1_epi16(-1);
48513 let r = _mm512_mask_cvtsepi32_epi16(src, 0, a);
48514 assert_eq_m256i(r, src);
48515 let r = _mm512_mask_cvtsepi32_epi16(src, 0b00000000_11111111, a);
48516 #[rustfmt::skip]
48517 let e = _mm256_set_epi16(
48518 -1, -1, -1, -1,
48519 -1, -1, -1, -1,
48520 8, 9, 10, 11,
48521 12, 13, i16::MIN, i16::MAX,
48522 );
48523 assert_eq_m256i(r, e);
48524 }
48525
48526 #[simd_test(enable = "avx512f")]
48527 unsafe fn test_mm512_maskz_cvtsepi32_epi16() {
48528 #[rustfmt::skip]
48529 let a = _mm512_set_epi32(
48530 0, 1, 2, 3,
48531 4, 5, 6, 7,
48532 8, 9, 10, 11,
48533 12, 13, i32::MIN, i32::MAX,
48534 );
48535 let r = _mm512_maskz_cvtsepi32_epi16(0, a);
48536 assert_eq_m256i(r, _mm256_setzero_si256());
48537 let r = _mm512_maskz_cvtsepi32_epi16(0b00000000_11111111, a);
48538 #[rustfmt::skip]
48539 let e = _mm256_set_epi16(
48540 0, 0, 0, 0,
48541 0, 0, 0, 0,
48542 8, 9, 10, 11,
48543 12, 13, i16::MIN, i16::MAX,
48544 );
48545 assert_eq_m256i(r, e);
48546 }
48547
48548 #[simd_test(enable = "avx512f,avx512vl")]
48549 unsafe fn test_mm256_cvtsepi32_epi16() {
48550 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48551 let r = _mm256_cvtsepi32_epi16(a);
48552 let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48553 assert_eq_m128i(r, e);
48554 }
48555
48556 #[simd_test(enable = "avx512f,avx512vl")]
48557 unsafe fn test_mm256_mask_cvtsepi32_epi16() {
48558 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48559 let src = _mm_set1_epi16(-1);
48560 let r = _mm256_mask_cvtsepi32_epi16(src, 0, a);
48561 assert_eq_m128i(r, src);
48562 let r = _mm256_mask_cvtsepi32_epi16(src, 0b11111111, a);
48563 let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48564 assert_eq_m128i(r, e);
48565 }
48566
48567 #[simd_test(enable = "avx512f,avx512vl")]
48568 unsafe fn test_mm256_maskz_cvtsepi32_epi16() {
48569 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48570 let r = _mm256_maskz_cvtsepi32_epi16(0, a);
48571 assert_eq_m128i(r, _mm_setzero_si128());
48572 let r = _mm256_maskz_cvtsepi32_epi16(0b11111111, a);
48573 let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48574 assert_eq_m128i(r, e);
48575 }
48576
48577 #[simd_test(enable = "avx512f,avx512vl")]
48578 unsafe fn test_mm_cvtsepi32_epi16() {
48579 let a = _mm_set_epi32(4, 5, 6, 7);
48580 let r = _mm_cvtsepi32_epi16(a);
48581 let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
48582 assert_eq_m128i(r, e);
48583 }
48584
48585 #[simd_test(enable = "avx512f,avx512vl")]
48586 unsafe fn test_mm_mask_cvtsepi32_epi16() {
48587 let a = _mm_set_epi32(4, 5, 6, 7);
48588 let src = _mm_set1_epi16(0);
48589 let r = _mm_mask_cvtsepi32_epi16(src, 0, a);
48590 assert_eq_m128i(r, src);
48591 let r = _mm_mask_cvtsepi32_epi16(src, 0b11111111, a);
48592 let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
48593 assert_eq_m128i(r, e);
48594 }
48595
48596 #[simd_test(enable = "avx512f,avx512vl")]
48597 unsafe fn test_mm_maskz_cvtsepi32_epi16() {
48598 let a = _mm_set_epi32(4, 5, 6, 7);
48599 let r = _mm_maskz_cvtsepi32_epi16(0, a);
48600 assert_eq_m128i(r, _mm_setzero_si128());
48601 let r = _mm_maskz_cvtsepi32_epi16(0b11111111, a);
48602 let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
48603 assert_eq_m128i(r, e);
48604 }
48605
48606 #[simd_test(enable = "avx512f")]
48607 unsafe fn test_mm512_cvtsepi32_epi8() {
48608 #[rustfmt::skip]
48609 let a = _mm512_set_epi32(
48610 0, 1, 2, 3,
48611 4, 5, 6, 7,
48612 8, 9, 10, 11,
48613 12, 13, i32::MIN, i32::MAX,
48614 );
48615 let r = _mm512_cvtsepi32_epi8(a);
48616 #[rustfmt::skip]
48617 let e = _mm_set_epi8(
48618 0, 1, 2, 3,
48619 4, 5, 6, 7,
48620 8, 9, 10, 11,
48621 12, 13, i8::MIN, i8::MAX,
48622 );
48623 assert_eq_m128i(r, e);
48624 }
48625
48626 #[simd_test(enable = "avx512f")]
48627 unsafe fn test_mm512_mask_cvtsepi32_epi8() {
48628 #[rustfmt::skip]
48629 let a = _mm512_set_epi32(
48630 0, 1, 2, 3,
48631 4, 5, 6, 7,
48632 8, 9, 10, 11,
48633 12, 13, i32::MIN, i32::MAX,
48634 );
48635 let src = _mm_set1_epi8(-1);
48636 let r = _mm512_mask_cvtsepi32_epi8(src, 0, a);
48637 assert_eq_m128i(r, src);
48638 let r = _mm512_mask_cvtsepi32_epi8(src, 0b00000000_11111111, a);
48639 #[rustfmt::skip]
48640 let e = _mm_set_epi8(
48641 -1, -1, -1, -1,
48642 -1, -1, -1, -1,
48643 8, 9, 10, 11,
48644 12, 13, i8::MIN, i8::MAX,
48645 );
48646 assert_eq_m128i(r, e);
48647 }
48648
48649 #[simd_test(enable = "avx512f")]
48650 unsafe fn test_mm512_maskz_cvtsepi32_epi8() {
48651 #[rustfmt::skip]
48652 let a = _mm512_set_epi32(
48653 0, 1, 2, 3,
48654 4, 5, 6, 7,
48655 8, 9, 10, 11,
48656 12, 13, i32::MIN, i32::MAX,
48657 );
48658 let r = _mm512_maskz_cvtsepi32_epi8(0, a);
48659 assert_eq_m128i(r, _mm_setzero_si128());
48660 let r = _mm512_maskz_cvtsepi32_epi8(0b00000000_11111111, a);
48661 #[rustfmt::skip]
48662 let e = _mm_set_epi8(
48663 0, 0, 0, 0,
48664 0, 0, 0, 0,
48665 8, 9, 10, 11,
48666 12, 13, i8::MIN, i8::MAX,
48667 );
48668 assert_eq_m128i(r, e);
48669 }
48670
48671 #[simd_test(enable = "avx512f,avx512vl")]
48672 unsafe fn test_mm256_cvtsepi32_epi8() {
48673 let a = _mm256_set_epi32(9, 10, 11, 12, 13, 14, 15, 16);
48674 let r = _mm256_cvtsepi32_epi8(a);
48675 #[rustfmt::skip]
48676 let e = _mm_set_epi8(
48677 0, 0, 0, 0,
48678 0, 0, 0, 0,
48679 9, 10, 11, 12,
48680 13, 14, 15, 16,
48681 );
48682 assert_eq_m128i(r, e);
48683 }
48684
48685 #[simd_test(enable = "avx512f,avx512vl")]
48686 unsafe fn test_mm256_mask_cvtsepi32_epi8() {
48687 let a = _mm256_set_epi32(9, 10, 11, 12, 13, 14, 15, 16);
48688 let src = _mm_set1_epi8(0);
48689 let r = _mm256_mask_cvtsepi32_epi8(src, 0, a);
48690 assert_eq_m128i(r, src);
48691 let r = _mm256_mask_cvtsepi32_epi8(src, 0b11111111, a);
48692 #[rustfmt::skip]
48693 let e = _mm_set_epi8(
48694 0, 0, 0, 0,
48695 0, 0, 0, 0,
48696 9, 10, 11, 12,
48697 13, 14, 15, 16,
48698 );
48699 assert_eq_m128i(r, e);
48700 }
48701
48702 #[simd_test(enable = "avx512f,avx512vl")]
48703 unsafe fn test_mm256_maskz_cvtsepi32_epi8() {
48704 let a = _mm256_set_epi32(9, 10, 11, 12, 13, 14, 15, 16);
48705 let r = _mm256_maskz_cvtsepi32_epi8(0, a);
48706 assert_eq_m128i(r, _mm_setzero_si128());
48707 let r = _mm256_maskz_cvtsepi32_epi8(0b11111111, a);
48708 #[rustfmt::skip]
48709 let e = _mm_set_epi8(
48710 0, 0, 0, 0,
48711 0, 0, 0, 0,
48712 9, 10, 11, 12,
48713 13, 14, 15, 16,
48714 );
48715 assert_eq_m128i(r, e);
48716 }
48717
48718 #[simd_test(enable = "avx512f,avx512vl")]
48719 unsafe fn test_mm_cvtsepi32_epi8() {
48720 let a = _mm_set_epi32(13, 14, 15, 16);
48721 let r = _mm_cvtsepi32_epi8(a);
48722 #[rustfmt::skip]
48723 let e = _mm_set_epi8(
48724 0, 0, 0, 0,
48725 0, 0, 0, 0,
48726 0, 0, 0, 0,
48727 13, 14, 15, 16,
48728 );
48729 assert_eq_m128i(r, e);
48730 }
48731
48732 #[simd_test(enable = "avx512f,avx512vl")]
48733 unsafe fn test_mm_mask_cvtsepi32_epi8() {
48734 let a = _mm_set_epi32(13, 14, 15, 16);
48735 let src = _mm_set1_epi8(0);
48736 let r = _mm_mask_cvtsepi32_epi8(src, 0, a);
48737 assert_eq_m128i(r, src);
48738 let r = _mm_mask_cvtsepi32_epi8(src, 0b00001111, a);
48739 #[rustfmt::skip]
48740 let e = _mm_set_epi8(
48741 0, 0, 0, 0,
48742 0, 0, 0, 0,
48743 0, 0, 0, 0,
48744 13, 14, 15, 16,
48745 );
48746 assert_eq_m128i(r, e);
48747 }
48748
48749 #[simd_test(enable = "avx512f,avx512vl")]
48750 unsafe fn test_mm_maskz_cvtsepi32_epi8() {
48751 let a = _mm_set_epi32(13, 14, 15, 16);
48752 let r = _mm_maskz_cvtsepi32_epi8(0, a);
48753 assert_eq_m128i(r, _mm_setzero_si128());
48754 let r = _mm_maskz_cvtsepi32_epi8(0b00001111, a);
48755 #[rustfmt::skip]
48756 let e = _mm_set_epi8(
48757 0, 0, 0, 0,
48758 0, 0, 0, 0,
48759 0, 0, 0, 0,
48760 13, 14, 15, 16,
48761 );
48762 assert_eq_m128i(r, e);
48763 }
48764
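    // cvtusepi32_epi16 narrows with *unsigned* saturation: the source lanes are
    // treated as u32, so i32::MIN (0x8000_0000) saturates to u16::MAX, which
    // prints as -1 in the signed i16 view used by the expected vectors.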
48765 #[simd_test(enable = "avx512f")]
48766 unsafe fn test_mm512_cvtusepi32_epi16() {
48767 #[rustfmt::skip]
48768 let a = _mm512_set_epi32(
48769 0, 1, 2, 3,
48770 4, 5, 6, 7,
48771 8, 9, 10, 11,
48772 12, 13, i32::MIN, i32::MIN,
48773 );
48774 let r = _mm512_cvtusepi32_epi16(a);
48775 let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -1, -1);
48776 assert_eq_m256i(r, e);
48777 }
48778
48779 #[simd_test(enable = "avx512f")]
48780 unsafe fn test_mm512_mask_cvtusepi32_epi16() {
48781 #[rustfmt::skip]
48782 let a = _mm512_set_epi32(
48783 0, 1, 2, 3,
48784 4, 5, 6, 7,
48785 8, 9, 10, 11,
48786 12, 13, i32::MIN, i32::MIN,
48787 );
48788 let src = _mm256_set1_epi16(-1);
48789 let r = _mm512_mask_cvtusepi32_epi16(src, 0, a);
48790 assert_eq_m256i(r, src);
48791 let r = _mm512_mask_cvtusepi32_epi16(src, 0b00000000_11111111, a);
48792 let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, -1, -1);
48793 assert_eq_m256i(r, e);
48794 }
48795
48796 #[simd_test(enable = "avx512f")]
48797 unsafe fn test_mm512_maskz_cvtusepi32_epi16() {
48798 #[rustfmt::skip]
48799 let a = _mm512_set_epi32(
48800 0, 1, 2, 3,
48801 4, 5, 6, 7,
48802 8, 9, 10, 11,
48803 12, 13, i32::MIN, i32::MIN,
48804 );
48805 let r = _mm512_maskz_cvtusepi32_epi16(0, a);
48806 assert_eq_m256i(r, _mm256_setzero_si256());
48807 let r = _mm512_maskz_cvtusepi32_epi16(0b00000000_11111111, a);
48808 let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, -1, -1);
48809 assert_eq_m256i(r, e);
48810 }
48811
48812 #[simd_test(enable = "avx512f,avx512vl")]
48813 unsafe fn test_mm256_cvtusepi32_epi16() {
48814 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
48815 let r = _mm256_cvtusepi32_epi16(a);
48816 let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
48817 assert_eq_m128i(r, e);
48818 }
48819
48820 #[simd_test(enable = "avx512f,avx512vl")]
48821 unsafe fn test_mm256_mask_cvtusepi32_epi16() {
48822 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
48823 let src = _mm_set1_epi16(0);
48824 let r = _mm256_mask_cvtusepi32_epi16(src, 0, a);
48825 assert_eq_m128i(r, src);
48826 let r = _mm256_mask_cvtusepi32_epi16(src, 0b11111111, a);
48827 let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
48828 assert_eq_m128i(r, e);
48829 }
48830
48831 #[simd_test(enable = "avx512f,avx512vl")]
48832 unsafe fn test_mm256_maskz_cvtusepi32_epi16() {
48833 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
48834 let r = _mm256_maskz_cvtusepi32_epi16(0, a);
48835 assert_eq_m128i(r, _mm_setzero_si128());
48836 let r = _mm256_maskz_cvtusepi32_epi16(0b11111111, a);
48837 let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
48838 assert_eq_m128i(r, e);
48839 }
48840
48841 #[simd_test(enable = "avx512f,avx512vl")]
48842 unsafe fn test_mm_cvtusepi32_epi16() {
48843 let a = _mm_set_epi32(5, 6, 7, 8);
48844 let r = _mm_cvtusepi32_epi16(a);
48845 let e = _mm_set_epi16(0, 0, 0, 0, 5, 6, 7, 8);
48846 assert_eq_m128i(r, e);
48847 }
48848
48849 #[simd_test(enable = "avx512f,avx512vl")]
48850 unsafe fn test_mm_mask_cvtusepi32_epi16() {
48851 let a = _mm_set_epi32(5, 6, 7, 8);
48852 let src = _mm_set1_epi16(0);
48853 let r = _mm_mask_cvtusepi32_epi16(src, 0, a);
48854 assert_eq_m128i(r, src);
48855 let r = _mm_mask_cvtusepi32_epi16(src, 0b00001111, a);
48856 let e = _mm_set_epi16(0, 0, 0, 0, 5, 6, 7, 8);
48857 assert_eq_m128i(r, e);
48858 }
48859
48860 #[simd_test(enable = "avx512f,avx512vl")]
48861 unsafe fn test_mm_maskz_cvtusepi32_epi16() {
48862 let a = _mm_set_epi32(5, 6, 7, 8);
48863 let r = _mm_maskz_cvtusepi32_epi16(0, a);
48864 assert_eq_m128i(r, _mm_setzero_si128());
48865 let r = _mm_maskz_cvtusepi32_epi16(0b00001111, a);
48866 let e = _mm_set_epi16(0, 0, 0, 0, 5, 6, 7, 8);
48867 assert_eq_m128i(r, e);
48868 }
48869
48870 #[simd_test(enable = "avx512f")]
48871 unsafe fn test_mm512_cvtusepi32_epi8() {
48872 #[rustfmt::skip]
48873 let a = _mm512_set_epi32(
48874 0, 1, 2, 3,
48875 4, 5, 6, 7,
48876 8, 9, 10, 11,
48877 12, 13, i32::MIN, i32::MIN,
48878 );
48879 let r = _mm512_cvtusepi32_epi8(a);
48880 let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -1, -1);
48881 assert_eq_m128i(r, e);
48882 }
48883
48884 #[simd_test(enable = "avx512f")]
48885 unsafe fn test_mm512_mask_cvtusepi32_epi8() {
48886 #[rustfmt::skip]
48887 let a = _mm512_set_epi32(
48888 0, 1, 2, 3,
48889 4, 5, 6, 7,
48890 8, 9, 10, 11,
48891 12, 13, i32::MIN, i32::MIN,
48892 );
48893 let src = _mm_set1_epi8(-1);
48894 let r = _mm512_mask_cvtusepi32_epi8(src, 0, a);
48895 assert_eq_m128i(r, src);
48896 let r = _mm512_mask_cvtusepi32_epi8(src, 0b00000000_11111111, a);
48897 let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, -1, -1);
48898 assert_eq_m128i(r, e);
48899 }
48900
48901 #[simd_test(enable = "avx512f")]
48902 unsafe fn test_mm512_maskz_cvtusepi32_epi8() {
48903 #[rustfmt::skip]
48904 let a = _mm512_set_epi32(
48905 0, 1, 2, 3,
48906 4, 5, 6, 7,
48907 8, 9, 10, 11,
48908 12, 13, i32::MIN, i32::MIN,
48909 );
48910 let r = _mm512_maskz_cvtusepi32_epi8(0, a);
48911 assert_eq_m128i(r, _mm_setzero_si128());
48912 let r = _mm512_maskz_cvtusepi32_epi8(0b00000000_11111111, a);
48913 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, -1, -1);
48914 assert_eq_m128i(r, e);
48915 }
48916
48917 #[simd_test(enable = "avx512f,avx512vl")]
48918 unsafe fn test_mm256_cvtusepi32_epi8() {
48919 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, i32::MAX);
48920 let r = _mm256_cvtusepi32_epi8(a);
48921 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, u8::MAX as i8);
48922 assert_eq_m128i(r, e);
48923 }
48924
48925 #[simd_test(enable = "avx512f,avx512vl")]
48926 unsafe fn test_mm256_mask_cvtusepi32_epi8() {
48927 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, i32::MAX);
48928 let src = _mm_set1_epi8(0);
48929 let r = _mm256_mask_cvtusepi32_epi8(src, 0, a);
48930 assert_eq_m128i(r, src);
48931 let r = _mm256_mask_cvtusepi32_epi8(src, 0b11111111, a);
48932 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, u8::MAX as i8);
48933 assert_eq_m128i(r, e);
48934 }
48935
48936 #[simd_test(enable = "avx512f,avx512vl")]
48937 unsafe fn test_mm256_maskz_cvtusepi32_epi8() {
48938 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, i32::MAX);
48939 let r = _mm256_maskz_cvtusepi32_epi8(0, a);
48940 assert_eq_m128i(r, _mm_setzero_si128());
48941 let r = _mm256_maskz_cvtusepi32_epi8(0b11111111, a);
48942 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, u8::MAX as i8);
48943 assert_eq_m128i(r, e);
48944 }
48945
48946 #[simd_test(enable = "avx512f,avx512vl")]
48947 unsafe fn test_mm_cvtusepi32_epi8() {
48948 let a = _mm_set_epi32(5, 6, 7, i32::MAX);
48949 let r = _mm_cvtusepi32_epi8(a);
48950 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, u8::MAX as i8);
48951 assert_eq_m128i(r, e);
48952 }
48953
48954 #[simd_test(enable = "avx512f,avx512vl")]
48955 unsafe fn test_mm_mask_cvtusepi32_epi8() {
48956 let a = _mm_set_epi32(5, 6, 7, i32::MAX);
48957 let src = _mm_set1_epi8(0);
48958 let r = _mm_mask_cvtusepi32_epi8(src, 0, a);
48959 assert_eq_m128i(r, src);
48960 let r = _mm_mask_cvtusepi32_epi8(src, 0b00001111, a);
48961 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, u8::MAX as i8);
48962 assert_eq_m128i(r, e);
48963 }
48964
48965 #[simd_test(enable = "avx512f,avx512vl")]
48966 unsafe fn test_mm_maskz_cvtusepi32_epi8() {
48967 let a = _mm_set_epi32(5, 6, 7, i32::MAX);
48968 let r = _mm_maskz_cvtusepi32_epi8(0, a);
48969 assert_eq_m128i(r, _mm_setzero_si128());
48970 let r = _mm_maskz_cvtusepi32_epi8(0b00001111, a);
48971 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, u8::MAX as i8);
48972 assert_eq_m128i(r, e);
48973 }
48974
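    // _MM_FROUND_TO_NEAREST_INT rounds halfway cases to even (-1.5 -> -2,
    // 9.5 -> 10), while _MM_FROUND_TO_NEG_INF rounds toward negative infinity
    // (9.5 -> 9); the pairs of expected vectors below reflect those two modes.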
48975 #[simd_test(enable = "avx512f")]
48976 unsafe fn test_mm512_cvt_roundps_epi32() {
48977 let a = _mm512_setr_ps(
48978 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
48979 );
48980 let r = _mm512_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
48981 let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
48982 assert_eq_m512i(r, e);
48983 let r = _mm512_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a);
48984 let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 9, 10, 11, 12, 13, 14, 15);
48985 assert_eq_m512i(r, e);
48986 }
48987
48988 #[simd_test(enable = "avx512f")]
48989 unsafe fn test_mm512_mask_cvt_roundps_epi32() {
48990 let a = _mm512_setr_ps(
48991 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
48992 );
48993 let src = _mm512_set1_epi32(0);
48994 let r = _mm512_mask_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48995 src, 0, a,
48996 );
48997 assert_eq_m512i(r, src);
48998 let r = _mm512_mask_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48999 src,
49000 0b00000000_11111111,
49001 a,
49002 );
49003 let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
49004 assert_eq_m512i(r, e);
49005 }
49006
49007 #[simd_test(enable = "avx512f")]
49008 unsafe fn test_mm512_maskz_cvt_roundps_epi32() {
49009 let a = _mm512_setr_ps(
49010 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49011 );
49012 let r = _mm512_maskz_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49013 0, a,
49014 );
49015 assert_eq_m512i(r, _mm512_setzero_si512());
49016 let r = _mm512_maskz_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49017 0b00000000_11111111,
49018 a,
49019 );
49020 let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
49021 assert_eq_m512i(r, e);
49022 }
49023
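    // For the unsigned conversions, negative inputs are out of range: the hardware
    // returns the unsigned integer indefinite value 0xFFFF_FFFF, which shows up as
    // -1 when the lanes are compared through the signed helpers.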
49024 #[simd_test(enable = "avx512f")]
49025 unsafe fn test_mm512_cvt_roundps_epu32() {
49026 let a = _mm512_setr_ps(
49027 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49028 );
49029 let r = _mm512_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
49030 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 10, 10, 12, 12, 14, 14, 16);
49031 assert_eq_m512i(r, e);
49032 let r = _mm512_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a);
49033 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 9, 10, 11, 12, 13, 14, 15);
49034 assert_eq_m512i(r, e);
49035 }
49036
49037 #[simd_test(enable = "avx512f")]
49038 unsafe fn test_mm512_mask_cvt_roundps_epu32() {
49039 let a = _mm512_setr_ps(
49040 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49041 );
49042 let src = _mm512_set1_epi32(0);
49043 let r = _mm512_mask_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49044 src, 0, a,
49045 );
49046 assert_eq_m512i(r, src);
49047 let r = _mm512_mask_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49048 src,
49049 0b00000000_11111111,
49050 a,
49051 );
49052 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
49053 assert_eq_m512i(r, e);
49054 }
49055
49056 #[simd_test(enable = "avx512f")]
49057 unsafe fn test_mm512_maskz_cvt_roundps_epu32() {
49058 let a = _mm512_setr_ps(
49059 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49060 );
49061 let r = _mm512_maskz_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49062 0, a,
49063 );
49064 assert_eq_m512i(r, _mm512_setzero_si512());
49065 let r = _mm512_maskz_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49066 0b00000000_11111111,
49067 a,
49068 );
49069 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
49070 assert_eq_m512i(r, e);
49071 }
49072
49073 #[simd_test(enable = "avx512f")]
49074 unsafe fn test_mm512_cvt_roundepi32_ps() {
49075 let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
49076 let r = _mm512_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
49077 let e = _mm512_setr_ps(
49078 0., -2., 2., -4., 4., -6., 6., -8., 8., 10., 10., 12., 12., 14., 14., 16.,
49079 );
49080 assert_eq_m512(r, e);
49081 }
49082
49083 #[simd_test(enable = "avx512f")]
49084 unsafe fn test_mm512_mask_cvt_roundepi32_ps() {
49085 let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
49086 let src = _mm512_set1_ps(0.);
49087 let r = _mm512_mask_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49088 src, 0, a,
49089 );
49090 assert_eq_m512(r, src);
49091 let r = _mm512_mask_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49092 src,
49093 0b00000000_11111111,
49094 a,
49095 );
49096 let e = _mm512_setr_ps(
49097 0., -2., 2., -4., 4., -6., 6., -8., 0., 0., 0., 0., 0., 0., 0., 0.,
49098 );
49099 assert_eq_m512(r, e);
49100 }
49101
49102 #[simd_test(enable = "avx512f")]
49103 unsafe fn test_mm512_maskz_cvt_roundepi32_ps() {
49104 let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
49105 let r = _mm512_maskz_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49106 0, a,
49107 );
49108 assert_eq_m512(r, _mm512_setzero_ps());
49109 let r = _mm512_maskz_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49110 0b00000000_11111111,
49111 a,
49112 );
49113 let e = _mm512_setr_ps(
49114 0., -2., 2., -4., 4., -6., 6., -8., 0., 0., 0., 0., 0., 0., 0., 0.,
49115 );
49116 assert_eq_m512(r, e);
49117 }
49118
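    // Here the i32 lanes are reinterpreted as u32 before conversion, so -2 becomes
    // 4294967294, which rounds to 4294967296.0 in f32 (written as 4294967300.
    // below; both literals denote the same f32 value).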
49119 #[simd_test(enable = "avx512f")]
49120 unsafe fn test_mm512_cvt_roundepu32_ps() {
49121 let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
49122 let r = _mm512_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
49123 #[rustfmt::skip]
49124 let e = _mm512_setr_ps(
49125 0., 4294967300., 2., 4294967300.,
49126 4., 4294967300., 6., 4294967300.,
49127 8., 10., 10., 12.,
49128 12., 14., 14., 16.,
49129 );
49130 assert_eq_m512(r, e);
49131 }
49132
49133 #[simd_test(enable = "avx512f")]
49134 unsafe fn test_mm512_mask_cvt_roundepu32_ps() {
49135 let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
49136 let src = _mm512_set1_ps(0.);
49137 let r = _mm512_mask_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49138 src, 0, a,
49139 );
49140 assert_eq_m512(r, src);
49141 let r = _mm512_mask_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49142 src,
49143 0b00000000_11111111,
49144 a,
49145 );
49146 #[rustfmt::skip]
49147 let e = _mm512_setr_ps(
49148 0., 4294967300., 2., 4294967300.,
49149 4., 4294967300., 6., 4294967300.,
49150 0., 0., 0., 0.,
49151 0., 0., 0., 0.,
49152 );
49153 assert_eq_m512(r, e);
49154 }
49155
49156 #[simd_test(enable = "avx512f")]
49157 unsafe fn test_mm512_maskz_cvt_roundepu32_ps() {
49158 let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
49159 let r = _mm512_maskz_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49160 0, a,
49161 );
49162 assert_eq_m512(r, _mm512_setzero_ps());
49163 let r = _mm512_maskz_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49164 0b00000000_11111111,
49165 a,
49166 );
49167 #[rustfmt::skip]
49168 let e = _mm512_setr_ps(
49169 0., 4294967300., 2., 4294967300.,
49170 4., 4294967300., 6., 4294967300.,
49171 0., 0., 0., 0.,
49172 0., 0., 0., 0.,
49173 );
49174 assert_eq_m512(r, e);
49175 }
49176
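    // 4323521613979991040 == 0x3C00_3C00_3C00_3C00: each 64-bit lane holds four
    // IEEE 754 half-precision encodings of 1.0 (0x3C00), the expected result of
    // converting 1.0f32 to f16.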
49177 #[simd_test(enable = "avx512f")]
49178 unsafe fn test_mm512_cvt_roundps_ph() {
49179 let a = _mm512_set1_ps(1.);
49180 let r = _mm512_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(a);
49181 let e = _mm256_setr_epi64x(
49182 4323521613979991040,
49183 4323521613979991040,
49184 4323521613979991040,
49185 4323521613979991040,
49186 );
49187 assert_eq_m256i(r, e);
49188 }
49189
49190 #[simd_test(enable = "avx512f")]
49191 unsafe fn test_mm512_mask_cvt_roundps_ph() {
49192 let a = _mm512_set1_ps(1.);
49193 let src = _mm256_set1_epi16(0);
49194 let r = _mm512_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
49195 assert_eq_m256i(r, src);
49196 let r = _mm512_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
49197 let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
49198 assert_eq_m256i(r, e);
49199 }
49200
49201 #[simd_test(enable = "avx512f")]
49202 unsafe fn test_mm512_maskz_cvt_roundps_ph() {
49203 let a = _mm512_set1_ps(1.);
49204 let r = _mm512_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0, a);
49205 assert_eq_m256i(r, _mm256_setzero_si256());
49206 let r = _mm512_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
49207 let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
49208 assert_eq_m256i(r, e);
49209 }
49210
49211 #[simd_test(enable = "avx512f,avx512vl")]
49212 unsafe fn test_mm256_mask_cvt_roundps_ph() {
49213 let a = _mm256_set1_ps(1.);
49214 let src = _mm_set1_epi16(0);
49215 let r = _mm256_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
49216 assert_eq_m128i(r, src);
49217 let r = _mm256_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0b11111111, a);
49218 let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49219 assert_eq_m128i(r, e);
49220 }
49221
49222 #[simd_test(enable = "avx512f,avx512vl")]
49223 unsafe fn test_mm256_maskz_cvt_roundps_ph() {
49224 let a = _mm256_set1_ps(1.);
49225 let r = _mm256_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0, a);
49226 assert_eq_m128i(r, _mm_setzero_si128());
49227 let r = _mm256_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0b11111111, a);
49228 let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49229 assert_eq_m128i(r, e);
49230 }
49231
49232 #[simd_test(enable = "avx512f,avx512vl")]
49233 unsafe fn test_mm_mask_cvt_roundps_ph() {
49234 let a = _mm_set1_ps(1.);
49235 let src = _mm_set1_epi16(0);
49236 let r = _mm_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
49237 assert_eq_m128i(r, src);
49238 let r = _mm_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0b00001111, a);
49239 let e = _mm_setr_epi64x(4323521613979991040, 0);
49240 assert_eq_m128i(r, e);
49241 }
49242
49243 #[simd_test(enable = "avx512f,avx512vl")]
49244 unsafe fn test_mm_maskz_cvt_roundps_ph() {
49245 let a = _mm_set1_ps(1.);
49246 let r = _mm_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0, a);
49247 assert_eq_m128i(r, _mm_setzero_si128());
49248 let r = _mm_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0b00001111, a);
49249 let e = _mm_setr_epi64x(4323521613979991040, 0);
49250 assert_eq_m128i(r, e);
49251 }
49252
49253 #[simd_test(enable = "avx512f")]
49254 unsafe fn test_mm512_cvtps_ph() {
49255 let a = _mm512_set1_ps(1.);
49256 let r = _mm512_cvtps_ph::<_MM_FROUND_NO_EXC>(a);
49257 let e = _mm256_setr_epi64x(
49258 4323521613979991040,
49259 4323521613979991040,
49260 4323521613979991040,
49261 4323521613979991040,
49262 );
49263 assert_eq_m256i(r, e);
49264 }
49265
49266 #[simd_test(enable = "avx512f")]
49267 unsafe fn test_mm512_mask_cvtps_ph() {
49268 let a = _mm512_set1_ps(1.);
49269 let src = _mm256_set1_epi16(0);
49270 let r = _mm512_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
49271 assert_eq_m256i(r, src);
49272 let r = _mm512_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
49273 let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
49274 assert_eq_m256i(r, e);
49275 }
49276
49277 #[simd_test(enable = "avx512f")]
49278 unsafe fn test_mm512_maskz_cvtps_ph() {
49279 let a = _mm512_set1_ps(1.);
49280 let r = _mm512_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0, a);
49281 assert_eq_m256i(r, _mm256_setzero_si256());
49282 let r = _mm512_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
49283 let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
49284 assert_eq_m256i(r, e);
49285 }
49286
49287 #[simd_test(enable = "avx512f,avx512vl")]
49288 unsafe fn test_mm256_mask_cvtps_ph() {
49289 let a = _mm256_set1_ps(1.);
49290 let src = _mm_set1_epi16(0);
49291 let r = _mm256_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
49292 assert_eq_m128i(r, src);
49293 let r = _mm256_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0b11111111, a);
49294 let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49295 assert_eq_m128i(r, e);
49296 }
49297
49298 #[simd_test(enable = "avx512f,avx512vl")]
49299 unsafe fn test_mm256_maskz_cvtps_ph() {
49300 let a = _mm256_set1_ps(1.);
49301 let r = _mm256_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0, a);
49302 assert_eq_m128i(r, _mm_setzero_si128());
49303 let r = _mm256_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0b11111111, a);
49304 let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49305 assert_eq_m128i(r, e);
49306 }
49307
49308 #[simd_test(enable = "avx512f,avx512vl")]
49309 unsafe fn test_mm_mask_cvtps_ph() {
49310 let a = _mm_set1_ps(1.);
49311 let src = _mm_set1_epi16(0);
49312 let r = _mm_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
49313 assert_eq_m128i(r, src);
49314 let r = _mm_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0b00001111, a);
49315 let e = _mm_setr_epi64x(4323521613979991040, 0);
49316 assert_eq_m128i(r, e);
49317 }
49318
49319 #[simd_test(enable = "avx512f,avx512vl")]
49320 unsafe fn test_mm_maskz_cvtps_ph() {
49321 let a = _mm_set1_ps(1.);
49322 let r = _mm_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0, a);
49323 assert_eq_m128i(r, _mm_setzero_si128());
49324 let r = _mm_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0b00001111, a);
49325 let e = _mm_setr_epi64x(4323521613979991040, 0);
49326 assert_eq_m128i(r, e);
49327 }
49328
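    // The reverse direction: the packed 0x3C00 half-precision words from the tests
    // above decode back to 1.0f32 in every lane.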
49329 #[simd_test(enable = "avx512f")]
49330 unsafe fn test_mm512_cvt_roundph_ps() {
49331 let a = _mm256_setr_epi64x(
49332 4323521613979991040,
49333 4323521613979991040,
49334 4323521613979991040,
49335 4323521613979991040,
49336 );
49337 let r = _mm512_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(a);
49338 let e = _mm512_set1_ps(1.);
49339 assert_eq_m512(r, e);
49340 }
49341
49342 #[simd_test(enable = "avx512f")]
49343 unsafe fn test_mm512_mask_cvt_roundph_ps() {
49344 let a = _mm256_setr_epi64x(
49345 4323521613979991040,
49346 4323521613979991040,
49347 4323521613979991040,
49348 4323521613979991040,
49349 );
49350 let src = _mm512_set1_ps(0.);
49351 let r = _mm512_mask_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(src, 0, a);
49352 assert_eq_m512(r, src);
49353 let r = _mm512_mask_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
49354 let e = _mm512_setr_ps(
49355 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
49356 );
49357 assert_eq_m512(r, e);
49358 }
49359
49360 #[simd_test(enable = "avx512f")]
49361 unsafe fn test_mm512_maskz_cvt_roundph_ps() {
49362 let a = _mm256_setr_epi64x(
49363 4323521613979991040,
49364 4323521613979991040,
49365 4323521613979991040,
49366 4323521613979991040,
49367 );
49368 let r = _mm512_maskz_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(0, a);
49369 assert_eq_m512(r, _mm512_setzero_ps());
49370 let r = _mm512_maskz_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
49371 let e = _mm512_setr_ps(
49372 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
49373 );
49374 assert_eq_m512(r, e);
49375 }
49376
49377 #[simd_test(enable = "avx512f")]
49378 unsafe fn test_mm512_cvtph_ps() {
49379 let a = _mm256_setr_epi64x(
49380 4323521613979991040,
49381 4323521613979991040,
49382 4323521613979991040,
49383 4323521613979991040,
49384 );
49385 let r = _mm512_cvtph_ps(a);
49386 let e = _mm512_set1_ps(1.);
49387 assert_eq_m512(r, e);
49388 }
49389
49390 #[simd_test(enable = "avx512f")]
49391 unsafe fn test_mm512_mask_cvtph_ps() {
49392 let a = _mm256_setr_epi64x(
49393 4323521613979991040,
49394 4323521613979991040,
49395 4323521613979991040,
49396 4323521613979991040,
49397 );
49398 let src = _mm512_set1_ps(0.);
49399 let r = _mm512_mask_cvtph_ps(src, 0, a);
49400 assert_eq_m512(r, src);
49401 let r = _mm512_mask_cvtph_ps(src, 0b00000000_11111111, a);
49402 let e = _mm512_setr_ps(
49403 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
49404 );
49405 assert_eq_m512(r, e);
49406 }
49407
49408 #[simd_test(enable = "avx512f")]
49409 unsafe fn test_mm512_maskz_cvtph_ps() {
49410 let a = _mm256_setr_epi64x(
49411 4323521613979991040,
49412 4323521613979991040,
49413 4323521613979991040,
49414 4323521613979991040,
49415 );
49416 let r = _mm512_maskz_cvtph_ps(0, a);
49417 assert_eq_m512(r, _mm512_setzero_ps());
49418 let r = _mm512_maskz_cvtph_ps(0b00000000_11111111, a);
49419 let e = _mm512_setr_ps(
49420 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
49421 );
49422 assert_eq_m512(r, e);
49423 }
49424
49425 #[simd_test(enable = "avx512f,avx512vl")]
49426 unsafe fn test_mm256_mask_cvtph_ps() {
49427 let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49428 let src = _mm256_set1_ps(0.);
49429 let r = _mm256_mask_cvtph_ps(src, 0, a);
49430 assert_eq_m256(r, src);
49431 let r = _mm256_mask_cvtph_ps(src, 0b11111111, a);
49432 let e = _mm256_setr_ps(1., 1., 1., 1., 1., 1., 1., 1.);
49433 assert_eq_m256(r, e);
49434 }
49435
49436 #[simd_test(enable = "avx512f,avx512vl")]
49437 unsafe fn test_mm256_maskz_cvtph_ps() {
49438 let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49439 let r = _mm256_maskz_cvtph_ps(0, a);
49440 assert_eq_m256(r, _mm256_setzero_ps());
49441 let r = _mm256_maskz_cvtph_ps(0b11111111, a);
49442 let e = _mm256_setr_ps(1., 1., 1., 1., 1., 1., 1., 1.);
49443 assert_eq_m256(r, e);
49444 }
49445
49446 #[simd_test(enable = "avx512f,avx512vl")]
49447 unsafe fn test_mm_mask_cvtph_ps() {
49448 let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49449 let src = _mm_set1_ps(0.);
49450 let r = _mm_mask_cvtph_ps(src, 0, a);
49451 assert_eq_m128(r, src);
49452 let r = _mm_mask_cvtph_ps(src, 0b00001111, a);
49453 let e = _mm_setr_ps(1., 1., 1., 1.);
49454 assert_eq_m128(r, e);
49455 }
49456
49457 #[simd_test(enable = "avx512f,avx512vl")]
49458 unsafe fn test_mm_maskz_cvtph_ps() {
49459 let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49460 let r = _mm_maskz_cvtph_ps(0, a);
49461 assert_eq_m128(r, _mm_setzero_ps());
49462 let r = _mm_maskz_cvtph_ps(0b00001111, a);
49463 let e = _mm_setr_ps(1., 1., 1., 1.);
49464 assert_eq_m128(r, e);
49465 }
49466
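    // The cvtt* variants truncate toward zero regardless of the rounding mode (the
    // immediate here only suppresses exceptions), so -1.5 -> -1 and 9.5 -> 9 below.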
49467 #[simd_test(enable = "avx512f")]
49468 unsafe fn test_mm512_cvtt_roundps_epi32() {
49469 let a = _mm512_setr_ps(
49470 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49471 );
49472 let r = _mm512_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(a);
49473 let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 8, 9, 10, 11, 12, 13, 14, 15);
49474 assert_eq_m512i(r, e);
49475 }
49476
49477 #[simd_test(enable = "avx512f")]
49478 unsafe fn test_mm512_mask_cvtt_roundps_epi32() {
49479 let a = _mm512_setr_ps(
49480 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49481 );
49482 let src = _mm512_set1_epi32(0);
49483 let r = _mm512_mask_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(src, 0, a);
49484 assert_eq_m512i(r, src);
49485 let r = _mm512_mask_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
49486 let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
49487 assert_eq_m512i(r, e);
49488 }
49489
49490 #[simd_test(enable = "avx512f")]
49491 unsafe fn test_mm512_maskz_cvtt_roundps_epi32() {
49492 let a = _mm512_setr_ps(
49493 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49494 );
49495 let r = _mm512_maskz_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(0, a);
49496 assert_eq_m512i(r, _mm512_setzero_si512());
49497 let r = _mm512_maskz_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
49498 let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
49499 assert_eq_m512i(r, e);
49500 }
49501
49502 #[simd_test(enable = "avx512f")]
49503 unsafe fn test_mm512_cvtt_roundps_epu32() {
49504 let a = _mm512_setr_ps(
49505 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49506 );
49507 let r = _mm512_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(a);
49508 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 9, 10, 11, 12, 13, 14, 15);
49509 assert_eq_m512i(r, e);
49510 }
49511
49512 #[simd_test(enable = "avx512f")]
49513 unsafe fn test_mm512_mask_cvtt_roundps_epu32() {
49514 let a = _mm512_setr_ps(
49515 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49516 );
49517 let src = _mm512_set1_epi32(0);
49518 let r = _mm512_mask_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(src, 0, a);
49519 assert_eq_m512i(r, src);
49520 let r = _mm512_mask_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
49521 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
49522 assert_eq_m512i(r, e);
49523 }
49524
49525 #[simd_test(enable = "avx512f")]
49526 unsafe fn test_mm512_maskz_cvtt_roundps_epu32() {
49527 let a = _mm512_setr_ps(
49528 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49529 );
49530 let r = _mm512_maskz_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(0, a);
49531 assert_eq_m512i(r, _mm512_setzero_si512());
49532 let r = _mm512_maskz_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
49533 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
49534 assert_eq_m512i(r, e);
49535 }
49536
49537 #[simd_test(enable = "avx512f")]
49538 unsafe fn test_mm512_cvttps_epi32() {
49539 let a = _mm512_setr_ps(
49540 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49541 );
49542 let r = _mm512_cvttps_epi32(a);
49543 let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 8, 9, 10, 11, 12, 13, 14, 15);
49544 assert_eq_m512i(r, e);
49545 }
49546
49547 #[simd_test(enable = "avx512f")]
49548 unsafe fn test_mm512_mask_cvttps_epi32() {
49549 let a = _mm512_setr_ps(
49550 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49551 );
49552 let src = _mm512_set1_epi32(0);
49553 let r = _mm512_mask_cvttps_epi32(src, 0, a);
49554 assert_eq_m512i(r, src);
49555 let r = _mm512_mask_cvttps_epi32(src, 0b00000000_11111111, a);
49556 let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
49557 assert_eq_m512i(r, e);
49558 }
49559
49560 #[simd_test(enable = "avx512f")]
49561 unsafe fn test_mm512_maskz_cvttps_epi32() {
49562 let a = _mm512_setr_ps(
49563 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49564 );
49565 let r = _mm512_maskz_cvttps_epi32(0, a);
49566 assert_eq_m512i(r, _mm512_setzero_si512());
49567 let r = _mm512_maskz_cvttps_epi32(0b00000000_11111111, a);
49568 let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
49569 assert_eq_m512i(r, e);
49570 }
49571
49572 #[simd_test(enable = "avx512f,avx512vl")]
49573 unsafe fn test_mm256_mask_cvttps_epi32() {
49574 let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
49575 let src = _mm256_set1_epi32(0);
49576 let r = _mm256_mask_cvttps_epi32(src, 0, a);
49577 assert_eq_m256i(r, src);
49578 let r = _mm256_mask_cvttps_epi32(src, 0b11111111, a);
49579 let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
49580 assert_eq_m256i(r, e);
49581 }
49582
49583 #[simd_test(enable = "avx512f,avx512vl")]
49584 unsafe fn test_mm256_maskz_cvttps_epi32() {
49585 let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
49586 let r = _mm256_maskz_cvttps_epi32(0, a);
49587 assert_eq_m256i(r, _mm256_setzero_si256());
49588 let r = _mm256_maskz_cvttps_epi32(0b11111111, a);
49589 let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
49590 assert_eq_m256i(r, e);
49591 }
49592
49593 #[simd_test(enable = "avx512f,avx512vl")]
49594 unsafe fn test_mm_mask_cvttps_epi32() {
49595 let a = _mm_set_ps(12., 13.5, 14., 15.5);
49596 let src = _mm_set1_epi32(0);
49597 let r = _mm_mask_cvttps_epi32(src, 0, a);
49598 assert_eq_m128i(r, src);
49599 let r = _mm_mask_cvttps_epi32(src, 0b00001111, a);
49600 let e = _mm_set_epi32(12, 13, 14, 15);
49601 assert_eq_m128i(r, e);
49602 }
49603
49604 #[simd_test(enable = "avx512f,avx512vl")]
49605 unsafe fn test_mm_maskz_cvttps_epi32() {
49606 let a = _mm_set_ps(12., 13.5, 14., 15.5);
49607 let r = _mm_maskz_cvttps_epi32(0, a);
49608 assert_eq_m128i(r, _mm_setzero_si128());
49609 let r = _mm_maskz_cvttps_epi32(0b00001111, a);
49610 let e = _mm_set_epi32(12, 13, 14, 15);
49611 assert_eq_m128i(r, e);
49612 }
49613
49614 #[simd_test(enable = "avx512f")]
49615 unsafe fn test_mm512_cvttps_epu32() {
49616 let a = _mm512_setr_ps(
49617 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49618 );
49619 let r = _mm512_cvttps_epu32(a);
49620 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 9, 10, 11, 12, 13, 14, 15);
49621 assert_eq_m512i(r, e);
49622 }
49623
49624 #[simd_test(enable = "avx512f")]
49625 unsafe fn test_mm512_mask_cvttps_epu32() {
49626 let a = _mm512_setr_ps(
49627 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49628 );
49629 let src = _mm512_set1_epi32(0);
49630 let r = _mm512_mask_cvttps_epu32(src, 0, a);
49631 assert_eq_m512i(r, src);
49632 let r = _mm512_mask_cvttps_epu32(src, 0b00000000_11111111, a);
49633 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
49634 assert_eq_m512i(r, e);
49635 }
49636
49637 #[simd_test(enable = "avx512f")]
49638 unsafe fn test_mm512_maskz_cvttps_epu32() {
49639 let a = _mm512_setr_ps(
49640 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49641 );
49642 let r = _mm512_maskz_cvttps_epu32(0, a);
49643 assert_eq_m512i(r, _mm512_setzero_si512());
49644 let r = _mm512_maskz_cvttps_epu32(0b00000000_11111111, a);
49645 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
49646 assert_eq_m512i(r, e);
49647 }
49648
49649 #[simd_test(enable = "avx512f,avx512vl")]
49650 unsafe fn test_mm256_cvttps_epu32() {
49651 let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
49652 let r = _mm256_cvttps_epu32(a);
49653 let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
49654 assert_eq_m256i(r, e);
49655 }
49656
49657 #[simd_test(enable = "avx512f,avx512vl")]
49658 unsafe fn test_mm256_mask_cvttps_epu32() {
49659 let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
49660 let src = _mm256_set1_epi32(0);
49661 let r = _mm256_mask_cvttps_epu32(src, 0, a);
49662 assert_eq_m256i(r, src);
49663 let r = _mm256_mask_cvttps_epu32(src, 0b11111111, a);
49664 let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
49665 assert_eq_m256i(r, e);
49666 }
49667
49668 #[simd_test(enable = "avx512f,avx512vl")]
49669 unsafe fn test_mm256_maskz_cvttps_epu32() {
49670 let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
49671 let r = _mm256_maskz_cvttps_epu32(0, a);
49672 assert_eq_m256i(r, _mm256_setzero_si256());
49673 let r = _mm256_maskz_cvttps_epu32(0b11111111, a);
49674 let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
49675 assert_eq_m256i(r, e);
49676 }
49677
49678 #[simd_test(enable = "avx512f,avx512vl")]
49679 unsafe fn test_mm_cvttps_epu32() {
49680 let a = _mm_set_ps(12., 13.5, 14., 15.5);
49681 let r = _mm_cvttps_epu32(a);
49682 let e = _mm_set_epi32(12, 13, 14, 15);
49683 assert_eq_m128i(r, e);
49684 }
49685
49686 #[simd_test(enable = "avx512f,avx512vl")]
49687 unsafe fn test_mm_mask_cvttps_epu32() {
49688 let a = _mm_set_ps(12., 13.5, 14., 15.5);
49689 let src = _mm_set1_epi32(0);
49690 let r = _mm_mask_cvttps_epu32(src, 0, a);
49691 assert_eq_m128i(r, src);
49692 let r = _mm_mask_cvttps_epu32(src, 0b00001111, a);
49693 let e = _mm_set_epi32(12, 13, 14, 15);
49694 assert_eq_m128i(r, e);
49695 }
49696
49697 #[simd_test(enable = "avx512f,avx512vl")]
49698 unsafe fn test_mm_maskz_cvttps_epu32() {
49699 let a = _mm_set_ps(12., 13.5, 14., 15.5);
49700 let r = _mm_maskz_cvttps_epu32(0, a);
49701 assert_eq_m128i(r, _mm_setzero_si128());
49702 let r = _mm_maskz_cvttps_epu32(0b00001111, a);
49703 let e = _mm_set_epi32(12, 13, 14, 15);
49704 assert_eq_m128i(r, e);
49705 }
49706
49707 #[simd_test(enable = "avx512f")]
49708 unsafe fn test_mm512_i32gather_ps() {
49709 let arr: [f32; 256] = core::array::from_fn(|i| i as f32);
        // A scale of 4 addresses 32-bit elements, so each index is an element offset
49711 #[rustfmt::skip]
49712 let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49713 120, 128, 136, 144, 152, 160, 168, 176);
49714 let r = _mm512_i32gather_ps::<4>(index, arr.as_ptr());
49715 #[rustfmt::skip]
49716 assert_eq_m512(r, _mm512_setr_ps(0., 16., 32., 48., 64., 80., 96., 112.,
49717 120., 128., 136., 144., 152., 160., 168., 176.));
49718 }
49719
49720 #[simd_test(enable = "avx512f")]
49721 unsafe fn test_mm512_mask_i32gather_ps() {
49722 let arr: [f32; 256] = core::array::from_fn(|i| i as f32);
49723 let src = _mm512_set1_ps(2.);
49724 let mask = 0b10101010_10101010;
49725 #[rustfmt::skip]
49726 let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49727 120, 128, 136, 144, 152, 160, 168, 176);
        // A scale of 4 addresses 32-bit elements, so each index is an element offset
49729 let r = _mm512_mask_i32gather_ps::<4>(src, mask, index, arr.as_ptr());
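        // Even-numbered mask bits are clear, so those lanes keep the value from `src` (2.);
        // the odd lanes load arr[index].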
49730 #[rustfmt::skip]
49731 assert_eq_m512(r, _mm512_setr_ps(2., 16., 2., 48., 2., 80., 2., 112.,
49732 2., 128., 2., 144., 2., 160., 2., 176.));
49733 }
49734
49735 #[simd_test(enable = "avx512f")]
49736 unsafe fn test_mm512_i32gather_epi32() {
49737 let arr: [i32; 256] = core::array::from_fn(|i| i as i32);
        // A scale of 4 addresses 32-bit elements, so each index is an element offset
49739 #[rustfmt::skip]
49740 let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49741 120, 128, 136, 144, 152, 160, 168, 176);
49742 let r = _mm512_i32gather_epi32::<4>(index, arr.as_ptr());
49743 #[rustfmt::skip]
49744 assert_eq_m512i(r, _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49745 120, 128, 136, 144, 152, 160, 168, 176));
49746 }
49747
49748 #[simd_test(enable = "avx512f")]
49749 unsafe fn test_mm512_mask_i32gather_epi32() {
49750 let arr: [i32; 256] = core::array::from_fn(|i| i as i32);
49751 let src = _mm512_set1_epi32(2);
49752 let mask = 0b10101010_10101010;
49753 let index = _mm512_setr_epi32(
49754 0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240,
49755 );
        // A scale of 4 addresses 32-bit elements, so each index is an element offset
49757 let r = _mm512_mask_i32gather_epi32::<4>(src, mask, index, arr.as_ptr());
49758 assert_eq_m512i(
49759 r,
49760 _mm512_setr_epi32(2, 16, 2, 48, 2, 80, 2, 112, 2, 144, 2, 176, 2, 208, 2, 240),
49761 );
49762 }
49763
49764 #[simd_test(enable = "avx512f")]
49765 unsafe fn test_mm512_i32scatter_ps() {
49766 let mut arr = [0f32; 256];
49767 #[rustfmt::skip]
49768 let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49769 128, 144, 160, 176, 192, 208, 224, 240);
49770 let src = _mm512_setr_ps(
49771 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
49772 );
        // A scale of 4 addresses 32-bit elements, so each index is an element offset
49774 _mm512_i32scatter_ps::<4>(arr.as_mut_ptr(), index, src);
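        // The indices step by 16 elements, so lane i (holding the value i + 1) is written to arr[i * 16].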
49775 let mut expected = [0f32; 256];
49776 for i in 0..16 {
49777 expected[i * 16] = (i + 1) as f32;
49778 }
49779 assert_eq!(&arr[..], &expected[..],);
49780 }
49781
49782 #[simd_test(enable = "avx512f")]
49783 unsafe fn test_mm512_mask_i32scatter_ps() {
49784 let mut arr = [0f32; 256];
49785 let mask = 0b10101010_10101010;
49786 #[rustfmt::skip]
49787 let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49788 128, 144, 160, 176, 192, 208, 224, 240);
49789 let src = _mm512_setr_ps(
49790 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
49791 );
        // A scale of 4 addresses 32-bit elements, so each index is an element offset
49793 _mm512_mask_i32scatter_ps::<4>(arr.as_mut_ptr(), mask, index, src);
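        // Only the odd lanes have their mask bit set: lane 2i + 1 holds the value 2i + 2
        // and scatters it to arr[(2i + 1) * 16], i.e. arr[i * 32 + 16].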
49794 let mut expected = [0f32; 256];
49795 for i in 0..8 {
49796 expected[i * 32 + 16] = 2. * (i + 1) as f32;
49797 }
49798 assert_eq!(&arr[..], &expected[..],);
49799 }
49800
49801 #[simd_test(enable = "avx512f")]
49802 unsafe fn test_mm512_i32scatter_epi32() {
49803 let mut arr = [0i32; 256];
        #[rustfmt::skip]
        let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
                                      128, 144, 160, 176, 192, 208, 224, 240);
49808 let src = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        // A scale of 4 addresses 32-bit elements, so each index is an element offset
49810 _mm512_i32scatter_epi32::<4>(arr.as_mut_ptr(), index, src);
49811 let mut expected = [0i32; 256];
49812 for i in 0..16 {
49813 expected[i * 16] = (i + 1) as i32;
49814 }
49815 assert_eq!(&arr[..], &expected[..],);
49816 }
49817
49818 #[simd_test(enable = "avx512f")]
49819 unsafe fn test_mm512_mask_i32scatter_epi32() {
49820 let mut arr = [0i32; 256];
49821 let mask = 0b10101010_10101010;
49822 #[rustfmt::skip]
49823 let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49824 128, 144, 160, 176, 192, 208, 224, 240);
49825 let src = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        // A scale of 4 addresses 32-bit elements, so each index is an element offset
49827 _mm512_mask_i32scatter_epi32::<4>(arr.as_mut_ptr(), mask, index, src);
49828 let mut expected = [0i32; 256];
49829 for i in 0..8 {
49830 expected[i * 32 + 16] = 2 * (i + 1) as i32;
49831 }
49832 assert_eq!(&arr[..], &expected[..],);
49833 }
49834
49835 #[simd_test(enable = "avx512f")]
49836 unsafe fn test_mm512_cmplt_ps_mask() {
49837 #[rustfmt::skip]
49838 let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49839 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49840 let b = _mm512_set1_ps(-1.);
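        // LT is an ordered comparison, so the NaN lanes yield 0; only f32::MIN and -100. are below -1.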
49841 let m = _mm512_cmplt_ps_mask(a, b);
49842 assert_eq!(m, 0b00000101_00000101);
49843 }
49844
49845 #[simd_test(enable = "avx512f")]
49846 unsafe fn test_mm512_mask_cmplt_ps_mask() {
49847 #[rustfmt::skip]
49848 let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49849 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49850 let b = _mm512_set1_ps(-1.);
49851 let mask = 0b01100110_01100110;
49852 let r = _mm512_mask_cmplt_ps_mask(mask, a, b);
49853 assert_eq!(r, 0b00000100_00000100);
49854 }
49855
49856 #[simd_test(enable = "avx512f")]
49857 unsafe fn test_mm512_cmpnlt_ps_mask() {
49858 #[rustfmt::skip]
49859 let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49860 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49861 let b = _mm512_set1_ps(-1.);
49862 assert_eq!(_mm512_cmpnlt_ps_mask(a, b), !_mm512_cmplt_ps_mask(a, b));
49863 }
49864
49865 #[simd_test(enable = "avx512f")]
49866 unsafe fn test_mm512_mask_cmpnlt_ps_mask() {
49867 #[rustfmt::skip]
49868 let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49869 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49870 let b = _mm512_set1_ps(-1.);
49871 let mask = 0b01111010_01111010;
49872 assert_eq!(_mm512_mask_cmpnlt_ps_mask(mask, a, b), 0b01111010_01111010);
49873 }
49874
49875 #[simd_test(enable = "avx512f")]
49876 unsafe fn test_mm512_cmpnle_ps_mask() {
49877 #[rustfmt::skip]
49878 let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49879 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49880 let b = _mm512_set1_ps(-1.);
49881 let m = _mm512_cmpnle_ps_mask(b, a);
49882 assert_eq!(m, 0b00001101_00001101);
49883 }
49884
49885 #[simd_test(enable = "avx512f")]
49886 unsafe fn test_mm512_mask_cmpnle_ps_mask() {
49887 #[rustfmt::skip]
49888 let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49889 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49890 let b = _mm512_set1_ps(-1.);
49891 let mask = 0b01100110_01100110;
49892 let r = _mm512_mask_cmpnle_ps_mask(mask, b, a);
49893 assert_eq!(r, 0b00000100_00000100);
49894 }
49895
49896 #[simd_test(enable = "avx512f")]
49897 unsafe fn test_mm512_cmple_ps_mask() {
49898 #[rustfmt::skip]
49899 let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49900 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49901 let b = _mm512_set1_ps(-1.);
49902 assert_eq!(_mm512_cmple_ps_mask(a, b), 0b00100101_00100101);
49903 }
49904
49905 #[simd_test(enable = "avx512f")]
49906 unsafe fn test_mm512_mask_cmple_ps_mask() {
49907 #[rustfmt::skip]
49908 let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49909 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49910 let b = _mm512_set1_ps(-1.);
49911 let mask = 0b01111010_01111010;
49912 assert_eq!(_mm512_mask_cmple_ps_mask(mask, a, b), 0b00100000_00100000);
49913 }
49914
49915 #[simd_test(enable = "avx512f")]
49916 unsafe fn test_mm512_cmpeq_ps_mask() {
49917 #[rustfmt::skip]
49918 let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
49919 0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
49920 #[rustfmt::skip]
49921 let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
49922 0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
49923 let m = _mm512_cmpeq_ps_mask(b, a);
49924 assert_eq!(m, 0b11001101_11001101);
49925 }
49926
49927 #[simd_test(enable = "avx512f")]
49928 unsafe fn test_mm512_mask_cmpeq_ps_mask() {
49929 #[rustfmt::skip]
49930 let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
49931 0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
49932 #[rustfmt::skip]
49933 let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
49934 0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
49935 let mask = 0b01111010_01111010;
49936 let r = _mm512_mask_cmpeq_ps_mask(mask, b, a);
49937 assert_eq!(r, 0b01001000_01001000);
49938 }
49939
49940 #[simd_test(enable = "avx512f")]
49941 unsafe fn test_mm512_cmpneq_ps_mask() {
49942 #[rustfmt::skip]
49943 let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
49944 0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
49945 #[rustfmt::skip]
49946 let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
49947 0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
49948 let m = _mm512_cmpneq_ps_mask(b, a);
49949 assert_eq!(m, 0b00110010_00110010);
49950 }
49951
49952 #[simd_test(enable = "avx512f")]
49953 unsafe fn test_mm512_mask_cmpneq_ps_mask() {
49954 #[rustfmt::skip]
49955 let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
49956 0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
49957 #[rustfmt::skip]
49958 let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
49959 0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
49960 let mask = 0b01111010_01111010;
49961 let r = _mm512_mask_cmpneq_ps_mask(mask, b, a);
49962 assert_eq!(r, 0b00110010_00110010)
49963 }
49964
49965 #[simd_test(enable = "avx512f")]
49966 unsafe fn test_mm512_cmp_ps_mask() {
49967 #[rustfmt::skip]
49968 let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
49969 0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
49970 let b = _mm512_set1_ps(-1.);
49971 let m = _mm512_cmp_ps_mask::<_CMP_LT_OQ>(a, b);
49972 assert_eq!(m, 0b00000101_00000101);
49973 }
49974
49975 #[simd_test(enable = "avx512f")]
49976 unsafe fn test_mm512_mask_cmp_ps_mask() {
49977 #[rustfmt::skip]
49978 let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
49979 0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
49980 let b = _mm512_set1_ps(-1.);
49981 let mask = 0b01100110_01100110;
49982 let r = _mm512_mask_cmp_ps_mask::<_CMP_LT_OQ>(mask, a, b);
49983 assert_eq!(r, 0b00000100_00000100);
49984 }
49985
49986 #[simd_test(enable = "avx512f,avx512vl")]
49987 unsafe fn test_mm256_cmp_ps_mask() {
49988 let a = _mm256_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
49989 let b = _mm256_set1_ps(-1.);
49990 let m = _mm256_cmp_ps_mask::<_CMP_LT_OQ>(a, b);
49991 assert_eq!(m, 0b00000101);
49992 }
49993
49994 #[simd_test(enable = "avx512f,avx512vl")]
49995 unsafe fn test_mm256_mask_cmp_ps_mask() {
49996 let a = _mm256_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
49997 let b = _mm256_set1_ps(-1.);
49998 let mask = 0b01100110;
49999 let r = _mm256_mask_cmp_ps_mask::<_CMP_LT_OQ>(mask, a, b);
50000 assert_eq!(r, 0b00000100);
50001 }
50002
50003 #[simd_test(enable = "avx512f,avx512vl")]
50004 unsafe fn test_mm_cmp_ps_mask() {
50005 let a = _mm_set_ps(0., 1., -1., 13.);
50006 let b = _mm_set1_ps(1.);
50007 let m = _mm_cmp_ps_mask::<_CMP_LT_OQ>(a, b);
50008 assert_eq!(m, 0b00001010);
50009 }
50010
50011 #[simd_test(enable = "avx512f,avx512vl")]
50012 unsafe fn test_mm_mask_cmp_ps_mask() {
50013 let a = _mm_set_ps(0., 1., -1., 13.);
50014 let b = _mm_set1_ps(1.);
50015 let mask = 0b11111111;
50016 let r = _mm_mask_cmp_ps_mask::<_CMP_LT_OQ>(mask, a, b);
50017 assert_eq!(r, 0b00001010);
50018 }
50019
50020 #[simd_test(enable = "avx512f")]
50021 unsafe fn test_mm512_cmp_round_ps_mask() {
50022 #[rustfmt::skip]
50023 let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
50024 0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
50025 let b = _mm512_set1_ps(-1.);
50026 let m = _mm512_cmp_round_ps_mask::<_CMP_LT_OQ, _MM_FROUND_CUR_DIRECTION>(a, b);
50027 assert_eq!(m, 0b00000101_00000101);
50028 }
50029
50030 #[simd_test(enable = "avx512f")]
50031 unsafe fn test_mm512_mask_cmp_round_ps_mask() {
50032 #[rustfmt::skip]
50033 let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
50034 0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
50035 let b = _mm512_set1_ps(-1.);
50036 let mask = 0b01100110_01100110;
50037 let r = _mm512_mask_cmp_round_ps_mask::<_CMP_LT_OQ, _MM_FROUND_CUR_DIRECTION>(mask, a, b);
50038 assert_eq!(r, 0b00000100_00000100);
50039 }
50040
50041 #[simd_test(enable = "avx512f")]
50042 unsafe fn test_mm512_cmpord_ps_mask() {
50043 #[rustfmt::skip]
50044 let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
50045 f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
50046 #[rustfmt::skip]
50047 let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
50048 f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
50049 let m = _mm512_cmpord_ps_mask(a, b);
50050 assert_eq!(m, 0b00000101_00000101);
50051 }
50052
50053 #[simd_test(enable = "avx512f")]
50054 unsafe fn test_mm512_mask_cmpord_ps_mask() {
50055 #[rustfmt::skip]
50056 let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
50057 f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
50058 #[rustfmt::skip]
50059 let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
50060 f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
50061 let mask = 0b11000011_11000011;
50062 let m = _mm512_mask_cmpord_ps_mask(mask, a, b);
50063 assert_eq!(m, 0b00000001_00000001);
50064 }
50065
50066 #[simd_test(enable = "avx512f")]
50067 unsafe fn test_mm512_cmpunord_ps_mask() {
50068 #[rustfmt::skip]
50069 let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
50070 f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
50071 #[rustfmt::skip]
50072 let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
50073 f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
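        // A lane compares as unordered whenever either operand is NaN; only bits 0 and 2 of
        // each half have two non-NaN operands and stay clear.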
50074 let m = _mm512_cmpunord_ps_mask(a, b);
50075
50076 assert_eq!(m, 0b11111010_11111010);
50077 }
50078
50079 #[simd_test(enable = "avx512f")]
50080 unsafe fn test_mm512_mask_cmpunord_ps_mask() {
50081 #[rustfmt::skip]
50082 let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
50083 f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
50084 #[rustfmt::skip]
50085 let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
50086 f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
50087 let mask = 0b00001111_00001111;
50088 let m = _mm512_mask_cmpunord_ps_mask(mask, a, b);
        assert_eq!(m, 0b00001010_00001010);
50090 }
50091
50092 #[simd_test(enable = "avx512f")]
50093 unsafe fn test_mm_cmp_ss_mask() {
50094 let a = _mm_setr_ps(2., 1., 1., 1.);
50095 let b = _mm_setr_ps(1., 2., 2., 2.);
50096 let m = _mm_cmp_ss_mask::<_CMP_GE_OS>(a, b);
50097 assert_eq!(m, 1);
50098 }
50099
50100 #[simd_test(enable = "avx512f")]
50101 unsafe fn test_mm_mask_cmp_ss_mask() {
50102 let a = _mm_setr_ps(2., 1., 1., 1.);
50103 let b = _mm_setr_ps(1., 2., 2., 2.);
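        // The compare is scalar, so only bit 0 of the mask is significant; with it clear the result is 0.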
50104 let m = _mm_mask_cmp_ss_mask::<_CMP_GE_OS>(0b10, a, b);
50105 assert_eq!(m, 0);
50106 let m = _mm_mask_cmp_ss_mask::<_CMP_GE_OS>(0b1, a, b);
50107 assert_eq!(m, 1);
50108 }
50109
50110 #[simd_test(enable = "avx512f")]
50111 unsafe fn test_mm_cmp_round_ss_mask() {
50112 let a = _mm_setr_ps(2., 1., 1., 1.);
50113 let b = _mm_setr_ps(1., 2., 2., 2.);
50114 let m = _mm_cmp_round_ss_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(a, b);
50115 assert_eq!(m, 1);
50116 }
50117
50118 #[simd_test(enable = "avx512f")]
50119 unsafe fn test_mm_mask_cmp_round_ss_mask() {
50120 let a = _mm_setr_ps(2., 1., 1., 1.);
50121 let b = _mm_setr_ps(1., 2., 2., 2.);
50122 let m = _mm_mask_cmp_round_ss_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(0b10, a, b);
50123 assert_eq!(m, 0);
50124 let m = _mm_mask_cmp_round_ss_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(0b1, a, b);
50125 assert_eq!(m, 1);
50126 }
50127
50128 #[simd_test(enable = "avx512f")]
50129 unsafe fn test_mm_cmp_sd_mask() {
50130 let a = _mm_setr_pd(2., 1.);
50131 let b = _mm_setr_pd(1., 2.);
50132 let m = _mm_cmp_sd_mask::<_CMP_GE_OS>(a, b);
50133 assert_eq!(m, 1);
50134 }
50135
50136 #[simd_test(enable = "avx512f")]
50137 unsafe fn test_mm_mask_cmp_sd_mask() {
50138 let a = _mm_setr_pd(2., 1.);
50139 let b = _mm_setr_pd(1., 2.);
50140 let m = _mm_mask_cmp_sd_mask::<_CMP_GE_OS>(0b10, a, b);
50141 assert_eq!(m, 0);
50142 let m = _mm_mask_cmp_sd_mask::<_CMP_GE_OS>(0b1, a, b);
50143 assert_eq!(m, 1);
50144 }
50145
50146 #[simd_test(enable = "avx512f")]
50147 unsafe fn test_mm_cmp_round_sd_mask() {
50148 let a = _mm_setr_pd(2., 1.);
50149 let b = _mm_setr_pd(1., 2.);
50150 let m = _mm_cmp_round_sd_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(a, b);
50151 assert_eq!(m, 1);
50152 }
50153
50154 #[simd_test(enable = "avx512f")]
50155 unsafe fn test_mm_mask_cmp_round_sd_mask() {
50156 let a = _mm_setr_pd(2., 1.);
50157 let b = _mm_setr_pd(1., 2.);
50158 let m = _mm_mask_cmp_round_sd_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(0b10, a, b);
50159 assert_eq!(m, 0);
50160 let m = _mm_mask_cmp_round_sd_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(0b1, a, b);
50161 assert_eq!(m, 1);
50162 }
50163
50164 #[simd_test(enable = "avx512f")]
50165 unsafe fn test_mm512_cmplt_epu32_mask() {
50166 #[rustfmt::skip]
50167 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50168 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50169 let b = _mm512_set1_epi32(-1);
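        // Compared as unsigned, b is u32::MAX, so every lane of `a` except the -1 and u32::MAX lanes is below it.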
50170 let m = _mm512_cmplt_epu32_mask(a, b);
50171 assert_eq!(m, 0b11001111_11001111);
50172 }
50173
50174 #[simd_test(enable = "avx512f")]
50175 unsafe fn test_mm512_mask_cmplt_epu32_mask() {
50176 #[rustfmt::skip]
50177 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50178 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50179 let b = _mm512_set1_epi32(-1);
50180 let mask = 0b01111010_01111010;
50181 let r = _mm512_mask_cmplt_epu32_mask(mask, a, b);
50182 assert_eq!(r, 0b01001010_01001010);
50183 }
50184
50185 #[simd_test(enable = "avx512f,avx512vl")]
50186 unsafe fn test_mm256_cmplt_epu32_mask() {
50187 let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 101, 100, 99);
50188 let b = _mm256_set1_epi32(1);
50189 let r = _mm256_cmplt_epu32_mask(a, b);
50190 assert_eq!(r, 0b10000000);
50191 }
50192
50193 #[simd_test(enable = "avx512f,avx512vl")]
50194 unsafe fn test_mm256_mask_cmplt_epu32_mask() {
50195 let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 101, 100, 99);
50196 let b = _mm256_set1_epi32(1);
50197 let mask = 0b11111111;
50198 let r = _mm256_mask_cmplt_epu32_mask(mask, a, b);
50199 assert_eq!(r, 0b10000000);
50200 }
50201
50202 #[simd_test(enable = "avx512f,avx512vl")]
50203 unsafe fn test_mm_cmplt_epu32_mask() {
50204 let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50205 let b = _mm_set1_epi32(1);
50206 let r = _mm_cmplt_epu32_mask(a, b);
50207 assert_eq!(r, 0b00001000);
50208 }
50209
50210 #[simd_test(enable = "avx512f,avx512vl")]
50211 unsafe fn test_mm_mask_cmplt_epu32_mask() {
50212 let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50213 let b = _mm_set1_epi32(1);
50214 let mask = 0b11111111;
50215 let r = _mm_mask_cmplt_epu32_mask(mask, a, b);
50216 assert_eq!(r, 0b00001000);
50217 }
50218
50219 #[simd_test(enable = "avx512f")]
50220 unsafe fn test_mm512_cmpgt_epu32_mask() {
50221 #[rustfmt::skip]
50222 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50223 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50224 let b = _mm512_set1_epi32(-1);
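        // The operands are swapped: b > a (unsigned) is the same predicate as a < b, so the
        // mask matches the cmplt_epu32 test above.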
50225 let m = _mm512_cmpgt_epu32_mask(b, a);
50226 assert_eq!(m, 0b11001111_11001111);
50227 }
50228
50229 #[simd_test(enable = "avx512f")]
50230 unsafe fn test_mm512_mask_cmpgt_epu32_mask() {
50231 #[rustfmt::skip]
50232 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50233 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50234 let b = _mm512_set1_epi32(-1);
50235 let mask = 0b01111010_01111010;
50236 let r = _mm512_mask_cmpgt_epu32_mask(mask, b, a);
50237 assert_eq!(r, 0b01001010_01001010);
50238 }
50239
50240 #[simd_test(enable = "avx512f,avx512vl")]
50241 unsafe fn test_mm256_cmpgt_epu32_mask() {
50242 let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 99, 100, 101);
50243 let b = _mm256_set1_epi32(1);
50244 let r = _mm256_cmpgt_epu32_mask(a, b);
50245 assert_eq!(r, 0b00111111);
50246 }
50247
50248 #[simd_test(enable = "avx512f,avx512vl")]
50249 unsafe fn test_mm256_mask_cmpgt_epu32_mask() {
50250 let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 99, 100, 101);
50251 let b = _mm256_set1_epi32(1);
50252 let mask = 0b11111111;
50253 let r = _mm256_mask_cmpgt_epu32_mask(mask, a, b);
50254 assert_eq!(r, 0b00111111);
50255 }
50256
50257 #[simd_test(enable = "avx512f,avx512vl")]
50258 unsafe fn test_mm_cmpgt_epu32_mask() {
50259 let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50260 let b = _mm_set1_epi32(1);
50261 let r = _mm_cmpgt_epu32_mask(a, b);
50262 assert_eq!(r, 0b00000011);
50263 }
50264
50265 #[simd_test(enable = "avx512f,avx512vl")]
50266 unsafe fn test_mm_mask_cmpgt_epu32_mask() {
50267 let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50268 let b = _mm_set1_epi32(1);
50269 let mask = 0b11111111;
50270 let r = _mm_mask_cmpgt_epu32_mask(mask, a, b);
50271 assert_eq!(r, 0b00000011);
50272 }
50273
50274 #[simd_test(enable = "avx512f")]
50275 unsafe fn test_mm512_cmple_epu32_mask() {
50276 #[rustfmt::skip]
50277 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50278 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50279 let b = _mm512_set1_epi32(-1);
50280 assert_eq!(
50281 _mm512_cmple_epu32_mask(a, b),
50282 !_mm512_cmpgt_epu32_mask(a, b)
50283 )
50284 }
50285
50286 #[simd_test(enable = "avx512f")]
50287 unsafe fn test_mm512_mask_cmple_epu32_mask() {
50288 #[rustfmt::skip]
50289 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50290 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50291 let b = _mm512_set1_epi32(-1);
50292 let mask = 0b01111010_01111010;
50293 assert_eq!(
50294 _mm512_mask_cmple_epu32_mask(mask, a, b),
50295 0b01111010_01111010
50296 );
50297 }
50298
50299 #[simd_test(enable = "avx512f,avx512vl")]
50300 unsafe fn test_mm256_cmple_epu32_mask() {
50301 let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 200, 100, 101);
50302 let b = _mm256_set1_epi32(1);
50303 let r = _mm256_cmple_epu32_mask(a, b);
50304 assert_eq!(r, 0b11000000)
50305 }
50306
50307 #[simd_test(enable = "avx512f,avx512vl")]
50308 unsafe fn test_mm256_mask_cmple_epu32_mask() {
50309 let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 200, 100, 101);
50310 let b = _mm256_set1_epi32(1);
50311 let mask = 0b11111111;
50312 let r = _mm256_mask_cmple_epu32_mask(mask, a, b);
50313 assert_eq!(r, 0b11000000)
50314 }
50315
50316 #[simd_test(enable = "avx512f,avx512vl")]
50317 unsafe fn test_mm_cmple_epu32_mask() {
50318 let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50319 let b = _mm_set1_epi32(1);
50320 let r = _mm_cmple_epu32_mask(a, b);
50321 assert_eq!(r, 0b00001100)
50322 }
50323
50324 #[simd_test(enable = "avx512f,avx512vl")]
50325 unsafe fn test_mm_mask_cmple_epu32_mask() {
50326 let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50327 let b = _mm_set1_epi32(1);
50328 let mask = 0b11111111;
50329 let r = _mm_mask_cmple_epu32_mask(mask, a, b);
50330 assert_eq!(r, 0b00001100)
50331 }
50332
50333 #[simd_test(enable = "avx512f")]
50334 unsafe fn test_mm512_cmpge_epu32_mask() {
50335 #[rustfmt::skip]
50336 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50337 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50338 let b = _mm512_set1_epi32(-1);
50339 assert_eq!(
50340 _mm512_cmpge_epu32_mask(a, b),
50341 !_mm512_cmplt_epu32_mask(a, b)
50342 )
50343 }
50344
50345 #[simd_test(enable = "avx512f")]
50346 unsafe fn test_mm512_mask_cmpge_epu32_mask() {
50347 #[rustfmt::skip]
50348 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50349 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50350 let b = _mm512_set1_epi32(-1);
50351 let mask = 0b01111010_01111010;
        assert_eq!(_mm512_mask_cmpge_epu32_mask(mask, a, b), 0b00110000_00110000);
50353 }
50354
50355 #[simd_test(enable = "avx512f,avx512vl")]
50356 unsafe fn test_mm256_cmpge_epu32_mask() {
50357 let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 300, 100, 200);
50358 let b = _mm256_set1_epi32(1);
50359 let r = _mm256_cmpge_epu32_mask(a, b);
50360 assert_eq!(r, 0b01111111)
50361 }
50362
50363 #[simd_test(enable = "avx512f,avx512vl")]
50364 unsafe fn test_mm256_mask_cmpge_epu32_mask() {
50365 let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 300, 100, 200);
50366 let b = _mm256_set1_epi32(1);
50367 let mask = 0b11111111;
50368 let r = _mm256_mask_cmpge_epu32_mask(mask, a, b);
50369 assert_eq!(r, 0b01111111)
50370 }
50371
50372 #[simd_test(enable = "avx512f,avx512vl")]
50373 unsafe fn test_mm_cmpge_epu32_mask() {
50374 let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50375 let b = _mm_set1_epi32(1);
50376 let r = _mm_cmpge_epu32_mask(a, b);
50377 assert_eq!(r, 0b00000111)
50378 }
50379
50380 #[simd_test(enable = "avx512f,avx512vl")]
50381 unsafe fn test_mm_mask_cmpge_epu32_mask() {
50382 let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50383 let b = _mm_set1_epi32(1);
50384 let mask = 0b11111111;
50385 let r = _mm_mask_cmpge_epu32_mask(mask, a, b);
50386 assert_eq!(r, 0b00000111)
50387 }
50388
50389 #[simd_test(enable = "avx512f")]
50390 unsafe fn test_mm512_cmpeq_epu32_mask() {
50391 #[rustfmt::skip]
50392 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50393 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50394 #[rustfmt::skip]
50395 let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50396 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50397 let m = _mm512_cmpeq_epu32_mask(b, a);
50398 assert_eq!(m, 0b11001111_11001111);
50399 }
50400
50401 #[simd_test(enable = "avx512f")]
50402 unsafe fn test_mm512_mask_cmpeq_epu32_mask() {
50403 #[rustfmt::skip]
50404 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50405 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50406 #[rustfmt::skip]
50407 let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50408 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50409 let mask = 0b01111010_01111010;
50410 let r = _mm512_mask_cmpeq_epu32_mask(mask, b, a);
50411 assert_eq!(r, 0b01001010_01001010);
50412 }
50413
50414 #[simd_test(enable = "avx512f,avx512vl")]
50415 unsafe fn test_mm256_cmpeq_epu32_mask() {
50416 let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50417 let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50418 let m = _mm256_cmpeq_epu32_mask(b, a);
50419 assert_eq!(m, 0b11001111);
50420 }
50421
50422 #[simd_test(enable = "avx512f,avx512vl")]
50423 unsafe fn test_mm256_mask_cmpeq_epu32_mask() {
50424 let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50425 let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50426 let mask = 0b01111010;
50427 let r = _mm256_mask_cmpeq_epu32_mask(mask, b, a);
50428 assert_eq!(r, 0b01001010);
50429 }
50430
50431 #[simd_test(enable = "avx512f,avx512vl")]
50432 unsafe fn test_mm_cmpeq_epu32_mask() {
50433 let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
50434 let b = _mm_set_epi32(0, 1, 13, 42);
50435 let m = _mm_cmpeq_epu32_mask(b, a);
50436 assert_eq!(m, 0b00001100);
50437 }
50438
50439 #[simd_test(enable = "avx512f,avx512vl")]
50440 unsafe fn test_mm_mask_cmpeq_epu32_mask() {
50441 let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
50442 let b = _mm_set_epi32(0, 1, 13, 42);
50443 let mask = 0b11111111;
50444 let r = _mm_mask_cmpeq_epu32_mask(mask, b, a);
50445 assert_eq!(r, 0b00001100);
50446 }
50447
50448 #[simd_test(enable = "avx512f")]
50449 unsafe fn test_mm512_cmpneq_epu32_mask() {
50450 #[rustfmt::skip]
50451 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50452 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50453 #[rustfmt::skip]
50454 let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50455 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50456 let m = _mm512_cmpneq_epu32_mask(b, a);
50457 assert_eq!(m, !_mm512_cmpeq_epu32_mask(b, a));
50458 }
50459
50460 #[simd_test(enable = "avx512f")]
50461 unsafe fn test_mm512_mask_cmpneq_epu32_mask() {
50462 #[rustfmt::skip]
50463 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100,
50464 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100);
50465 #[rustfmt::skip]
50466 let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50467 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50468 let mask = 0b01111010_01111010;
50469 let r = _mm512_mask_cmpneq_epu32_mask(mask, b, a);
50470 assert_eq!(r, 0b00110010_00110010);
50471 }
50472
50473 #[simd_test(enable = "avx512f,avx512vl")]
50474 unsafe fn test_mm256_cmpneq_epu32_mask() {
50475 let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100);
50476 let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, -100, 100);
50477 let r = _mm256_cmpneq_epu32_mask(b, a);
50478 assert_eq!(r, 0b00110000);
50479 }
50480
50481 #[simd_test(enable = "avx512f,avx512vl")]
50482 unsafe fn test_mm256_mask_cmpneq_epu32_mask() {
50483 let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100);
50484 let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, -100, 100);
50485 let mask = 0b11111111;
50486 let r = _mm256_mask_cmpneq_epu32_mask(mask, b, a);
50487 assert_eq!(r, 0b00110000);
50488 }
50489
50490 #[simd_test(enable = "avx512f,avx512vl")]
50491 unsafe fn test_mm_cmpneq_epu32_mask() {
50492 let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
50493 let b = _mm_set_epi32(0, 1, 13, 42);
50494 let r = _mm_cmpneq_epu32_mask(b, a);
50495 assert_eq!(r, 0b00000011);
50496 }
50497
50498 #[simd_test(enable = "avx512f,avx512vl")]
50499 unsafe fn test_mm_mask_cmpneq_epu32_mask() {
50500 let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
50501 let b = _mm_set_epi32(0, 1, 13, 42);
50502 let mask = 0b11111111;
50503 let r = _mm_mask_cmpneq_epu32_mask(mask, b, a);
50504 assert_eq!(r, 0b00000011);
50505 }
50506
50507 #[simd_test(enable = "avx512f")]
50508 unsafe fn test_mm512_cmp_epu32_mask() {
50509 #[rustfmt::skip]
50510 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50511 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50512 let b = _mm512_set1_epi32(-1);
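        // _MM_CMPINT_LT selects an unsigned less-than for the epu32 form, so b acts as u32::MAX here.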
50513 let m = _mm512_cmp_epu32_mask::<_MM_CMPINT_LT>(a, b);
50514 assert_eq!(m, 0b11001111_11001111);
50515 }
50516
50517 #[simd_test(enable = "avx512f")]
50518 unsafe fn test_mm512_mask_cmp_epu32_mask() {
50519 #[rustfmt::skip]
50520 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50521 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50522 let b = _mm512_set1_epi32(-1);
50523 let mask = 0b01111010_01111010;
50524 let r = _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(mask, a, b);
50525 assert_eq!(r, 0b01001010_01001010);
50526 }
50527
50528 #[simd_test(enable = "avx512f,avx512vl")]
50529 unsafe fn test_mm256_cmp_epu32_mask() {
50530 let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50531 let b = _mm256_set1_epi32(-1);
50532 let m = _mm256_cmp_epu32_mask::<_MM_CMPINT_LT>(a, b);
50533 assert_eq!(m, 0b11001111);
50534 }
50535
50536 #[simd_test(enable = "avx512f,avx512vl")]
50537 unsafe fn test_mm256_mask_cmp_epu32_mask() {
50538 let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50539 let b = _mm256_set1_epi32(-1);
50540 let mask = 0b11111111;
50541 let r = _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(mask, a, b);
50542 assert_eq!(r, 0b11001111);
50543 }
50544
50545 #[simd_test(enable = "avx512f,avx512vl")]
50546 unsafe fn test_mm_cmp_epu32_mask() {
50547 let a = _mm_set_epi32(0, 1, -1, i32::MAX);
50548 let b = _mm_set1_epi32(1);
50549 let m = _mm_cmp_epu32_mask::<_MM_CMPINT_LT>(a, b);
50550 assert_eq!(m, 0b00001000);
50551 }
50552
50553 #[simd_test(enable = "avx512f,avx512vl")]
50554 unsafe fn test_mm_mask_cmp_epu32_mask() {
50555 let a = _mm_set_epi32(0, 1, -1, i32::MAX);
50556 let b = _mm_set1_epi32(1);
50557 let mask = 0b11111111;
50558 let r = _mm_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(mask, a, b);
50559 assert_eq!(r, 0b00001000);
50560 }
50561
50562 #[simd_test(enable = "avx512f")]
50563 unsafe fn test_mm512_cmplt_epi32_mask() {
50564 #[rustfmt::skip]
50565 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50566 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50567 let b = _mm512_set1_epi32(-1);
50568 let m = _mm512_cmplt_epi32_mask(a, b);
50569 assert_eq!(m, 0b00000101_00000101);
50570 }
50571
50572 #[simd_test(enable = "avx512f")]
50573 unsafe fn test_mm512_mask_cmplt_epi32_mask() {
50574 #[rustfmt::skip]
50575 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50576 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50577 let b = _mm512_set1_epi32(-1);
50578 let mask = 0b01100110_01100110;
50579 let r = _mm512_mask_cmplt_epi32_mask(mask, a, b);
50580 assert_eq!(r, 0b00000100_00000100);
50581 }
50582
50583 #[simd_test(enable = "avx512f,avx512vl")]
50584 unsafe fn test_mm256_cmplt_epi32_mask() {
50585 let a = _mm256_set_epi32(0, 1, -1, 101, i32::MAX, i32::MIN, 100, -100);
50586 let b = _mm256_set1_epi32(-1);
50587 let r = _mm256_cmplt_epi32_mask(a, b);
50588 assert_eq!(r, 0b00000101);
50589 }
50590
50591 #[simd_test(enable = "avx512f,avx512vl")]
50592 unsafe fn test_mm256_mask_cmplt_epi32_mask() {
50593 let a = _mm256_set_epi32(0, 1, -1, 101, i32::MAX, i32::MIN, 100, -100);
50594 let b = _mm256_set1_epi32(-1);
50595 let mask = 0b11111111;
50596 let r = _mm256_mask_cmplt_epi32_mask(mask, a, b);
50597 assert_eq!(r, 0b00000101);
50598 }
50599
50600 #[simd_test(enable = "avx512f,avx512vl")]
50601 unsafe fn test_mm_cmplt_epi32_mask() {
50602 let a = _mm_set_epi32(i32::MAX, i32::MIN, 100, -100);
50603 let b = _mm_set1_epi32(-1);
50604 let r = _mm_cmplt_epi32_mask(a, b);
50605 assert_eq!(r, 0b00000101);
50606 }
50607
50608 #[simd_test(enable = "avx512f,avx512vl")]
50609 unsafe fn test_mm_mask_cmplt_epi32_mask() {
50610 let a = _mm_set_epi32(i32::MAX, i32::MIN, 100, -100);
50611 let b = _mm_set1_epi32(-1);
50612 let mask = 0b11111111;
50613 let r = _mm_mask_cmplt_epi32_mask(mask, a, b);
50614 assert_eq!(r, 0b00000101);
50615 }
50616
50617 #[simd_test(enable = "avx512f")]
50618 unsafe fn test_mm512_cmpgt_epi32_mask() {
50619 #[rustfmt::skip]
50620 let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
50621 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50622 let b = _mm512_set1_epi32(-1);
50623 let m = _mm512_cmpgt_epi32_mask(b, a);
50624 assert_eq!(m, 0b00000101_00000101);
50625 }
50626
50627 #[simd_test(enable = "avx512f")]
50628 unsafe fn test_mm512_mask_cmpgt_epi32_mask() {
50629 #[rustfmt::skip]
50630 let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
50631 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50632 let b = _mm512_set1_epi32(-1);
50633 let mask = 0b01100110_01100110;
50634 let r = _mm512_mask_cmpgt_epi32_mask(mask, b, a);
50635 assert_eq!(r, 0b00000100_00000100);
50636 }
50637
50638 #[simd_test(enable = "avx512f,avx512vl")]
50639 unsafe fn test_mm256_cmpgt_epi32_mask() {
50640 let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50641 let b = _mm256_set1_epi32(-1);
50642 let r = _mm256_cmpgt_epi32_mask(a, b);
50643 assert_eq!(r, 0b11011010);
50644 }
50645
50646 #[simd_test(enable = "avx512f,avx512vl")]
50647 unsafe fn test_mm256_mask_cmpgt_epi32_mask() {
50648 let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50649 let b = _mm256_set1_epi32(-1);
50650 let mask = 0b11111111;
50651 let r = _mm256_mask_cmpgt_epi32_mask(mask, a, b);
50652 assert_eq!(r, 0b11011010);
50653 }
50654
50655 #[simd_test(enable = "avx512f,avx512vl")]
50656 unsafe fn test_mm_cmpgt_epi32_mask() {
50657 let a = _mm_set_epi32(0, 1, -1, 13);
50658 let b = _mm_set1_epi32(-1);
50659 let r = _mm_cmpgt_epi32_mask(a, b);
50660 assert_eq!(r, 0b00001101);
50661 }
50662
50663 #[simd_test(enable = "avx512f,avx512vl")]
50664 unsafe fn test_mm_mask_cmpgt_epi32_mask() {
50665 let a = _mm_set_epi32(0, 1, -1, 13);
50666 let b = _mm_set1_epi32(-1);
50667 let mask = 0b11111111;
50668 let r = _mm_mask_cmpgt_epi32_mask(mask, a, b);
50669 assert_eq!(r, 0b00001101);
50670 }
50671
50672 #[simd_test(enable = "avx512f")]
50673 unsafe fn test_mm512_cmple_epi32_mask() {
50674 #[rustfmt::skip]
50675 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50676 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50677 let b = _mm512_set1_epi32(-1);
50678 assert_eq!(
50679 _mm512_cmple_epi32_mask(a, b),
50680 !_mm512_cmpgt_epi32_mask(a, b)
50681 )
50682 }
50683
50684 #[simd_test(enable = "avx512f")]
50685 unsafe fn test_mm512_mask_cmple_epi32_mask() {
50686 #[rustfmt::skip]
50687 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50688 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50689 let b = _mm512_set1_epi32(-1);
50690 let mask = 0b01111010_01111010;
        assert_eq!(_mm512_mask_cmple_epi32_mask(mask, a, b), 0b00110000_00110000);
50692 }
50693
50694 #[simd_test(enable = "avx512f,avx512vl")]
50695 unsafe fn test_mm256_cmple_epi32_mask() {
50696 let a = _mm256_set_epi32(0, 1, -1, 200, i32::MAX, i32::MIN, 100, -100);
50697 let b = _mm256_set1_epi32(-1);
50698 let r = _mm256_cmple_epi32_mask(a, b);
50699 assert_eq!(r, 0b00100101)
50700 }
50701
50702 #[simd_test(enable = "avx512f,avx512vl")]
50703 unsafe fn test_mm256_mask_cmple_epi32_mask() {
50704 let a = _mm256_set_epi32(0, 1, -1, 200, i32::MAX, i32::MIN, 100, -100);
50705 let b = _mm256_set1_epi32(-1);
50706 let mask = 0b11111111;
50707 let r = _mm256_mask_cmple_epi32_mask(mask, a, b);
50708 assert_eq!(r, 0b00100101)
50709 }
50710
50711 #[simd_test(enable = "avx512f,avx512vl")]
50712 unsafe fn test_mm_cmple_epi32_mask() {
50713 let a = _mm_set_epi32(0, 1, -1, 200);
50714 let b = _mm_set1_epi32(-1);
50715 let r = _mm_cmple_epi32_mask(a, b);
50716 assert_eq!(r, 0b00000010)
50717 }
50718
50719 #[simd_test(enable = "avx512f,avx512vl")]
50720 unsafe fn test_mm_mask_cmple_epi32_mask() {
50721 let a = _mm_set_epi32(0, 1, -1, 200);
50722 let b = _mm_set1_epi32(-1);
50723 let mask = 0b11111111;
50724 let r = _mm_mask_cmple_epi32_mask(mask, a, b);
50725 assert_eq!(r, 0b00000010)
50726 }
50727
50728 #[simd_test(enable = "avx512f")]
50729 unsafe fn test_mm512_cmpge_epi32_mask() {
50730 #[rustfmt::skip]
50731 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50732 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50733 let b = _mm512_set1_epi32(-1);
50734 assert_eq!(
50735 _mm512_cmpge_epi32_mask(a, b),
50736 !_mm512_cmplt_epi32_mask(a, b)
50737 )
50738 }
50739
50740 #[simd_test(enable = "avx512f")]
50741 unsafe fn test_mm512_mask_cmpge_epi32_mask() {
50742 #[rustfmt::skip]
50743 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50744 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50745 let b = _mm512_set1_epi32(-1);
50746 let mask = 0b01111010_01111010;
50747 assert_eq!(
50748 _mm512_mask_cmpge_epi32_mask(mask, a, b),
50749 0b01111010_01111010
50750 );
50751 }
50752
50753 #[simd_test(enable = "avx512f,avx512vl")]
50754 unsafe fn test_mm256_cmpge_epi32_mask() {
50755 let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50756 let b = _mm256_set1_epi32(-1);
50757 let r = _mm256_cmpge_epi32_mask(a, b);
50758 assert_eq!(r, 0b11111010)
50759 }
50760
50761 #[simd_test(enable = "avx512f,avx512vl")]
50762 unsafe fn test_mm256_mask_cmpge_epi32_mask() {
50763 let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50764 let b = _mm256_set1_epi32(-1);
50765 let mask = 0b11111111;
50766 let r = _mm256_mask_cmpge_epi32_mask(mask, a, b);
50767 assert_eq!(r, 0b11111010)
50768 }
50769
50770 #[simd_test(enable = "avx512f,avx512vl")]
50771 unsafe fn test_mm_cmpge_epi32_mask() {
50772 let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
50773 let b = _mm_set1_epi32(-1);
50774 let r = _mm_cmpge_epi32_mask(a, b);
50775 assert_eq!(r, 0b00001111)
50776 }
50777
50778 #[simd_test(enable = "avx512f,avx512vl")]
50779 unsafe fn test_mm_mask_cmpge_epi32_mask() {
50780 let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
50781 let b = _mm_set1_epi32(-1);
50782 let mask = 0b11111111;
50783 let r = _mm_mask_cmpge_epi32_mask(mask, a, b);
50784 assert_eq!(r, 0b00001111)
50785 }
50786
50787 #[simd_test(enable = "avx512f")]
50788 unsafe fn test_mm512_cmpeq_epi32_mask() {
50789 #[rustfmt::skip]
50790 let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
50791 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50792 #[rustfmt::skip]
50793 let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50794 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50795 let m = _mm512_cmpeq_epi32_mask(b, a);
50796 assert_eq!(m, 0b11001111_11001111);
50797 }
50798
50799 #[simd_test(enable = "avx512f")]
50800 unsafe fn test_mm512_mask_cmpeq_epi32_mask() {
50801 #[rustfmt::skip]
50802 let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
50803 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50804 #[rustfmt::skip]
50805 let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50806 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50807 let mask = 0b01111010_01111010;
50808 let r = _mm512_mask_cmpeq_epi32_mask(mask, b, a);
50809 assert_eq!(r, 0b01001010_01001010);
50810 }
50811
50812 #[simd_test(enable = "avx512f,avx512vl")]
50813 unsafe fn test_mm256_cmpeq_epi32_mask() {
50814 let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50815 let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50816 let m = _mm256_cmpeq_epi32_mask(b, a);
50817 assert_eq!(m, 0b11001111);
50818 }
50819
50820 #[simd_test(enable = "avx512f,avx512vl")]
50821 unsafe fn test_mm256_mask_cmpeq_epi32_mask() {
50822 let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50823 let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50824 let mask = 0b01111010;
50825 let r = _mm256_mask_cmpeq_epi32_mask(mask, b, a);
50826 assert_eq!(r, 0b01001010);
50827 }
50828
50829 #[simd_test(enable = "avx512f,avx512vl")]
50830 unsafe fn test_mm_cmpeq_epi32_mask() {
50831 let a = _mm_set_epi32(0, 1, -1, 13);
50832 let b = _mm_set_epi32(0, 1, 13, 42);
50833 let m = _mm_cmpeq_epi32_mask(b, a);
50834 assert_eq!(m, 0b00001100);
50835 }
50836
50837 #[simd_test(enable = "avx512f,avx512vl")]
50838 unsafe fn test_mm_mask_cmpeq_epi32_mask() {
50839 let a = _mm_set_epi32(0, 1, -1, 13);
50840 let b = _mm_set_epi32(0, 1, 13, 42);
50841 let mask = 0b11111111;
50842 let r = _mm_mask_cmpeq_epi32_mask(mask, b, a);
50843 assert_eq!(r, 0b00001100);
50844 }
50845
50846 #[simd_test(enable = "avx512f")]
50847 unsafe fn test_mm512_cmpneq_epi32_mask() {
50848 #[rustfmt::skip]
50849 let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
50850 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50851 #[rustfmt::skip]
50852 let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50853 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50854 let m = _mm512_cmpneq_epi32_mask(b, a);
50855 assert_eq!(m, !_mm512_cmpeq_epi32_mask(b, a));
50856 }
50857
50858 #[simd_test(enable = "avx512f")]
50859 unsafe fn test_mm512_mask_cmpneq_epi32_mask() {
50860 #[rustfmt::skip]
50861 let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100,
50862 0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100);
50863 #[rustfmt::skip]
50864 let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50865 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50866 let mask = 0b01111010_01111010;
50867 let r = _mm512_mask_cmpneq_epi32_mask(mask, b, a);
50868 assert_eq!(r, 0b00110010_00110010)
50869 }
50870
50871 #[simd_test(enable = "avx512f,avx512vl")]
50872 unsafe fn test_mm256_cmpneq_epi32_mask() {
50873 let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50874 let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50875 let m = _mm256_cmpneq_epi32_mask(b, a);
50876 assert_eq!(m, !_mm256_cmpeq_epi32_mask(b, a));
50877 }
50878
50879 #[simd_test(enable = "avx512f,avx512vl")]
50880 unsafe fn test_mm256_mask_cmpneq_epi32_mask() {
50881 let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100);
50882 let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50883 let mask = 0b11111111;
50884 let r = _mm256_mask_cmpneq_epi32_mask(mask, b, a);
50885 assert_eq!(r, 0b00110011)
50886 }
50887
50888 #[simd_test(enable = "avx512f,avx512vl")]
50889 unsafe fn test_mm_cmpneq_epi32_mask() {
50890 let a = _mm_set_epi32(0, 1, -1, 13);
50891 let b = _mm_set_epi32(0, 1, 13, 42);
50892 let r = _mm_cmpneq_epi32_mask(b, a);
50893 assert_eq!(r, 0b00000011)
50894 }
50895
50896 #[simd_test(enable = "avx512f,avx512vl")]
50897 unsafe fn test_mm_mask_cmpneq_epi32_mask() {
50898 let a = _mm_set_epi32(0, 1, -1, 13);
50899 let b = _mm_set_epi32(0, 1, 13, 42);
50900 let mask = 0b11111111;
50901 let r = _mm_mask_cmpneq_epi32_mask(mask, b, a);
50902 assert_eq!(r, 0b00000011)
50903 }
50904
50905 #[simd_test(enable = "avx512f")]
50906 unsafe fn test_mm512_cmp_epi32_mask() {
50907 #[rustfmt::skip]
50908 let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
50909 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50910 let b = _mm512_set1_epi32(-1);
50911 let m = _mm512_cmp_epi32_mask::<_MM_CMPINT_LT>(a, b);
50912 assert_eq!(m, 0b00000101_00000101);
50913 }
50914
50915 #[simd_test(enable = "avx512f")]
50916 unsafe fn test_mm512_mask_cmp_epi32_mask() {
50917 #[rustfmt::skip]
50918 let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
50919 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50920 let b = _mm512_set1_epi32(-1);
50921 let mask = 0b01100110_01100110;
50922 let r = _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(mask, a, b);
50923 assert_eq!(r, 0b00000100_00000100);
50924 }
50925
50926 #[simd_test(enable = "avx512f,avx512vl")]
50927 unsafe fn test_mm256_cmp_epi32_mask() {
50928 let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50929 let b = _mm256_set1_epi32(-1);
50930 let m = _mm256_cmp_epi32_mask::<_MM_CMPINT_LT>(a, b);
50931 assert_eq!(m, 0b00000101);
50932 }
50933
50934 #[simd_test(enable = "avx512f,avx512vl")]
50935 unsafe fn test_mm256_mask_cmp_epi32_mask() {
50936 let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50937 let b = _mm256_set1_epi32(-1);
50938 let mask = 0b01100110;
50939 let r = _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(mask, a, b);
50940 assert_eq!(r, 0b00000100);
50941 }
50942
50943 #[simd_test(enable = "avx512f,avx512vl")]
50944 unsafe fn test_mm_cmp_epi32_mask() {
50945 let a = _mm_set_epi32(0, 1, -1, 13);
50946 let b = _mm_set1_epi32(1);
50947 let m = _mm_cmp_epi32_mask::<_MM_CMPINT_LT>(a, b);
50948 assert_eq!(m, 0b00001010);
50949 }
50950
50951 #[simd_test(enable = "avx512f,avx512vl")]
50952 unsafe fn test_mm_mask_cmp_epi32_mask() {
50953 let a = _mm_set_epi32(0, 1, -1, 13);
50954 let b = _mm_set1_epi32(1);
50955 let mask = 0b11111111;
50956 let r = _mm_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(mask, a, b);
50957 assert_eq!(r, 0b00001010);
50958 }
50959
50960 #[simd_test(enable = "avx512f")]
50961 unsafe fn test_mm512_set_epi8() {
50962 let r = _mm512_set1_epi8(2);
50963 assert_eq_m512i(
50964 r,
50965 _mm512_set_epi8(
50966 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
50967 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
50968 2, 2, 2, 2, 2, 2, 2, 2,
50969 ),
50970 )
50971 }
50972
50973 #[simd_test(enable = "avx512f")]
50974 unsafe fn test_mm512_set_epi16() {
50975 let r = _mm512_set1_epi16(2);
50976 assert_eq_m512i(
50977 r,
50978 _mm512_set_epi16(
50979 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
50980 2, 2, 2, 2,
50981 ),
50982 )
50983 }
50984
50985 #[simd_test(enable = "avx512f")]
50986 unsafe fn test_mm512_set_epi32() {
50987 let r = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
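        // _mm512_set_epi32 takes its arguments from the highest lane down, while _mm512_setr_epi32
        // fills from lane 0 up, so reversing the argument order produces the same vector.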
50988 assert_eq_m512i(
50989 r,
50990 _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),
50991 )
50992 }
50993
50994 #[simd_test(enable = "avx512f")]
50995 unsafe fn test_mm512_setr_epi32() {
50996 let r = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
50997 assert_eq_m512i(
50998 r,
50999 _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),
51000 )
51001 }
51002
51003 #[simd_test(enable = "avx512f")]
51004 unsafe fn test_mm512_set1_epi8() {
51005 let r = _mm512_set_epi8(
51006 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
51007 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
51008 2, 2, 2, 2, 2, 2,
51009 );
51010 assert_eq_m512i(r, _mm512_set1_epi8(2));
51011 }
51012
51013 #[simd_test(enable = "avx512f")]
51014 unsafe fn test_mm512_set1_epi16() {
51015 let r = _mm512_set_epi16(
51016 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
51017 2, 2, 2,
51018 );
51019 assert_eq_m512i(r, _mm512_set1_epi16(2));
51020 }
51021
51022 #[simd_test(enable = "avx512f")]
51023 unsafe fn test_mm512_set1_epi32() {
51024 let r = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
51025 assert_eq_m512i(r, _mm512_set1_epi32(2));
51026 }
51027
51028 #[simd_test(enable = "avx512f")]
51029 unsafe fn test_mm512_setzero_si512() {
51030 assert_eq_m512i(_mm512_set1_epi32(0), _mm512_setzero_si512());
51031 }
51032
51033 #[simd_test(enable = "avx512f")]
51034 unsafe fn test_mm512_setzero_epi32() {
51035 assert_eq_m512i(_mm512_set1_epi32(0), _mm512_setzero_epi32());
51036 }
51037
51038 #[simd_test(enable = "avx512f")]
51039 unsafe fn test_mm512_set_ps() {
51040 let r = _mm512_setr_ps(
51041 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
51042 );
51043 assert_eq_m512(
51044 r,
51045 _mm512_set_ps(
51046 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
51047 ),
51048 )
51049 }
51050
51051 #[simd_test(enable = "avx512f")]
51052 unsafe fn test_mm512_setr_ps() {
51053 let r = _mm512_set_ps(
51054 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
51055 );
51056 assert_eq_m512(
51057 r,
51058 _mm512_setr_ps(
51059 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
51060 ),
51061 )
51062 }
51063
51064 #[simd_test(enable = "avx512f")]
51065 unsafe fn test_mm512_set1_ps() {
51066 #[rustfmt::skip]
51067 let expected = _mm512_set_ps(2., 2., 2., 2., 2., 2., 2., 2.,
51068 2., 2., 2., 2., 2., 2., 2., 2.);
51069 assert_eq_m512(expected, _mm512_set1_ps(2.));
51070 }
51071
51072 #[simd_test(enable = "avx512f")]
51073 unsafe fn test_mm512_set4_epi32() {
51074 let r = _mm512_set_epi32(4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1);
51075 assert_eq_m512i(r, _mm512_set4_epi32(4, 3, 2, 1));
51076 }
51077
51078 #[simd_test(enable = "avx512f")]
51079 unsafe fn test_mm512_set4_ps() {
51080 let r = _mm512_set_ps(
51081 4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1.,
51082 );
51083 assert_eq_m512(r, _mm512_set4_ps(4., 3., 2., 1.));
51084 }
51085
51086 #[simd_test(enable = "avx512f")]
51087 unsafe fn test_mm512_setr4_epi32() {
51088 let r = _mm512_set_epi32(4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1);
51089 assert_eq_m512i(r, _mm512_setr4_epi32(1, 2, 3, 4));
51090 }
51091
51092 #[simd_test(enable = "avx512f")]
51093 unsafe fn test_mm512_setr4_ps() {
51094 let r = _mm512_set_ps(
51095 4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1.,
51096 );
51097 assert_eq_m512(r, _mm512_setr4_ps(1., 2., 3., 4.));
51098 }
51099
51100 #[simd_test(enable = "avx512f")]
51101 unsafe fn test_mm512_setzero_ps() {
51102 assert_eq_m512(_mm512_setzero_ps(), _mm512_set1_ps(0.));
51103 }
51104
51105 #[simd_test(enable = "avx512f")]
51106 unsafe fn test_mm512_setzero() {
51107 assert_eq_m512(_mm512_setzero(), _mm512_set1_ps(0.));
51108 }
51109
51110 #[simd_test(enable = "avx512f")]
51111 unsafe fn test_mm512_loadu_pd() {
51112 let a = &[4., 3., 2., 5., 8., 9., 64., 50.];
51113 let p = a.as_ptr();
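// black_box hides the pointer from the optimizer so the load is actually performed at runtime.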
51114 let r = _mm512_loadu_pd(black_box(p));
51115 let e = _mm512_setr_pd(4., 3., 2., 5., 8., 9., 64., 50.);
51116 assert_eq_m512d(r, e);
51117 }
51118
51119 #[simd_test(enable = "avx512f")]
51120 unsafe fn test_mm512_storeu_pd() {
51121 let a = _mm512_set1_pd(9.);
51122 let mut r = _mm512_undefined_pd();
51123 _mm512_storeu_pd(&mut r as *mut _ as *mut f64, a);
51124 assert_eq_m512d(r, a);
51125 }
51126
51127 #[simd_test(enable = "avx512f")]
51128 unsafe fn test_mm512_loadu_ps() {
51129 let a = &[
51130 4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
51131 ];
51132 let p = a.as_ptr();
51133 let r = _mm512_loadu_ps(black_box(p));
51134 let e = _mm512_setr_ps(
51135 4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
51136 );
51137 assert_eq_m512(r, e);
51138 }
51139
51140 #[simd_test(enable = "avx512f")]
51141 unsafe fn test_mm512_storeu_ps() {
51142 let a = _mm512_set1_ps(9.);
51143 let mut r = _mm512_undefined_ps();
51144 _mm512_storeu_ps(&mut r as *mut _ as *mut f32, a);
51145 assert_eq_m512(r, a);
51146 }
51147
51148 #[simd_test(enable = "avx512f")]
51149 unsafe fn test_mm512_mask_loadu_epi32() {
51150 let src = _mm512_set1_epi32(42);
51151 let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
51152 let p = a.as_ptr();
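// Mask bit i controls element i (least-significant bit = lowest element); clear bits keep the value from `src`.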
51153 let m = 0b11101000_11001010;
51154 let r = _mm512_mask_loadu_epi32(src, m, black_box(p));
51155 let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16);
51156 assert_eq_m512i(r, e);
51157 }
51158
51159 #[simd_test(enable = "avx512f")]
51160 unsafe fn test_mm512_maskz_loadu_epi32() {
51161 let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
51162 let p = a.as_ptr();
51163 let m = 0b11101000_11001010;
51164 let r = _mm512_maskz_loadu_epi32(m, black_box(p));
51165 let e = _mm512_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16);
51166 assert_eq_m512i(r, e);
51167 }
51168
51169 #[simd_test(enable = "avx512f")]
51170 unsafe fn test_mm512_mask_load_epi32() {
51171 #[repr(align(64))]
51172 struct Align {
51173 data: [i32; 16], // 64 bytes
51174 }
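// The aligned load requires a 64-byte-aligned pointer, hence the wrapper struct.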
51175 let src = _mm512_set1_epi32(42);
51176 let a = Align {
51177 data: [1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
51178 };
51179 let p = a.data.as_ptr();
51180 let m = 0b11101000_11001010;
51181 let r = _mm512_mask_load_epi32(src, m, black_box(p));
51182 let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16);
51183 assert_eq_m512i(r, e);
51184 }
51185
51186 #[simd_test(enable = "avx512f")]
51187 unsafe fn test_mm512_maskz_load_epi32() {
51188 #[repr(align(64))]
51189 struct Align {
51190 data: [i32; 16], // 64 bytes
51191 }
51192 let a = Align {
51193 data: [1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
51194 };
51195 let p = a.data.as_ptr();
51196 let m = 0b11101000_11001010;
51197 let r = _mm512_maskz_load_epi32(m, black_box(p));
51198 let e = _mm512_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16);
51199 assert_eq_m512i(r, e);
51200 }
51201
51202 #[simd_test(enable = "avx512f")]
51203 unsafe fn test_mm512_mask_storeu_epi32() {
51204 let mut r = [42_i32; 16];
51205 let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
51206 let m = 0b11101000_11001010;
51207 _mm512_mask_storeu_epi32(r.as_mut_ptr(), m, a);
51208 let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16);
51209 assert_eq_m512i(_mm512_loadu_epi32(r.as_ptr()), e);
51210 }
51211
51212 #[simd_test(enable = "avx512f")]
51213 unsafe fn test_mm512_mask_store_epi32() {
51214 #[repr(align(64))]
51215 struct Align {
51216 data: [i32; 16],
51217 }
51218 let mut r = Align { data: [42; 16] };
51219 let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
51220 let m = 0b11101000_11001010;
51221 _mm512_mask_store_epi32(r.data.as_mut_ptr(), m, a);
51222 let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16);
51223 assert_eq_m512i(_mm512_load_epi32(r.data.as_ptr()), e);
51224 }
51225
51226 #[simd_test(enable = "avx512f")]
51227 unsafe fn test_mm512_mask_loadu_epi64() {
51228 let src = _mm512_set1_epi64(42);
51229 let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8];
51230 let p = a.as_ptr();
51231 let m = 0b11001010;
51232 let r = _mm512_mask_loadu_epi64(src, m, black_box(p));
51233 let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8);
51234 assert_eq_m512i(r, e);
51235 }
51236
51237 #[simd_test(enable = "avx512f")]
51238 unsafe fn test_mm512_maskz_loadu_epi64() {
51239 let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8];
51240 let p = a.as_ptr();
51241 let m = 0b11001010;
51242 let r = _mm512_maskz_loadu_epi64(m, black_box(p));
51243 let e = _mm512_setr_epi64(0, 2, 0, 4, 0, 0, 7, 8);
51244 assert_eq_m512i(r, e);
51245 }
51246
51247 #[simd_test(enable = "avx512f")]
51248 unsafe fn test_mm512_mask_load_epi64() {
51249 #[repr(align(64))]
51250 struct Align {
51251 data: [i64; 8], // 64 bytes
51252 }
51253 let src = _mm512_set1_epi64(42);
51254 let a = Align {
51255 data: [1_i64, 2, 3, 4, 5, 6, 7, 8],
51256 };
51257 let p = a.data.as_ptr();
51258 let m = 0b11001010;
51259 let r = _mm512_mask_load_epi64(src, m, black_box(p));
51260 let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8);
51261 assert_eq_m512i(r, e);
51262 }
51263
51264 #[simd_test(enable = "avx512f")]
51265 unsafe fn test_mm512_maskz_load_epi64() {
51266 #[repr(align(64))]
51267 struct Align {
51268 data: [i64; 8], // 64 bytes
51269 }
51270 let a = Align {
51271 data: [1_i64, 2, 3, 4, 5, 6, 7, 8],
51272 };
51273 let p = a.data.as_ptr();
51274 let m = 0b11001010;
51275 let r = _mm512_maskz_load_epi64(m, black_box(p));
51276 let e = _mm512_setr_epi64(0, 2, 0, 4, 0, 0, 7, 8);
51277 assert_eq_m512i(r, e);
51278 }
51279
51280 #[simd_test(enable = "avx512f")]
51281 unsafe fn test_mm512_mask_storeu_epi64() {
51282 let mut r = [42_i64; 8];
51283 let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
51284 let m = 0b11001010;
51285 _mm512_mask_storeu_epi64(r.as_mut_ptr(), m, a);
51286 let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8);
51287 assert_eq_m512i(_mm512_loadu_epi64(r.as_ptr()), e);
51288 }
51289
51290 #[simd_test(enable = "avx512f")]
51291 unsafe fn test_mm512_mask_store_epi64() {
51292 #[repr(align(64))]
51293 struct Align {
51294 data: [i64; 8],
51295 }
51296 let mut r = Align { data: [42; 8] };
51297 let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
51298 let m = 0b11001010;
51299 let p = r.data.as_mut_ptr();
51300 _mm512_mask_store_epi64(p, m, a);
51301 let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8);
51302 assert_eq_m512i(_mm512_load_epi64(r.data.as_ptr()), e);
51303 }
51304
51305 #[simd_test(enable = "avx512f")]
51306 unsafe fn test_mm512_mask_loadu_ps() {
51307 let src = _mm512_set1_ps(42.0);
51308 let a = &[
51309 1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
51310 16.0,
51311 ];
51312 let p = a.as_ptr();
51313 let m = 0b11101000_11001010;
51314 let r = _mm512_mask_loadu_ps(src, m, black_box(p));
51315 let e = _mm512_setr_ps(
51316 42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0,
51317 16.0,
51318 );
51319 assert_eq_m512(r, e);
51320 }
51321
51322 #[simd_test(enable = "avx512f")]
51323 unsafe fn test_mm512_maskz_loadu_ps() {
51324 let a = &[
51325 1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
51326 16.0,
51327 ];
51328 let p = a.as_ptr();
51329 let m = 0b11101000_11001010;
51330 let r = _mm512_maskz_loadu_ps(m, black_box(p));
51331 let e = _mm512_setr_ps(
51332 0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0, 0.0, 0.0, 0.0, 12.0, 0.0, 14.0, 15.0, 16.0,
51333 );
51334 assert_eq_m512(r, e);
51335 }
51336
51337 #[simd_test(enable = "avx512f")]
51338 unsafe fn test_mm512_mask_load_ps() {
51339 #[repr(align(64))]
51340 struct Align {
51341 data: [f32; 16], // 64 bytes
51342 }
51343 let src = _mm512_set1_ps(42.0);
51344 let a = Align {
51345 data: [
51346 1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0,
51347 15.0, 16.0,
51348 ],
51349 };
51350 let p = a.data.as_ptr();
51351 let m = 0b11101000_11001010;
51352 let r = _mm512_mask_load_ps(src, m, black_box(p));
51353 let e = _mm512_setr_ps(
51354 42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0,
51355 16.0,
51356 );
51357 assert_eq_m512(r, e);
51358 }
51359
51360 #[simd_test(enable = "avx512f")]
51361 unsafe fn test_mm512_maskz_load_ps() {
51362 #[repr(align(64))]
51363 struct Align {
51364 data: [f32; 16], // 64 bytes
51365 }
51366 let a = Align {
51367 data: [
51368 1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0,
51369 15.0, 16.0,
51370 ],
51371 };
51372 let p = a.data.as_ptr();
51373 let m = 0b11101000_11001010;
51374 let r = _mm512_maskz_load_ps(m, black_box(p));
51375 let e = _mm512_setr_ps(
51376 0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0, 0.0, 0.0, 0.0, 12.0, 0.0, 14.0, 15.0, 16.0,
51377 );
51378 assert_eq_m512(r, e);
51379 }
51380
51381 #[simd_test(enable = "avx512f")]
51382 unsafe fn test_mm512_mask_storeu_ps() {
51383 let mut r = [42_f32; 16];
51384 let a = _mm512_setr_ps(
51385 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
51386 );
51387 let m = 0b11101000_11001010;
51388 _mm512_mask_storeu_ps(r.as_mut_ptr(), m, a);
51389 let e = _mm512_setr_ps(
51390 42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0,
51391 16.0,
51392 );
51393 assert_eq_m512(_mm512_loadu_ps(r.as_ptr()), e);
51394 }
51395
51396 #[simd_test(enable = "avx512f")]
51397 unsafe fn test_mm512_mask_store_ps() {
51398 #[repr(align(64))]
51399 struct Align {
51400 data: [f32; 16],
51401 }
51402 let mut r = Align { data: [42.0; 16] };
51403 let a = _mm512_setr_ps(
51404 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
51405 );
51406 let m = 0b11101000_11001010;
51407 _mm512_mask_store_ps(r.data.as_mut_ptr(), m, a);
51408 let e = _mm512_setr_ps(
51409 42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0,
51410 16.0,
51411 );
51412 assert_eq_m512(_mm512_load_ps(r.data.as_ptr()), e);
51413 }
51414
51415 #[simd_test(enable = "avx512f")]
51416 unsafe fn test_mm512_mask_loadu_pd() {
51417 let src = _mm512_set1_pd(42.0);
51418 let a = &[1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
51419 let p = a.as_ptr();
51420 let m = 0b11001010;
51421 let r = _mm512_mask_loadu_pd(src, m, black_box(p));
51422 let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
51423 assert_eq_m512d(r, e);
51424 }
51425
51426 #[simd_test(enable = "avx512f")]
51427 unsafe fn test_mm512_maskz_loadu_pd() {
51428 let a = &[1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
51429 let p = a.as_ptr();
51430 let m = 0b11001010;
51431 let r = _mm512_maskz_loadu_pd(m, black_box(p));
51432 let e = _mm512_setr_pd(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0);
51433 assert_eq_m512d(r, e);
51434 }
51435
51436 #[simd_test(enable = "avx512f")]
51437 unsafe fn test_mm512_mask_load_pd() {
51438 #[repr(align(64))]
51439 struct Align {
51440 data: [f64; 8], // 64 bytes
51441 }
51442 let src = _mm512_set1_pd(42.0);
51443 let a = Align {
51444 data: [1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
51445 };
51446 let p = a.data.as_ptr();
51447 let m = 0b11001010;
51448 let r = _mm512_mask_load_pd(src, m, black_box(p));
51449 let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
51450 assert_eq_m512d(r, e);
51451 }
51452
51453 #[simd_test(enable = "avx512f")]
51454 unsafe fn test_mm512_maskz_load_pd() {
51455 #[repr(align(64))]
51456 struct Align {
51457 data: [f64; 8], // 64 bytes
51458 }
51459 let a = Align {
51460 data: [1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
51461 };
51462 let p = a.data.as_ptr();
51463 let m = 0b11001010;
51464 let r = _mm512_maskz_load_pd(m, black_box(p));
51465 let e = _mm512_setr_pd(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0);
51466 assert_eq_m512d(r, e);
51467 }
51468
51469 #[simd_test(enable = "avx512f")]
51470 unsafe fn test_mm512_mask_storeu_pd() {
51471 let mut r = [42_f64; 8];
51472 let a = _mm512_setr_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
51473 let m = 0b11001010;
51474 _mm512_mask_storeu_pd(r.as_mut_ptr(), m, a);
51475 let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
51476 assert_eq_m512d(_mm512_loadu_pd(r.as_ptr()), e);
51477 }
51478
51479 #[simd_test(enable = "avx512f")]
51480 unsafe fn test_mm512_mask_store_pd() {
51481 #[repr(align(64))]
51482 struct Align {
51483 data: [f64; 8],
51484 }
51485 let mut r = Align { data: [42.0; 8] };
51486 let a = _mm512_setr_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
51487 let m = 0b11001010;
51488 _mm512_mask_store_pd(r.data.as_mut_ptr(), m, a);
51489 let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
51490 assert_eq_m512d(_mm512_load_pd(r.data.as_ptr()), e);
51491 }
51492
51493 #[simd_test(enable = "avx512f,avx512vl")]
51494 unsafe fn test_mm256_mask_loadu_epi32() {
51495 let src = _mm256_set1_epi32(42);
51496 let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8];
51497 let p = a.as_ptr();
51498 let m = 0b11001010;
51499 let r = _mm256_mask_loadu_epi32(src, m, black_box(p));
51500 let e = _mm256_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8);
51501 assert_eq_m256i(r, e);
51502 }
51503
51504 #[simd_test(enable = "avx512f,avx512vl")]
51505 unsafe fn test_mm256_maskz_loadu_epi32() {
51506 let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8];
51507 let p = a.as_ptr();
51508 let m = 0b11001010;
51509 let r = _mm256_maskz_loadu_epi32(m, black_box(p));
51510 let e = _mm256_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8);
51511 assert_eq_m256i(r, e);
51512 }
51513
51514 #[simd_test(enable = "avx512f,avx512vl")]
51515 unsafe fn test_mm256_mask_load_epi32() {
51516 #[repr(align(32))]
51517 struct Align {
51518 data: [i32; 8], // 32 bytes
51519 }
51520 let src = _mm256_set1_epi32(42);
51521 let a = Align {
51522 data: [1_i32, 2, 3, 4, 5, 6, 7, 8],
51523 };
51524 let p = a.data.as_ptr();
51525 let m = 0b11001010;
51526 let r = _mm256_mask_load_epi32(src, m, black_box(p));
51527 let e = _mm256_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8);
51528 assert_eq_m256i(r, e);
51529 }
51530
51531 #[simd_test(enable = "avx512f,avx512vl")]
51532 unsafe fn test_mm256_maskz_load_epi32() {
51533 #[repr(align(32))]
51534 struct Align {
51535 data: [i32; 8], // 32 bytes
51536 }
51537 let a = Align {
51538 data: [1_i32, 2, 3, 4, 5, 6, 7, 8],
51539 };
51540 let p = a.data.as_ptr();
51541 let m = 0b11001010;
51542 let r = _mm256_maskz_load_epi32(m, black_box(p));
51543 let e = _mm256_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8);
51544 assert_eq_m256i(r, e);
51545 }
51546
51547 #[simd_test(enable = "avx512f,avx512vl")]
51548 unsafe fn test_mm256_mask_storeu_epi32() {
51549 let mut r = [42_i32; 8];
51550 let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
51551 let m = 0b11001010;
51552 _mm256_mask_storeu_epi32(r.as_mut_ptr(), m, a);
51553 let e = _mm256_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8);
51554 assert_eq_m256i(_mm256_loadu_epi32(r.as_ptr()), e);
51555 }
51556
51557 #[simd_test(enable = "avx512f,avx512vl")]
51558 unsafe fn test_mm256_mask_store_epi32() {
51559 #[repr(align(32))]
51560 struct Align {
51561 data: [i32; 8],
51562 }
51563 let mut r = Align { data: [42; 8] };
51564 let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
51565 let m = 0b11001010;
51566 _mm256_mask_store_epi32(r.data.as_mut_ptr(), m, a);
51567 let e = _mm256_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8);
51568 assert_eq_m256i(_mm256_load_epi32(r.data.as_ptr()), e);
51569 }
51570
51571 #[simd_test(enable = "avx512f,avx512vl")]
51572 unsafe fn test_mm256_mask_loadu_epi64() {
51573 let src = _mm256_set1_epi64x(42);
51574 let a = &[1_i64, 2, 3, 4];
51575 let p = a.as_ptr();
51576 let m = 0b1010;
51577 let r = _mm256_mask_loadu_epi64(src, m, black_box(p));
51578 let e = _mm256_setr_epi64x(42, 2, 42, 4);
51579 assert_eq_m256i(r, e);
51580 }
51581
51582 #[simd_test(enable = "avx512f,avx512vl")]
51583 unsafe fn test_mm256_maskz_loadu_epi64() {
51584 let a = &[1_i64, 2, 3, 4];
51585 let p = a.as_ptr();
51586 let m = 0b1010;
51587 let r = _mm256_maskz_loadu_epi64(m, black_box(p));
51588 let e = _mm256_setr_epi64x(0, 2, 0, 4);
51589 assert_eq_m256i(r, e);
51590 }
51591
51592 #[simd_test(enable = "avx512f,avx512vl")]
51593 unsafe fn test_mm256_mask_load_epi64() {
51594 #[repr(align(32))]
51595 struct Align {
51596 data: [i64; 4], // 32 bytes
51597 }
51598 let src = _mm256_set1_epi64x(42);
51599 let a = Align {
51600 data: [1_i64, 2, 3, 4],
51601 };
51602 let p = a.data.as_ptr();
51603 let m = 0b1010;
51604 let r = _mm256_mask_load_epi64(src, m, black_box(p));
51605 let e = _mm256_setr_epi64x(42, 2, 42, 4);
51606 assert_eq_m256i(r, e);
51607 }
51608
51609 #[simd_test(enable = "avx512f,avx512vl")]
51610 unsafe fn test_mm256_maskz_load_epi64() {
51611 #[repr(align(32))]
51612 struct Align {
51613 data: [i64; 4], // 32 bytes
51614 }
51615 let a = Align {
51616 data: [1_i64, 2, 3, 4],
51617 };
51618 let p = a.data.as_ptr();
51619 let m = 0b1010;
51620 let r = _mm256_maskz_load_epi64(m, black_box(p));
51621 let e = _mm256_setr_epi64x(0, 2, 0, 4);
51622 assert_eq_m256i(r, e);
51623 }
51624
51625 #[simd_test(enable = "avx512f,avx512vl")]
51626 unsafe fn test_mm256_mask_storeu_epi64() {
51627 let mut r = [42_i64; 4];
51628 let a = _mm256_setr_epi64x(1, 2, 3, 4);
51629 let m = 0b1010;
51630 _mm256_mask_storeu_epi64(r.as_mut_ptr(), m, a);
51631 let e = _mm256_setr_epi64x(42, 2, 42, 4);
51632 assert_eq_m256i(_mm256_loadu_epi64(r.as_ptr()), e);
51633 }
51634
51635 #[simd_test(enable = "avx512f,avx512vl")]
51636 unsafe fn test_mm256_mask_store_epi64() {
51637 #[repr(align(32))]
51638 struct Align {
51639 data: [i64; 4],
51640 }
51641 let mut r = Align { data: [42; 4] };
51642 let a = _mm256_setr_epi64x(1, 2, 3, 4);
51643 let m = 0b1010;
51644 _mm256_mask_store_epi64(r.data.as_mut_ptr(), m, a);
51645 let e = _mm256_setr_epi64x(42, 2, 42, 4);
51646 assert_eq_m256i(_mm256_load_epi64(r.data.as_ptr()), e);
51647 }
51648
51649 #[simd_test(enable = "avx512f,avx512vl")]
51650 unsafe fn test_mm256_mask_loadu_ps() {
51651 let src = _mm256_set1_ps(42.0);
51652 let a = &[1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
51653 let p = a.as_ptr();
51654 let m = 0b11001010;
51655 let r = _mm256_mask_loadu_ps(src, m, black_box(p));
51656 let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
51657 assert_eq_m256(r, e);
51658 }
51659
51660 #[simd_test(enable = "avx512f,avx512vl")]
51661 unsafe fn test_mm256_maskz_loadu_ps() {
51662 let a = &[1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
51663 let p = a.as_ptr();
51664 let m = 0b11001010;
51665 let r = _mm256_maskz_loadu_ps(m, black_box(p));
51666 let e = _mm256_setr_ps(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0);
51667 assert_eq_m256(r, e);
51668 }
51669
51670 #[simd_test(enable = "avx512f,avx512vl")]
51671 unsafe fn test_mm256_mask_load_ps() {
51672 #[repr(align(32))]
51673 struct Align {
51674 data: [f32; 8], // 32 bytes
51675 }
51676 let src = _mm256_set1_ps(42.0);
51677 let a = Align {
51678 data: [1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
51679 };
51680 let p = a.data.as_ptr();
51681 let m = 0b11001010;
51682 let r = _mm256_mask_load_ps(src, m, black_box(p));
51683 let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
51684 assert_eq_m256(r, e);
51685 }
51686
51687 #[simd_test(enable = "avx512f,avx512vl")]
51688 unsafe fn test_mm256_maskz_load_ps() {
51689 #[repr(align(32))]
51690 struct Align {
51691 data: [f32; 8], // 32 bytes
51692 }
51693 let a = Align {
51694 data: [1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
51695 };
51696 let p = a.data.as_ptr();
51697 let m = 0b11001010;
51698 let r = _mm256_maskz_load_ps(m, black_box(p));
51699 let e = _mm256_setr_ps(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0);
51700 assert_eq_m256(r, e);
51701 }
51702
51703 #[simd_test(enable = "avx512f,avx512vl")]
51704 unsafe fn test_mm256_mask_storeu_ps() {
51705 let mut r = [42_f32; 8];
51706 let a = _mm256_setr_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
51707 let m = 0b11001010;
51708 _mm256_mask_storeu_ps(r.as_mut_ptr(), m, a);
51709 let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
51710 assert_eq_m256(_mm256_loadu_ps(r.as_ptr()), e);
51711 }
51712
51713 #[simd_test(enable = "avx512f,avx512vl")]
51714 unsafe fn test_mm256_mask_store_ps() {
51715 #[repr(align(32))]
51716 struct Align {
51717 data: [f32; 8],
51718 }
51719 let mut r = Align { data: [42.0; 8] };
51720 let a = _mm256_setr_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
51721 let m = 0b11001010;
51722 _mm256_mask_store_ps(r.data.as_mut_ptr(), m, a);
51723 let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
51724 assert_eq_m256(_mm256_load_ps(r.data.as_ptr()), e);
51725 }
51726
51727 #[simd_test(enable = "avx512f,avx512vl")]
51728 unsafe fn test_mm256_mask_loadu_pd() {
51729 let src = _mm256_set1_pd(42.0);
51730 let a = &[1.0_f64, 2.0, 3.0, 4.0];
51731 let p = a.as_ptr();
51732 let m = 0b1010;
51733 let r = _mm256_mask_loadu_pd(src, m, black_box(p));
51734 let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0);
51735 assert_eq_m256d(r, e);
51736 }
51737
51738 #[simd_test(enable = "avx512f,avx512vl")]
51739 unsafe fn test_mm256_maskz_loadu_pd() {
51740 let a = &[1.0_f64, 2.0, 3.0, 4.0];
51741 let p = a.as_ptr();
51742 let m = 0b1010;
51743 let r = _mm256_maskz_loadu_pd(m, black_box(p));
51744 let e = _mm256_setr_pd(0.0, 2.0, 0.0, 4.0);
51745 assert_eq_m256d(r, e);
51746 }
51747
51748 #[simd_test(enable = "avx512f,avx512vl")]
51749 unsafe fn test_mm256_mask_load_pd() {
51750 #[repr(align(32))]
51751 struct Align {
51752 data: [f64; 4], // 32 bytes
51753 }
51754 let src = _mm256_set1_pd(42.0);
51755 let a = Align {
51756 data: [1.0_f64, 2.0, 3.0, 4.0],
51757 };
51758 let p = a.data.as_ptr();
51759 let m = 0b1010;
51760 let r = _mm256_mask_load_pd(src, m, black_box(p));
51761 let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0);
51762 assert_eq_m256d(r, e);
51763 }
51764
51765 #[simd_test(enable = "avx512f,avx512vl")]
51766 unsafe fn test_mm256_maskz_load_pd() {
51767 #[repr(align(32))]
51768 struct Align {
51769 data: [f64; 4], // 32 bytes
51770 }
51771 let a = Align {
51772 data: [1.0_f64, 2.0, 3.0, 4.0],
51773 };
51774 let p = a.data.as_ptr();
51775 let m = 0b1010;
51776 let r = _mm256_maskz_load_pd(m, black_box(p));
51777 let e = _mm256_setr_pd(0.0, 2.0, 0.0, 4.0);
51778 assert_eq_m256d(r, e);
51779 }
51780
51781 #[simd_test(enable = "avx512f,avx512vl")]
51782 unsafe fn test_mm256_mask_storeu_pd() {
51783 let mut r = [42_f64; 4];
51784 let a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
51785 let m = 0b1010;
51786 _mm256_mask_storeu_pd(r.as_mut_ptr(), m, a);
51787 let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0);
51788 assert_eq_m256d(_mm256_loadu_pd(r.as_ptr()), e);
51789 }
51790
51791 #[simd_test(enable = "avx512f,avx512vl")]
51792 unsafe fn test_mm256_mask_store_pd() {
51793 #[repr(align(32))]
51794 struct Align {
51795 data: [f64; 4],
51796 }
51797 let mut r = Align { data: [42.0; 4] };
51798 let a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
51799 let m = 0b1010;
51800 _mm256_mask_store_pd(r.data.as_mut_ptr(), m, a);
51801 let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0);
51802 assert_eq_m256d(_mm256_load_pd(r.data.as_ptr()), e);
51803 }
51804
51805 #[simd_test(enable = "avx512f,avx512vl")]
51806 unsafe fn test_mm_mask_loadu_epi32() {
51807 let src = _mm_set1_epi32(42);
51808 let a = &[1_i32, 2, 3, 4];
51809 let p = a.as_ptr();
51810 let m = 0b1010;
51811 let r = _mm_mask_loadu_epi32(src, m, black_box(p));
51812 let e = _mm_setr_epi32(42, 2, 42, 4);
51813 assert_eq_m128i(r, e);
51814 }
51815
51816 #[simd_test(enable = "avx512f,avx512vl")]
51817 unsafe fn test_mm_maskz_loadu_epi32() {
51818 let a = &[1_i32, 2, 3, 4];
51819 let p = a.as_ptr();
51820 let m = 0b1010;
51821 let r = _mm_maskz_loadu_epi32(m, black_box(p));
51822 let e = _mm_setr_epi32(0, 2, 0, 4);
51823 assert_eq_m128i(r, e);
51824 }
51825
51826 #[simd_test(enable = "avx512f,avx512vl")]
51827 unsafe fn test_mm_mask_load_epi32() {
51828 #[repr(align(16))]
51829 struct Align {
51830 data: [i32; 4], // 16 bytes
51831 }
51832 let src = _mm_set1_epi32(42);
51833 let a = Align {
51834 data: [1_i32, 2, 3, 4],
51835 };
51836 let p = a.data.as_ptr();
51837 let m = 0b1010;
51838 let r = _mm_mask_load_epi32(src, m, black_box(p));
51839 let e = _mm_setr_epi32(42, 2, 42, 4);
51840 assert_eq_m128i(r, e);
51841 }
51842
51843 #[simd_test(enable = "avx512f,avx512vl")]
51844 unsafe fn test_mm_maskz_load_epi32() {
51845 #[repr(align(16))]
51846 struct Align {
51847 data: [i32; 4], // 16 bytes
51848 }
51849 let a = Align {
51850 data: [1_i32, 2, 3, 4],
51851 };
51852 let p = a.data.as_ptr();
51853 let m = 0b1010;
51854 let r = _mm_maskz_load_epi32(m, black_box(p));
51855 let e = _mm_setr_epi32(0, 2, 0, 4);
51856 assert_eq_m128i(r, e);
51857 }
51858
51859 #[simd_test(enable = "avx512f,avx512vl")]
51860 unsafe fn test_mm_mask_storeu_epi32() {
51861 let mut r = [42_i32; 4];
51862 let a = _mm_setr_epi32(1, 2, 3, 4);
51863 let m = 0b1010;
51864 _mm_mask_storeu_epi32(r.as_mut_ptr(), m, a);
51865 let e = _mm_setr_epi32(42, 2, 42, 4);
51866 assert_eq_m128i(_mm_loadu_epi32(r.as_ptr()), e);
51867 }
51868
51869 #[simd_test(enable = "avx512f,avx512vl")]
51870 unsafe fn test_mm_mask_store_epi32() {
51871 #[repr(align(16))]
51872 struct Align {
51873 data: [i32; 4], // 16 bytes
51874 }
51875 let mut r = Align { data: [42; 4] };
51876 let a = _mm_setr_epi32(1, 2, 3, 4);
51877 let m = 0b1010;
51878 _mm_mask_store_epi32(r.data.as_mut_ptr(), m, a);
51879 let e = _mm_setr_epi32(42, 2, 42, 4);
51880 assert_eq_m128i(_mm_load_epi32(r.data.as_ptr()), e);
51881 }
51882
51883 #[simd_test(enable = "avx512f,avx512vl")]
51884 unsafe fn test_mm_mask_loadu_epi64() {
51885 let src = _mm_set1_epi64x(42);
51886 let a = &[1_i64, 2];
51887 let p = a.as_ptr();
51888 let m = 0b10;
51889 let r = _mm_mask_loadu_epi64(src, m, black_box(p));
51890 let e = _mm_setr_epi64x(42, 2);
51891 assert_eq_m128i(r, e);
51892 }
51893
51894 #[simd_test(enable = "avx512f,avx512vl")]
51895 unsafe fn test_mm_maskz_loadu_epi64() {
51896 let a = &[1_i64, 2];
51897 let p = a.as_ptr();
51898 let m = 0b10;
51899 let r = _mm_maskz_loadu_epi64(m, black_box(p));
51900 let e = _mm_setr_epi64x(0, 2);
51901 assert_eq_m128i(r, e);
51902 }
51903
51904 #[simd_test(enable = "avx512f,avx512vl")]
51905 unsafe fn test_mm_mask_load_epi64() {
51906 #[repr(align(16))]
51907 struct Align {
51908 data: [i64; 2], // 16 bytes
51909 }
51910 let src = _mm_set1_epi64x(42);
51911 let a = Align { data: [1_i64, 2] };
51912 let p = a.data.as_ptr();
51913 let m = 0b10;
51914 let r = _mm_mask_load_epi64(src, m, black_box(p));
51915 let e = _mm_setr_epi64x(42, 2);
51916 assert_eq_m128i(r, e);
51917 }
51918
51919 #[simd_test(enable = "avx512f,avx512vl")]
51920 unsafe fn test_mm_maskz_load_epi64() {
51921 #[repr(align(16))]
51922 struct Align {
51923 data: [i64; 2], // 16 bytes
51924 }
51925 let a = Align { data: [1_i64, 2] };
51926 let p = a.data.as_ptr();
51927 let m = 0b10;
51928 let r = _mm_maskz_load_epi64(m, black_box(p));
51929 let e = _mm_setr_epi64x(0, 2);
51930 assert_eq_m128i(r, e);
51931 }
51932
51933 #[simd_test(enable = "avx512f,avx512vl")]
51934 unsafe fn test_mm_mask_storeu_epi64() {
51935 let mut r = [42_i64; 2];
51936 let a = _mm_setr_epi64x(1, 2);
51937 let m = 0b10;
51938 _mm_mask_storeu_epi64(r.as_mut_ptr(), m, a);
51939 let e = _mm_setr_epi64x(42, 2);
51940 assert_eq_m128i(_mm_loadu_epi64(r.as_ptr()), e);
51941 }
51942
51943 #[simd_test(enable = "avx512f,avx512vl")]
51944 unsafe fn test_mm_mask_store_epi64() {
51945 #[repr(align(16))]
51946 struct Align {
51947 data: [i64; 2], // 16 bytes
51948 }
51949 let mut r = Align { data: [42; 2] };
51950 let a = _mm_setr_epi64x(1, 2);
51951 let m = 0b10;
51952 _mm_mask_store_epi64(r.data.as_mut_ptr(), m, a);
51953 let e = _mm_setr_epi64x(42, 2);
51954 assert_eq_m128i(_mm_load_epi64(r.data.as_ptr()), e);
51955 }
51956
51957 #[simd_test(enable = "avx512f,avx512vl")]
51958 unsafe fn test_mm_mask_loadu_ps() {
51959 let src = _mm_set1_ps(42.0);
51960 let a = &[1.0_f32, 2.0, 3.0, 4.0];
51961 let p = a.as_ptr();
51962 let m = 0b1010;
51963 let r = _mm_mask_loadu_ps(src, m, black_box(p));
51964 let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0);
51965 assert_eq_m128(r, e);
51966 }
51967
51968 #[simd_test(enable = "avx512f,avx512vl")]
51969 unsafe fn test_mm_maskz_loadu_ps() {
51970 let a = &[1.0_f32, 2.0, 3.0, 4.0];
51971 let p = a.as_ptr();
51972 let m = 0b1010;
51973 let r = _mm_maskz_loadu_ps(m, black_box(p));
51974 let e = _mm_setr_ps(0.0, 2.0, 0.0, 4.0);
51975 assert_eq_m128(r, e);
51976 }
51977
51978 #[simd_test(enable = "avx512f,avx512vl")]
51979 unsafe fn test_mm_mask_load_ps() {
51980 #[repr(align(16))]
51981 struct Align {
51982 data: [f32; 4], // 16 bytes
51983 }
51984 let src = _mm_set1_ps(42.0);
51985 let a = Align {
51986 data: [1.0_f32, 2.0, 3.0, 4.0],
51987 };
51988 let p = a.data.as_ptr();
51989 let m = 0b1010;
51990 let r = _mm_mask_load_ps(src, m, black_box(p));
51991 let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0);
51992 assert_eq_m128(r, e);
51993 }
51994
51995 #[simd_test(enable = "avx512f,avx512vl")]
51996 unsafe fn test_mm_maskz_load_ps() {
51997 #[repr(align(16))]
51998 struct Align {
51999 data: [f32; 4], // 16 bytes
52000 }
52001 let a = Align {
52002 data: [1.0_f32, 2.0, 3.0, 4.0],
52003 };
52004 let p = a.data.as_ptr();
52005 let m = 0b1010;
52006 let r = _mm_maskz_load_ps(m, black_box(p));
52007 let e = _mm_setr_ps(0.0, 2.0, 0.0, 4.0);
52008 assert_eq_m128(r, e);
52009 }
52010
52011 #[simd_test(enable = "avx512f,avx512vl")]
52012 unsafe fn test_mm_mask_storeu_ps() {
52013 let mut r = [42_f32; 4];
52014 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
52015 let m = 0b1010;
52016 _mm_mask_storeu_ps(r.as_mut_ptr(), m, a);
52017 let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0);
52018 assert_eq_m128(_mm_loadu_ps(r.as_ptr()), e);
52019 }
52020
52021 #[simd_test(enable = "avx512f,avx512vl")]
52022 unsafe fn test_mm_mask_store_ps() {
52023 #[repr(align(16))]
52024 struct Align {
52025 data: [f32; 4], // 16 bytes
52026 }
52027 let mut r = Align { data: [42.0; 4] };
52028 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
52029 let m = 0b1010;
52030 _mm_mask_store_ps(r.data.as_mut_ptr(), m, a);
52031 let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0);
52032 assert_eq_m128(_mm_load_ps(r.data.as_ptr()), e);
52033 }
52034
52035 #[simd_test(enable = "avx512f,avx512vl")]
52036 unsafe fn test_mm_mask_loadu_pd() {
52037 let src = _mm_set1_pd(42.0);
52038 let a = &[1.0_f64, 2.0];
52039 let p = a.as_ptr();
52040 let m = 0b10;
52041 let r = _mm_mask_loadu_pd(src, m, black_box(p));
52042 let e = _mm_setr_pd(42.0, 2.0);
52043 assert_eq_m128d(r, e);
52044 }
52045
52046 #[simd_test(enable = "avx512f,avx512vl")]
52047 unsafe fn test_mm_maskz_loadu_pd() {
52048 let a = &[1.0_f64, 2.0];
52049 let p = a.as_ptr();
52050 let m = 0b10;
52051 let r = _mm_maskz_loadu_pd(m, black_box(p));
52052 let e = _mm_setr_pd(0.0, 2.0);
52053 assert_eq_m128d(r, e);
52054 }
52055
52056 #[simd_test(enable = "avx512f,avx512vl")]
52057 unsafe fn test_mm_mask_load_pd() {
52058 #[repr(align(16))]
52059 struct Align {
52060 data: [f64; 2], // 16 bytes
52061 }
52062 let src = _mm_set1_pd(42.0);
52063 let a = Align {
52064 data: [1.0_f64, 2.0],
52065 };
52066 let p = a.data.as_ptr();
52067 let m = 0b10;
52068 let r = _mm_mask_load_pd(src, m, black_box(p));
52069 let e = _mm_setr_pd(42.0, 2.0);
52070 assert_eq_m128d(r, e);
52071 }
52072
52073 #[simd_test(enable = "avx512f,avx512vl")]
52074 unsafe fn test_mm_maskz_load_pd() {
52075 #[repr(align(16))]
52076 struct Align {
52077 data: [f64; 2], // 16 bytes
52078 }
52079 let a = Align {
52080 data: [1.0_f64, 2.0],
52081 };
52082 let p = a.data.as_ptr();
52083 let m = 0b10;
52084 let r = _mm_maskz_load_pd(m, black_box(p));
52085 let e = _mm_setr_pd(0.0, 2.0);
52086 assert_eq_m128d(r, e);
52087 }
52088
52089 #[simd_test(enable = "avx512f")]
52090 unsafe fn test_mm_mask_load_ss() {
52091 #[repr(align(16))]
52092 struct Align {
52093 data: f32,
52094 }
52095 let src = _mm_set_ss(2.0);
52096 let mem = Align { data: 1.0 };
52097 let r = _mm_mask_load_ss(src, 0b1, &mem.data);
52098 assert_eq_m128(r, _mm_set_ss(1.0));
52099 let r = _mm_mask_load_ss(src, 0b0, &mem.data);
52100 assert_eq_m128(r, _mm_set_ss(2.0));
52101 }
52102
52103 #[simd_test(enable = "avx512f")]
52104 unsafe fn test_mm_maskz_load_ss() {
52105 #[repr(align(16))]
52106 struct Align {
52107 data: f32,
52108 }
52109 let mem = Align { data: 1.0 };
52110 let r = _mm_maskz_load_ss(0b1, &mem.data);
52111 assert_eq_m128(r, _mm_set_ss(1.0));
52112 let r = _mm_maskz_load_ss(0b0, &mem.data);
52113 assert_eq_m128(r, _mm_set_ss(0.0));
52114 }
52115
52116 #[simd_test(enable = "avx512f")]
52117 unsafe fn test_mm_mask_load_sd() {
52118 #[repr(align(16))]
52119 struct Align {
52120 data: f64,
52121 }
52122 let src = _mm_set_sd(2.0);
52123 let mem = Align { data: 1.0 };
52124 let r = _mm_mask_load_sd(src, 0b1, &mem.data);
52125 assert_eq_m128d(r, _mm_set_sd(1.0));
52126 let r = _mm_mask_load_sd(src, 0b0, &mem.data);
52127 assert_eq_m128d(r, _mm_set_sd(2.0));
52128 }
52129
52130 #[simd_test(enable = "avx512f")]
52131 unsafe fn test_mm_maskz_load_sd() {
52132 #[repr(align(16))]
52133 struct Align {
52134 data: f64,
52135 }
52136 let mem = Align { data: 1.0 };
52137 let r = _mm_maskz_load_sd(0b1, &mem.data);
52138 assert_eq_m128d(r, _mm_set_sd(1.0));
52139 let r = _mm_maskz_load_sd(0b0, &mem.data);
52140 assert_eq_m128d(r, _mm_set_sd(0.0));
52141 }
52142
52143 #[simd_test(enable = "avx512f,avx512vl")]
52144 unsafe fn test_mm_mask_storeu_pd() {
52145 let mut r = [42_f64; 2];
52146 let a = _mm_setr_pd(1.0, 2.0);
52147 let m = 0b10;
52148 _mm_mask_storeu_pd(r.as_mut_ptr(), m, a);
52149 let e = _mm_setr_pd(42.0, 2.0);
52150 assert_eq_m128d(_mm_loadu_pd(r.as_ptr()), e);
52151 }
52152
52153 #[simd_test(enable = "avx512f,avx512vl")]
52154 unsafe fn test_mm_mask_store_pd() {
52155 #[repr(align(16))]
52156 struct Align {
52157 data: [f64; 2], // 16 bytes
52158 }
52159 let mut r = Align { data: [42.0; 2] };
52160 let a = _mm_setr_pd(1.0, 2.0);
52161 let m = 0b10;
52162 _mm_mask_store_pd(r.data.as_mut_ptr(), m, a);
52163 let e = _mm_setr_pd(42.0, 2.0);
52164 assert_eq_m128d(_mm_load_pd(r.data.as_ptr()), e);
52165 }
52166
52167 #[simd_test(enable = "avx512f")]
52168 unsafe fn test_mm_mask_store_ss() {
52169 #[repr(align(16))]
52170 struct Align {
52171 data: f32,
52172 }
52173 let a = _mm_set_ss(2.0);
52174 let mut mem = Align { data: 1.0 };
52175 _mm_mask_store_ss(&mut mem.data, 0b1, a);
52176 assert_eq!(mem.data, 2.0);
52177 _mm_mask_store_ss(&mut mem.data, 0b0, a);
52178 assert_eq!(mem.data, 2.0);
52179 }
52180
52181 #[simd_test(enable = "avx512f")]
52182 unsafe fn test_mm_mask_store_sd() {
52183 #[repr(align(16))]
52184 struct Align {
52185 data: f64,
52186 }
52187 let a = _mm_set_sd(2.0);
52188 let mut mem = Align { data: 1.0 };
52189 _mm_mask_store_sd(&mut mem.data, 0b1, a);
52190 assert_eq!(mem.data, 2.0);
52191 _mm_mask_store_sd(&mut mem.data, 0b0, a);
52192 assert_eq!(mem.data, 2.0);
52193 }
52194
52195 #[simd_test(enable = "avx512f")]
52196 unsafe fn test_mm512_setr_pd() {
52197 let r = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
52198 assert_eq_m512d(r, _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.));
52199 }
52200
52201 #[simd_test(enable = "avx512f")]
52202 unsafe fn test_mm512_set_pd() {
52203 let r = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
52204 assert_eq_m512d(r, _mm512_set_pd(7., 6., 5., 4., 3., 2., 1., 0.));
52205 }
52206
52207 #[simd_test(enable = "avx512f")]
52208 unsafe fn test_mm512_rol_epi32() {
52209 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
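// Rotating left by one wraps the element set to 1 << 31 around to 1; the elements set to 1 become 2.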
52210 let r = _mm512_rol_epi32::<1>(a);
52211 let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52212 assert_eq_m512i(r, e);
52213 }
52214
52215 #[simd_test(enable = "avx512f")]
52216 unsafe fn test_mm512_mask_rol_epi32() {
52217 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52218 let r = _mm512_mask_rol_epi32::<1>(a, 0, a);
52219 assert_eq_m512i(r, a);
52220 let r = _mm512_mask_rol_epi32::<1>(a, 0b11111111_11111111, a);
52221 let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52222 assert_eq_m512i(r, e);
52223 }
52224
52225 #[simd_test(enable = "avx512f")]
52226 unsafe fn test_mm512_maskz_rol_epi32() {
52227 let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
52228 let r = _mm512_maskz_rol_epi32::<1>(0, a);
52229 assert_eq_m512i(r, _mm512_setzero_si512());
52230 let r = _mm512_maskz_rol_epi32::<1>(0b00000000_11111111, a);
52231 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
52232 assert_eq_m512i(r, e);
52233 }
52234
52235 #[simd_test(enable = "avx512f,avx512vl")]
52236 unsafe fn test_mm256_rol_epi32() {
52237 let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52238 let r = _mm256_rol_epi32::<1>(a);
52239 let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52240 assert_eq_m256i(r, e);
52241 }
52242
52243 #[simd_test(enable = "avx512f,avx512vl")]
52244 unsafe fn test_mm256_mask_rol_epi32() {
52245 let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52246 let r = _mm256_mask_rol_epi32::<1>(a, 0, a);
52247 assert_eq_m256i(r, a);
52248 let r = _mm256_mask_rol_epi32::<1>(a, 0b11111111, a);
52249 let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52250 assert_eq_m256i(r, e);
52251 }
52252
52253 #[simd_test(enable = "avx512f,avx512vl")]
52254 unsafe fn test_mm256_maskz_rol_epi32() {
52255 let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52256 let r = _mm256_maskz_rol_epi32::<1>(0, a);
52257 assert_eq_m256i(r, _mm256_setzero_si256());
52258 let r = _mm256_maskz_rol_epi32::<1>(0b11111111, a);
52259 let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52260 assert_eq_m256i(r, e);
52261 }
52262
52263 #[simd_test(enable = "avx512f,avx512vl")]
52264 unsafe fn test_mm_rol_epi32() {
52265 let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52266 let r = _mm_rol_epi32::<1>(a);
52267 let e = _mm_set_epi32(1 << 0, 2, 2, 2);
52268 assert_eq_m128i(r, e);
52269 }
52270
52271 #[simd_test(enable = "avx512f,avx512vl")]
52272 unsafe fn test_mm_mask_rol_epi32() {
52273 let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52274 let r = _mm_mask_rol_epi32::<1>(a, 0, a);
52275 assert_eq_m128i(r, a);
52276 let r = _mm_mask_rol_epi32::<1>(a, 0b00001111, a);
52277 let e = _mm_set_epi32(1 << 0, 2, 2, 2);
52278 assert_eq_m128i(r, e);
52279 }
52280
52281 #[simd_test(enable = "avx512f,avx512vl")]
52282 unsafe fn test_mm_maskz_rol_epi32() {
52283 let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52284 let r = _mm_maskz_rol_epi32::<1>(0, a);
52285 assert_eq_m128i(r, _mm_setzero_si128());
52286 let r = _mm_maskz_rol_epi32::<1>(0b00001111, a);
52287 let e = _mm_set_epi32(1 << 0, 2, 2, 2);
52288 assert_eq_m128i(r, e);
52289 }
52290
52291 #[simd_test(enable = "avx512f")]
52292 unsafe fn test_mm512_ror_epi32() {
52293 let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52294 let r = _mm512_ror_epi32::<1>(a);
52295 let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52296 assert_eq_m512i(r, e);
52297 }
52298
52299 #[simd_test(enable = "avx512f")]
52300 unsafe fn test_mm512_mask_ror_epi32() {
52301 let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52302 let r = _mm512_mask_ror_epi32::<1>(a, 0, a);
52303 assert_eq_m512i(r, a);
52304 let r = _mm512_mask_ror_epi32::<1>(a, 0b11111111_11111111, a);
52305 let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52306 assert_eq_m512i(r, e);
52307 }
52308
52309 #[simd_test(enable = "avx512f")]
52310 unsafe fn test_mm512_maskz_ror_epi32() {
52311 let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
52312 let r = _mm512_maskz_ror_epi32::<1>(0, a);
52313 assert_eq_m512i(r, _mm512_setzero_si512());
52314 let r = _mm512_maskz_ror_epi32::<1>(0b00000000_11111111, a);
52315 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
52316 assert_eq_m512i(r, e);
52317 }
52318
52319 #[simd_test(enable = "avx512f,avx512vl")]
52320 unsafe fn test_mm256_ror_epi32() {
52321 let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52322 let r = _mm256_ror_epi32::<1>(a);
52323 let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52324 assert_eq_m256i(r, e);
52325 }
52326
52327 #[simd_test(enable = "avx512f,avx512vl")]
52328 unsafe fn test_mm256_mask_ror_epi32() {
52329 let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52330 let r = _mm256_mask_ror_epi32::<1>(a, 0, a);
52331 assert_eq_m256i(r, a);
52332 let r = _mm256_mask_ror_epi32::<1>(a, 0b11111111, a);
52333 let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52334 assert_eq_m256i(r, e);
52335 }
52336
52337 #[simd_test(enable = "avx512f,avx512vl")]
52338 unsafe fn test_mm256_maskz_ror_epi32() {
52339 let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52340 let r = _mm256_maskz_ror_epi32::<1>(0, a);
52341 assert_eq_m256i(r, _mm256_setzero_si256());
52342 let r = _mm256_maskz_ror_epi32::<1>(0b11111111, a);
52343 let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52344 assert_eq_m256i(r, e);
52345 }
52346
52347 #[simd_test(enable = "avx512f,avx512vl")]
52348 unsafe fn test_mm_ror_epi32() {
52349 let a = _mm_set_epi32(1 << 0, 2, 2, 2);
52350 let r = _mm_ror_epi32::<1>(a);
52351 let e = _mm_set_epi32(1 << 31, 1, 1, 1);
52352 assert_eq_m128i(r, e);
52353 }
52354
52355 #[simd_test(enable = "avx512f,avx512vl")]
52356 unsafe fn test_mm_mask_ror_epi32() {
52357 let a = _mm_set_epi32(1 << 0, 2, 2, 2);
52358 let r = _mm_mask_ror_epi32::<1>(a, 0, a);
52359 assert_eq_m128i(r, a);
52360 let r = _mm_mask_ror_epi32::<1>(a, 0b00001111, a);
52361 let e = _mm_set_epi32(1 << 31, 1, 1, 1);
52362 assert_eq_m128i(r, e);
52363 }
52364
52365 #[simd_test(enable = "avx512f,avx512vl")]
52366 unsafe fn test_mm_maskz_ror_epi32() {
52367 let a = _mm_set_epi32(1 << 0, 2, 2, 2);
52368 let r = _mm_maskz_ror_epi32::<1>(0, a);
52369 assert_eq_m128i(r, _mm_setzero_si128());
52370 let r = _mm_maskz_ror_epi32::<1>(0b00001111, a);
52371 let e = _mm_set_epi32(1 << 31, 1, 1, 1);
52372 assert_eq_m128i(r, e);
52373 }
52374
52375 #[simd_test(enable = "avx512f")]
52376 unsafe fn test_mm512_slli_epi32() {
52377 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
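// Unlike a rotate, the logical left shift discards the top bit, so the element set to 1 << 31 becomes 0.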
52378 let r = _mm512_slli_epi32::<1>(a);
52379 let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52380 assert_eq_m512i(r, e);
52381 }
52382
52383 #[simd_test(enable = "avx512f")]
52384 unsafe fn test_mm512_mask_slli_epi32() {
52385 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52386 let r = _mm512_mask_slli_epi32::<1>(a, 0, a);
52387 assert_eq_m512i(r, a);
52388 let r = _mm512_mask_slli_epi32::<1>(a, 0b11111111_11111111, a);
52389 let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52390 assert_eq_m512i(r, e);
52391 }
52392
52393 #[simd_test(enable = "avx512f")]
52394 unsafe fn test_mm512_maskz_slli_epi32() {
52395 let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
52396 let r = _mm512_maskz_slli_epi32::<1>(0, a);
52397 assert_eq_m512i(r, _mm512_setzero_si512());
52398 let r = _mm512_maskz_slli_epi32::<1>(0b00000000_11111111, a);
52399 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0);
52400 assert_eq_m512i(r, e);
52401 }
52402
52403 #[simd_test(enable = "avx512f,avx512vl")]
52404 unsafe fn test_mm256_mask_slli_epi32() {
52405 let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52406 let r = _mm256_mask_slli_epi32::<1>(a, 0, a);
52407 assert_eq_m256i(r, a);
52408 let r = _mm256_mask_slli_epi32::<1>(a, 0b11111111, a);
52409 let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2);
52410 assert_eq_m256i(r, e);
52411 }
52412
52413 #[simd_test(enable = "avx512f,avx512vl")]
52414 unsafe fn test_mm256_maskz_slli_epi32() {
52415 let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52416 let r = _mm256_maskz_slli_epi32::<1>(0, a);
52417 assert_eq_m256i(r, _mm256_setzero_si256());
52418 let r = _mm256_maskz_slli_epi32::<1>(0b11111111, a);
52419 let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2);
52420 assert_eq_m256i(r, e);
52421 }
52422
52423 #[simd_test(enable = "avx512f,avx512vl")]
52424 unsafe fn test_mm_mask_slli_epi32() {
52425 let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52426 let r = _mm_mask_slli_epi32::<1>(a, 0, a);
52427 assert_eq_m128i(r, a);
52428 let r = _mm_mask_slli_epi32::<1>(a, 0b00001111, a);
52429 let e = _mm_set_epi32(0, 2, 2, 2);
52430 assert_eq_m128i(r, e);
52431 }
52432
52433 #[simd_test(enable = "avx512f,avx512vl")]
52434 unsafe fn test_mm_maskz_slli_epi32() {
52435 let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52436 let r = _mm_maskz_slli_epi32::<1>(0, a);
52437 assert_eq_m128i(r, _mm_setzero_si128());
52438 let r = _mm_maskz_slli_epi32::<1>(0b00001111, a);
52439 let e = _mm_set_epi32(0, 2, 2, 2);
52440 assert_eq_m128i(r, e);
52441 }
52442
52443 #[simd_test(enable = "avx512f")]
52444 unsafe fn test_mm512_srli_epi32() {
52445 let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52446 let r = _mm512_srli_epi32::<1>(a);
52447 let e = _mm512_set_epi32(0 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52448 assert_eq_m512i(r, e);
52449 }
52450
52451 #[simd_test(enable = "avx512f")]
52452 unsafe fn test_mm512_mask_srli_epi32() {
52453 let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52454 let r = _mm512_mask_srli_epi32::<1>(a, 0, a);
52455 assert_eq_m512i(r, a);
52456 let r = _mm512_mask_srli_epi32::<1>(a, 0b11111111_11111111, a);
52457 let e = _mm512_set_epi32(0 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52458 assert_eq_m512i(r, e);
52459 }
52460
52461 #[simd_test(enable = "avx512f")]
52462 unsafe fn test_mm512_maskz_srli_epi32() {
52463 let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0);
52464 let r = _mm512_maskz_srli_epi32::<1>(0, a);
52465 assert_eq_m512i(r, _mm512_setzero_si512());
52466 let r = _mm512_maskz_srli_epi32::<1>(0b00000000_11111111, a);
52467 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0 << 31);
52468 assert_eq_m512i(r, e);
52469 }
52470
52471 #[simd_test(enable = "avx512f,avx512vl")]
52472 unsafe fn test_mm256_mask_srli_epi32() {
52473 let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
52474 let r = _mm256_mask_srli_epi32::<1>(a, 0, a);
52475 assert_eq_m256i(r, a);
52476 let r = _mm256_mask_srli_epi32::<1>(a, 0b11111111, a);
52477 let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
52478 assert_eq_m256i(r, e);
52479 }
52480
52481 #[simd_test(enable = "avx512f,avx512vl")]
52482 unsafe fn test_mm256_maskz_srli_epi32() {
52483 let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
52484 let r = _mm256_maskz_srli_epi32::<1>(0, a);
52485 assert_eq_m256i(r, _mm256_setzero_si256());
52486 let r = _mm256_maskz_srli_epi32::<1>(0b11111111, a);
52487 let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
52488 assert_eq_m256i(r, e);
52489 }
52490
52491 #[simd_test(enable = "avx512f,avx512vl")]
52492 unsafe fn test_mm_mask_srli_epi32() {
52493 let a = _mm_set_epi32(1 << 5, 0, 0, 0);
52494 let r = _mm_mask_srli_epi32::<1>(a, 0, a);
52495 assert_eq_m128i(r, a);
52496 let r = _mm_mask_srli_epi32::<1>(a, 0b00001111, a);
52497 let e = _mm_set_epi32(1 << 4, 0, 0, 0);
52498 assert_eq_m128i(r, e);
52499 }
52500
52501 #[simd_test(enable = "avx512f,avx512vl")]
52502 unsafe fn test_mm_maskz_srli_epi32() {
52503 let a = _mm_set_epi32(1 << 5, 0, 0, 0);
52504 let r = _mm_maskz_srli_epi32::<1>(0, a);
52505 assert_eq_m128i(r, _mm_setzero_si128());
52506 let r = _mm_maskz_srli_epi32::<1>(0b00001111, a);
52507 let e = _mm_set_epi32(1 << 4, 0, 0, 0);
52508 assert_eq_m128i(r, e);
52509 }
52510
52511 #[simd_test(enable = "avx512f")]
52512 unsafe fn test_mm512_rolv_epi32() {
52513 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52514 let b = _mm512_set1_epi32(1);
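// rolv rotates each element by the count in the corresponding lane of `b`; here every lane rotates left by 1.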
52515 let r = _mm512_rolv_epi32(a, b);
52516 let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52517 assert_eq_m512i(r, e);
52518 }
52519
52520 #[simd_test(enable = "avx512f")]
52521 unsafe fn test_mm512_mask_rolv_epi32() {
52522 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52523 let b = _mm512_set1_epi32(1);
52524 let r = _mm512_mask_rolv_epi32(a, 0, a, b);
52525 assert_eq_m512i(r, a);
52526 let r = _mm512_mask_rolv_epi32(a, 0b11111111_11111111, a, b);
52527 let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52528 assert_eq_m512i(r, e);
52529 }
52530
52531 #[simd_test(enable = "avx512f")]
52532 unsafe fn test_mm512_maskz_rolv_epi32() {
52533 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
52534 let b = _mm512_set1_epi32(1);
52535 let r = _mm512_maskz_rolv_epi32(0, a, b);
52536 assert_eq_m512i(r, _mm512_setzero_si512());
52537 let r = _mm512_maskz_rolv_epi32(0b00000000_11111111, a, b);
52538 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
52539 assert_eq_m512i(r, e);
52540 }
52541
52542 #[simd_test(enable = "avx512f,avx512vl")]
52543 unsafe fn test_mm256_rolv_epi32() {
52544 let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52545 let b = _mm256_set1_epi32(1);
52546 let r = _mm256_rolv_epi32(a, b);
52547 let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52548 assert_eq_m256i(r, e);
52549 }
52550
52551 #[simd_test(enable = "avx512f,avx512vl")]
52552 unsafe fn test_mm256_mask_rolv_epi32() {
52553 let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52554 let b = _mm256_set1_epi32(1);
52555 let r = _mm256_mask_rolv_epi32(a, 0, a, b);
52556 assert_eq_m256i(r, a);
52557 let r = _mm256_mask_rolv_epi32(a, 0b11111111, a, b);
52558 let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52559 assert_eq_m256i(r, e);
52560 }
52561
52562 #[simd_test(enable = "avx512f,avx512vl")]
52563 unsafe fn test_mm256_maskz_rolv_epi32() {
52564 let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52565 let b = _mm256_set1_epi32(1);
52566 let r = _mm256_maskz_rolv_epi32(0, a, b);
52567 assert_eq_m256i(r, _mm256_setzero_si256());
52568 let r = _mm256_maskz_rolv_epi32(0b11111111, a, b);
52569 let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52570 assert_eq_m256i(r, e);
52571 }
52572
52573 #[simd_test(enable = "avx512f,avx512vl")]
52574 unsafe fn test_mm_rolv_epi32() {
52575 let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52576 let b = _mm_set1_epi32(1);
52577 let r = _mm_rolv_epi32(a, b);
52578 let e = _mm_set_epi32(1 << 0, 2, 2, 2);
52579 assert_eq_m128i(r, e);
52580 }
52581
52582 #[simd_test(enable = "avx512f,avx512vl")]
52583 unsafe fn test_mm_mask_rolv_epi32() {
52584 let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52585 let b = _mm_set1_epi32(1);
52586 let r = _mm_mask_rolv_epi32(a, 0, a, b);
52587 assert_eq_m128i(r, a);
52588 let r = _mm_mask_rolv_epi32(a, 0b00001111, a, b);
52589 let e = _mm_set_epi32(1 << 0, 2, 2, 2);
52590 assert_eq_m128i(r, e);
52591 }
52592
52593 #[simd_test(enable = "avx512f,avx512vl")]
52594 unsafe fn test_mm_maskz_rolv_epi32() {
52595 let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52596 let b = _mm_set1_epi32(1);
52597 let r = _mm_maskz_rolv_epi32(0, a, b);
52598 assert_eq_m128i(r, _mm_setzero_si128());
52599 let r = _mm_maskz_rolv_epi32(0b00001111, a, b);
52600 let e = _mm_set_epi32(1 << 0, 2, 2, 2);
52601 assert_eq_m128i(r, e);
52602 }
52603
52604 #[simd_test(enable = "avx512f")]
52605 unsafe fn test_mm512_rorv_epi32() {
52606 let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52607 let b = _mm512_set1_epi32(1);
52608 let r = _mm512_rorv_epi32(a, b);
52609 let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52610 assert_eq_m512i(r, e);
52611 }
52612
52613 #[simd_test(enable = "avx512f")]
52614 unsafe fn test_mm512_mask_rorv_epi32() {
52615 let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52616 let b = _mm512_set1_epi32(1);
52617 let r = _mm512_mask_rorv_epi32(a, 0, a, b);
52618 assert_eq_m512i(r, a);
52619 let r = _mm512_mask_rorv_epi32(a, 0b11111111_11111111, a, b);
52620 let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52621 assert_eq_m512i(r, e);
52622 }
52623
52624 #[simd_test(enable = "avx512f")]
52625 unsafe fn test_mm512_maskz_rorv_epi32() {
52626 let a = _mm512_set_epi32(3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
52627 let b = _mm512_set1_epi32(1);
52628 let r = _mm512_maskz_rorv_epi32(0, a, b);
52629 assert_eq_m512i(r, _mm512_setzero_si512());
52630 let r = _mm512_maskz_rorv_epi32(0b00000000_11111111, a, b);
52631 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
52632 assert_eq_m512i(r, e);
52633 }
52634
52635 #[simd_test(enable = "avx512f,avx512vl")]
52636 unsafe fn test_mm256_rorv_epi32() {
52637 let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52638 let b = _mm256_set1_epi32(1);
52639 let r = _mm256_rorv_epi32(a, b);
52640 let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52641 assert_eq_m256i(r, e);
52642 }
52643
52644 #[simd_test(enable = "avx512f,avx512vl")]
52645 unsafe fn test_mm256_mask_rorv_epi32() {
52646 let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52647 let b = _mm256_set1_epi32(1);
52648 let r = _mm256_mask_rorv_epi32(a, 0, a, b);
52649 assert_eq_m256i(r, a);
52650 let r = _mm256_mask_rorv_epi32(a, 0b11111111, a, b);
52651 let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52652 assert_eq_m256i(r, e);
52653 }
52654
52655 #[simd_test(enable = "avx512f,avx512vl")]
52656 unsafe fn test_mm256_maskz_rorv_epi32() {
52657 let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52658 let b = _mm256_set1_epi32(1);
52659 let r = _mm256_maskz_rorv_epi32(0, a, b);
52660 assert_eq_m256i(r, _mm256_setzero_si256());
52661 let r = _mm256_maskz_rorv_epi32(0b11111111, a, b);
52662 let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52663 assert_eq_m256i(r, e);
52664 }
52665
52666 #[simd_test(enable = "avx512f,avx512vl")]
52667 unsafe fn test_mm_rorv_epi32() {
52668 let a = _mm_set_epi32(1 << 0, 2, 2, 2);
52669 let b = _mm_set1_epi32(1);
52670 let r = _mm_rorv_epi32(a, b);
52671 let e = _mm_set_epi32(1 << 31, 1, 1, 1);
52672 assert_eq_m128i(r, e);
52673 }
52674
52675 #[simd_test(enable = "avx512f,avx512vl")]
52676 unsafe fn test_mm_mask_rorv_epi32() {
52677 let a = _mm_set_epi32(1 << 0, 2, 2, 2);
52678 let b = _mm_set1_epi32(1);
52679 let r = _mm_mask_rorv_epi32(a, 0, a, b);
52680 assert_eq_m128i(r, a);
52681 let r = _mm_mask_rorv_epi32(a, 0b00001111, a, b);
52682 let e = _mm_set_epi32(1 << 31, 1, 1, 1);
52683 assert_eq_m128i(r, e);
52684 }
52685
52686 #[simd_test(enable = "avx512f,avx512vl")]
52687 unsafe fn test_mm_maskz_rorv_epi32() {
52688 let a = _mm_set_epi32(1 << 0, 2, 2, 2);
52689 let b = _mm_set1_epi32(1);
52690 let r = _mm_maskz_rorv_epi32(0, a, b);
52691 assert_eq_m128i(r, _mm_setzero_si128());
52692 let r = _mm_maskz_rorv_epi32(0b00001111, a, b);
52693 let e = _mm_set_epi32(1 << 31, 1, 1, 1);
52694 assert_eq_m128i(r, e);
52695 }
52696
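    // sllv/srlv shift each element by its own per-element count; bits shifted
    // past the element boundary are discarded, so `(1 << 31) << 1 == 0`.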
52697 #[simd_test(enable = "avx512f")]
52698 unsafe fn test_mm512_sllv_epi32() {
52699 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52700 let count = _mm512_set1_epi32(1);
52701 let r = _mm512_sllv_epi32(a, count);
52702 let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52703 assert_eq_m512i(r, e);
52704 }
52705
52706 #[simd_test(enable = "avx512f")]
52707 unsafe fn test_mm512_mask_sllv_epi32() {
52708 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52709 let count = _mm512_set1_epi32(1);
52710 let r = _mm512_mask_sllv_epi32(a, 0, a, count);
52711 assert_eq_m512i(r, a);
52712 let r = _mm512_mask_sllv_epi32(a, 0b11111111_11111111, a, count);
52713 let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52714 assert_eq_m512i(r, e);
52715 }
52716
52717 #[simd_test(enable = "avx512f")]
52718 unsafe fn test_mm512_maskz_sllv_epi32() {
52719 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
52720 let count = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52721 let r = _mm512_maskz_sllv_epi32(0, a, count);
52722 assert_eq_m512i(r, _mm512_setzero_si512());
52723 let r = _mm512_maskz_sllv_epi32(0b00000000_11111111, a, count);
52724 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0);
52725 assert_eq_m512i(r, e);
52726 }
52727
52728 #[simd_test(enable = "avx512f,avx512vl")]
52729 unsafe fn test_mm256_mask_sllv_epi32() {
52730 let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52731 let count = _mm256_set1_epi32(1);
52732 let r = _mm256_mask_sllv_epi32(a, 0, a, count);
52733 assert_eq_m256i(r, a);
52734 let r = _mm256_mask_sllv_epi32(a, 0b11111111, a, count);
52735 let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2);
52736 assert_eq_m256i(r, e);
52737 }
52738
52739 #[simd_test(enable = "avx512f,avx512vl")]
52740 unsafe fn test_mm256_maskz_sllv_epi32() {
52741 let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52742 let count = _mm256_set1_epi32(1);
52743 let r = _mm256_maskz_sllv_epi32(0, a, count);
52744 assert_eq_m256i(r, _mm256_setzero_si256());
52745 let r = _mm256_maskz_sllv_epi32(0b11111111, a, count);
52746 let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2);
52747 assert_eq_m256i(r, e);
52748 }
52749
52750 #[simd_test(enable = "avx512f,avx512vl")]
52751 unsafe fn test_mm_mask_sllv_epi32() {
52752 let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52753 let count = _mm_set1_epi32(1);
52754 let r = _mm_mask_sllv_epi32(a, 0, a, count);
52755 assert_eq_m128i(r, a);
52756 let r = _mm_mask_sllv_epi32(a, 0b00001111, a, count);
52757 let e = _mm_set_epi32(0, 2, 2, 2);
52758 assert_eq_m128i(r, e);
52759 }
52760
52761 #[simd_test(enable = "avx512f,avx512vl")]
52762 unsafe fn test_mm_maskz_sllv_epi32() {
52763 let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52764 let count = _mm_set1_epi32(1);
52765 let r = _mm_maskz_sllv_epi32(0, a, count);
52766 assert_eq_m128i(r, _mm_setzero_si128());
52767 let r = _mm_maskz_sllv_epi32(0b00001111, a, count);
52768 let e = _mm_set_epi32(0, 2, 2, 2);
52769 assert_eq_m128i(r, e);
52770 }
52771
52772 #[simd_test(enable = "avx512f")]
52773 unsafe fn test_mm512_srlv_epi32() {
52774 let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52775 let count = _mm512_set1_epi32(1);
52776 let r = _mm512_srlv_epi32(a, count);
52777 let e = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52778 assert_eq_m512i(r, e);
52779 }
52780
52781 #[simd_test(enable = "avx512f")]
52782 unsafe fn test_mm512_mask_srlv_epi32() {
52783 let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52784 let count = _mm512_set1_epi32(1);
52785 let r = _mm512_mask_srlv_epi32(a, 0, a, count);
52786 assert_eq_m512i(r, a);
52787 let r = _mm512_mask_srlv_epi32(a, 0b11111111_11111111, a, count);
52788 let e = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52789 assert_eq_m512i(r, e);
52790 }
52791
52792 #[simd_test(enable = "avx512f")]
52793 unsafe fn test_mm512_maskz_srlv_epi32() {
52794 let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0);
52795 let count = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52796 let r = _mm512_maskz_srlv_epi32(0, a, count);
52797 assert_eq_m512i(r, _mm512_setzero_si512());
52798 let r = _mm512_maskz_srlv_epi32(0b00000000_11111111, a, count);
52799 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0);
52800 assert_eq_m512i(r, e);
52801 }
52802
52803 #[simd_test(enable = "avx512f,avx512vl")]
52804 unsafe fn test_mm256_mask_srlv_epi32() {
52805 let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
52806 let count = _mm256_set1_epi32(1);
52807 let r = _mm256_mask_srlv_epi32(a, 0, a, count);
52808 assert_eq_m256i(r, a);
52809 let r = _mm256_mask_srlv_epi32(a, 0b11111111, a, count);
52810 let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
52811 assert_eq_m256i(r, e);
52812 }
52813
52814 #[simd_test(enable = "avx512f,avx512vl")]
52815 unsafe fn test_mm256_maskz_srlv_epi32() {
52816 let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
52817 let count = _mm256_set1_epi32(1);
52818 let r = _mm256_maskz_srlv_epi32(0, a, count);
52819 assert_eq_m256i(r, _mm256_setzero_si256());
52820 let r = _mm256_maskz_srlv_epi32(0b11111111, a, count);
52821 let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
52822 assert_eq_m256i(r, e);
52823 }
52824
52825 #[simd_test(enable = "avx512f,avx512vl")]
52826 unsafe fn test_mm_mask_srlv_epi32() {
52827 let a = _mm_set_epi32(1 << 5, 0, 0, 0);
52828 let count = _mm_set1_epi32(1);
52829 let r = _mm_mask_srlv_epi32(a, 0, a, count);
52830 assert_eq_m128i(r, a);
52831 let r = _mm_mask_srlv_epi32(a, 0b00001111, a, count);
52832 let e = _mm_set_epi32(1 << 4, 0, 0, 0);
52833 assert_eq_m128i(r, e);
52834 }
52835
52836 #[simd_test(enable = "avx512f,avx512vl")]
52837 unsafe fn test_mm_maskz_srlv_epi32() {
52838 let a = _mm_set_epi32(1 << 5, 0, 0, 0);
52839 let count = _mm_set1_epi32(1);
52840 let r = _mm_maskz_srlv_epi32(0, a, count);
52841 assert_eq_m128i(r, _mm_setzero_si128());
52842 let r = _mm_maskz_srlv_epi32(0b00001111, a, count);
52843 let e = _mm_set_epi32(1 << 4, 0, 0, 0);
52844 assert_eq_m128i(r, e);
52845 }
52846
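    // Unlike the `*v` variants, sll/srl/sra shift every element by the same
    // amount, taken from the low 64 bits of the 128-bit `count` argument;
    // `_mm_set_epi32(0, 0, 0, 2)` encodes a shift of 2 and the upper elements
    // of `count` are ignored.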
52847 #[simd_test(enable = "avx512f")]
52848 unsafe fn test_mm512_sll_epi32() {
52849 #[rustfmt::skip]
52850 let a = _mm512_set_epi32(
52851 1 << 31, 1 << 0, 1 << 1, 1 << 2,
52852 0, 0, 0, 0,
52853 0, 0, 0, 0,
52854 0, 0, 0, 0,
52855 );
52856 let count = _mm_set_epi32(0, 0, 0, 2);
52857 let r = _mm512_sll_epi32(a, count);
52858 #[rustfmt::skip]
52859 let e = _mm512_set_epi32(
52860 0, 1 << 2, 1 << 3, 1 << 4,
52861 0, 0, 0, 0,
52862 0, 0, 0, 0,
52863 0, 0, 0, 0,
52864 );
52865 assert_eq_m512i(r, e);
52866 }
52867
52868 #[simd_test(enable = "avx512f")]
52869 unsafe fn test_mm512_mask_sll_epi32() {
52870 #[rustfmt::skip]
52871 let a = _mm512_set_epi32(
52872 1 << 31, 1 << 0, 1 << 1, 1 << 2,
52873 0, 0, 0, 0,
52874 0, 0, 0, 0,
52875 0, 0, 0, 0,
52876 );
52877 let count = _mm_set_epi32(0, 0, 0, 2);
52878 let r = _mm512_mask_sll_epi32(a, 0, a, count);
52879 assert_eq_m512i(r, a);
52880 let r = _mm512_mask_sll_epi32(a, 0b11111111_11111111, a, count);
52881 #[rustfmt::skip]
52882 let e = _mm512_set_epi32(
52883 0, 1 << 2, 1 << 3, 1 << 4,
52884 0, 0, 0, 0,
52885 0, 0, 0, 0,
52886 0, 0, 0, 0,
52887 );
52888 assert_eq_m512i(r, e);
52889 }
52890
52891 #[simd_test(enable = "avx512f")]
52892 unsafe fn test_mm512_maskz_sll_epi32() {
52893 #[rustfmt::skip]
52894 let a = _mm512_set_epi32(
52895 1 << 31, 1 << 0, 1 << 1, 1 << 2,
52896 0, 0, 0, 0,
52897 0, 0, 0, 0,
52898 0, 0, 0, 1 << 31,
52899 );
52900 let count = _mm_set_epi32(2, 0, 0, 2);
52901 let r = _mm512_maskz_sll_epi32(0, a, count);
52902 assert_eq_m512i(r, _mm512_setzero_si512());
52903 let r = _mm512_maskz_sll_epi32(0b00000000_11111111, a, count);
52904 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
52905 assert_eq_m512i(r, e);
52906 }
52907
52908 #[simd_test(enable = "avx512f,avx512vl")]
52909 unsafe fn test_mm256_mask_sll_epi32() {
52910 let a = _mm256_set_epi32(1 << 13, 0, 0, 0, 0, 0, 0, 0);
52911 let count = _mm_set_epi32(0, 0, 0, 1);
52912 let r = _mm256_mask_sll_epi32(a, 0, a, count);
52913 assert_eq_m256i(r, a);
52914 let r = _mm256_mask_sll_epi32(a, 0b11111111, a, count);
52915 let e = _mm256_set_epi32(1 << 14, 0, 0, 0, 0, 0, 0, 0);
52916 assert_eq_m256i(r, e);
52917 }
52918
52919 #[simd_test(enable = "avx512f,avx512vl")]
52920 unsafe fn test_mm256_maskz_sll_epi32() {
52921 let a = _mm256_set_epi32(1 << 13, 0, 0, 0, 0, 0, 0, 0);
52922 let count = _mm_set_epi32(0, 0, 0, 1);
52923 let r = _mm256_maskz_sll_epi32(0, a, count);
52924 assert_eq_m256i(r, _mm256_setzero_si256());
52925 let r = _mm256_maskz_sll_epi32(0b11111111, a, count);
52926 let e = _mm256_set_epi32(1 << 14, 0, 0, 0, 0, 0, 0, 0);
52927 assert_eq_m256i(r, e);
52928 }
52929
52930 #[simd_test(enable = "avx512f,avx512vl")]
52931 unsafe fn test_mm_mask_sll_epi32() {
52932 let a = _mm_set_epi32(1 << 13, 0, 0, 0);
52933 let count = _mm_set_epi32(0, 0, 0, 1);
52934 let r = _mm_mask_sll_epi32(a, 0, a, count);
52935 assert_eq_m128i(r, a);
52936 let r = _mm_mask_sll_epi32(a, 0b00001111, a, count);
52937 let e = _mm_set_epi32(1 << 14, 0, 0, 0);
52938 assert_eq_m128i(r, e);
52939 }
52940
52941 #[simd_test(enable = "avx512f,avx512vl")]
52942 unsafe fn test_mm_maskz_sll_epi32() {
52943 let a = _mm_set_epi32(1 << 13, 0, 0, 0);
52944 let count = _mm_set_epi32(0, 0, 0, 1);
52945 let r = _mm_maskz_sll_epi32(0, a, count);
52946 assert_eq_m128i(r, _mm_setzero_si128());
52947 let r = _mm_maskz_sll_epi32(0b00001111, a, count);
52948 let e = _mm_set_epi32(1 << 14, 0, 0, 0);
52949 assert_eq_m128i(r, e);
52950 }
52951
52952 #[simd_test(enable = "avx512f")]
52953 unsafe fn test_mm512_srl_epi32() {
52954 #[rustfmt::skip]
52955 let a = _mm512_set_epi32(
52956 1 << 31, 1 << 0, 1 << 1, 1 << 2,
52957 0, 0, 0, 0,
52958 0, 0, 0, 0,
52959 0, 0, 0, 0,
52960 );
52961 let count = _mm_set_epi32(0, 0, 0, 2);
52962 let r = _mm512_srl_epi32(a, count);
52963 let e = _mm512_set_epi32(1 << 29, 0, 0, 1 << 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
52964 assert_eq_m512i(r, e);
52965 }
52966
52967 #[simd_test(enable = "avx512f")]
52968 unsafe fn test_mm512_mask_srl_epi32() {
52969 #[rustfmt::skip]
52970 let a = _mm512_set_epi32(
52971 1 << 31, 1 << 0, 1 << 1, 1 << 2,
52972 0, 0, 0, 0,
52973 0, 0, 0, 0,
52974 0, 0, 0, 0,
52975 );
52976 let count = _mm_set_epi32(0, 0, 0, 2);
52977 let r = _mm512_mask_srl_epi32(a, 0, a, count);
52978 assert_eq_m512i(r, a);
52979 let r = _mm512_mask_srl_epi32(a, 0b11111111_11111111, a, count);
52980 let e = _mm512_set_epi32(1 << 29, 0, 0, 1 << 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
52981 assert_eq_m512i(r, e);
52982 }
52983
52984 #[simd_test(enable = "avx512f")]
52985 unsafe fn test_mm512_maskz_srl_epi32() {
52986 #[rustfmt::skip]
52987 let a = _mm512_set_epi32(
52988 1 << 31, 1 << 0, 1 << 1, 1 << 2,
52989 0, 0, 0, 0,
52990 0, 0, 0, 0,
52991 0, 0, 0, 1 << 31,
52992 );
52993 let count = _mm_set_epi32(2, 0, 0, 2);
52994 let r = _mm512_maskz_srl_epi32(0, a, count);
52995 assert_eq_m512i(r, _mm512_setzero_si512());
52996 let r = _mm512_maskz_srl_epi32(0b00000000_11111111, a, count);
52997 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 29);
52998 assert_eq_m512i(r, e);
52999 }
53000
53001 #[simd_test(enable = "avx512f,avx512vl")]
53002 unsafe fn test_mm256_mask_srl_epi32() {
53003 let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
53004 let count = _mm_set_epi32(0, 0, 0, 1);
53005 let r = _mm256_mask_srl_epi32(a, 0, a, count);
53006 assert_eq_m256i(r, a);
53007 let r = _mm256_mask_srl_epi32(a, 0b11111111, a, count);
53008 let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
53009 assert_eq_m256i(r, e);
53010 }
53011
53012 #[simd_test(enable = "avx512f,avx512vl")]
53013 unsafe fn test_mm256_maskz_srl_epi32() {
53014 let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
53015 let count = _mm_set_epi32(0, 0, 0, 1);
53016 let r = _mm256_maskz_srl_epi32(0, a, count);
53017 assert_eq_m256i(r, _mm256_setzero_si256());
53018 let r = _mm256_maskz_srl_epi32(0b11111111, a, count);
53019 let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
53020 assert_eq_m256i(r, e);
53021 }
53022
53023 #[simd_test(enable = "avx512f,avx512vl")]
53024 unsafe fn test_mm_mask_srl_epi32() {
53025 let a = _mm_set_epi32(1 << 5, 0, 0, 0);
53026 let count = _mm_set_epi32(0, 0, 0, 1);
53027 let r = _mm_mask_srl_epi32(a, 0, a, count);
53028 assert_eq_m128i(r, a);
53029 let r = _mm_mask_srl_epi32(a, 0b00001111, a, count);
53030 let e = _mm_set_epi32(1 << 4, 0, 0, 0);
53031 assert_eq_m128i(r, e);
53032 }
53033
53034 #[simd_test(enable = "avx512f,avx512vl")]
53035 unsafe fn test_mm_maskz_srl_epi32() {
53036 let a = _mm_set_epi32(1 << 5, 0, 0, 0);
53037 let count = _mm_set_epi32(0, 0, 0, 1);
53038 let r = _mm_maskz_srl_epi32(0, a, count);
53039 assert_eq_m128i(r, _mm_setzero_si128());
53040 let r = _mm_maskz_srl_epi32(0b00001111, a, count);
53041 let e = _mm_set_epi32(1 << 4, 0, 0, 0);
53042 assert_eq_m128i(r, e);
53043 }
53044
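    // sra is an arithmetic shift: the sign bit is replicated, so negative
    // values round toward negative infinity (e.g. -15 >> 2 == -4).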
53045 #[simd_test(enable = "avx512f")]
53046 unsafe fn test_mm512_sra_epi32() {
53047 let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
53048 let count = _mm_set_epi32(1, 0, 0, 2);
53049 let r = _mm512_sra_epi32(a, count);
53050 let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
53051 assert_eq_m512i(r, e);
53052 }
53053
53054 #[simd_test(enable = "avx512f")]
53055 unsafe fn test_mm512_mask_sra_epi32() {
53056 let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16);
53057 let count = _mm_set_epi32(0, 0, 0, 2);
53058 let r = _mm512_mask_sra_epi32(a, 0, a, count);
53059 assert_eq_m512i(r, a);
53060 let r = _mm512_mask_sra_epi32(a, 0b11111111_11111111, a, count);
53061 let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4);
53062 assert_eq_m512i(r, e);
53063 }
53064
53065 #[simd_test(enable = "avx512f")]
53066 unsafe fn test_mm512_maskz_sra_epi32() {
53067 let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -15, -14);
53068 let count = _mm_set_epi32(2, 0, 0, 2);
53069 let r = _mm512_maskz_sra_epi32(0, a, count);
53070 assert_eq_m512i(r, _mm512_setzero_si512());
53071 let r = _mm512_maskz_sra_epi32(0b00000000_11111111, a, count);
53072 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -4);
53073 assert_eq_m512i(r, e);
53074 }
53075
53076 #[simd_test(enable = "avx512f,avx512vl")]
53077 unsafe fn test_mm256_mask_sra_epi32() {
53078 let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
53079 let count = _mm_set_epi32(0, 0, 0, 1);
53080 let r = _mm256_mask_sra_epi32(a, 0, a, count);
53081 assert_eq_m256i(r, a);
53082 let r = _mm256_mask_sra_epi32(a, 0b11111111, a, count);
53083 let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
53084 assert_eq_m256i(r, e);
53085 }
53086
53087 #[simd_test(enable = "avx512f,avx512vl")]
53088 unsafe fn test_mm256_maskz_sra_epi32() {
53089 let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
53090 let count = _mm_set_epi32(0, 0, 0, 1);
53091 let r = _mm256_maskz_sra_epi32(0, a, count);
53092 assert_eq_m256i(r, _mm256_setzero_si256());
53093 let r = _mm256_maskz_sra_epi32(0b11111111, a, count);
53094 let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
53095 assert_eq_m256i(r, e);
53096 }
53097
53098 #[simd_test(enable = "avx512f,avx512vl")]
53099 unsafe fn test_mm_mask_sra_epi32() {
53100 let a = _mm_set_epi32(1 << 5, 0, 0, 0);
53101 let count = _mm_set_epi32(0, 0, 0, 1);
53102 let r = _mm_mask_sra_epi32(a, 0, a, count);
53103 assert_eq_m128i(r, a);
53104 let r = _mm_mask_sra_epi32(a, 0b00001111, a, count);
53105 let e = _mm_set_epi32(1 << 4, 0, 0, 0);
53106 assert_eq_m128i(r, e);
53107 }
53108
53109 #[simd_test(enable = "avx512f,avx512vl")]
53110 unsafe fn test_mm_maskz_sra_epi32() {
53111 let a = _mm_set_epi32(1 << 5, 0, 0, 0);
53112 let count = _mm_set_epi32(0, 0, 0, 1);
53113 let r = _mm_maskz_sra_epi32(0, a, count);
53114 assert_eq_m128i(r, _mm_setzero_si128());
53115 let r = _mm_maskz_sra_epi32(0b00001111, a, count);
53116 let e = _mm_set_epi32(1 << 4, 0, 0, 0);
53117 assert_eq_m128i(r, e);
53118 }
53119
53120 #[simd_test(enable = "avx512f")]
53121 unsafe fn test_mm512_srav_epi32() {
53122 let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
53123 let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
53124 let r = _mm512_srav_epi32(a, count);
53125 let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
53126 assert_eq_m512i(r, e);
53127 }
53128
53129 #[simd_test(enable = "avx512f")]
53130 unsafe fn test_mm512_mask_srav_epi32() {
53131 let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16);
53132 let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
53133 let r = _mm512_mask_srav_epi32(a, 0, a, count);
53134 assert_eq_m512i(r, a);
53135 let r = _mm512_mask_srav_epi32(a, 0b11111111_11111111, a, count);
53136 let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8);
53137 assert_eq_m512i(r, e);
53138 }
53139
53140 #[simd_test(enable = "avx512f")]
53141 unsafe fn test_mm512_maskz_srav_epi32() {
53142 let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -15, -14);
53143 let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2);
53144 let r = _mm512_maskz_srav_epi32(0, a, count);
53145 assert_eq_m512i(r, _mm512_setzero_si512());
53146 let r = _mm512_maskz_srav_epi32(0b00000000_11111111, a, count);
53147 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -4);
53148 assert_eq_m512i(r, e);
53149 }
53150
53151 #[simd_test(enable = "avx512f,avx512vl")]
53152 unsafe fn test_mm256_mask_srav_epi32() {
53153 let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
53154 let count = _mm256_set1_epi32(1);
53155 let r = _mm256_mask_srav_epi32(a, 0, a, count);
53156 assert_eq_m256i(r, a);
53157 let r = _mm256_mask_srav_epi32(a, 0b11111111, a, count);
53158 let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
53159 assert_eq_m256i(r, e);
53160 }
53161
53162 #[simd_test(enable = "avx512f,avx512vl")]
53163 unsafe fn test_mm256_maskz_srav_epi32() {
53164 let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
53165 let count = _mm256_set1_epi32(1);
53166 let r = _mm256_maskz_srav_epi32(0, a, count);
53167 assert_eq_m256i(r, _mm256_setzero_si256());
53168 let r = _mm256_maskz_srav_epi32(0b11111111, a, count);
53169 let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
53170 assert_eq_m256i(r, e);
53171 }
53172
53173 #[simd_test(enable = "avx512f,avx512vl")]
53174 unsafe fn test_mm_mask_srav_epi32() {
53175 let a = _mm_set_epi32(1 << 5, 0, 0, 0);
53176 let count = _mm_set1_epi32(1);
53177 let r = _mm_mask_srav_epi32(a, 0, a, count);
53178 assert_eq_m128i(r, a);
53179 let r = _mm_mask_srav_epi32(a, 0b00001111, a, count);
53180 let e = _mm_set_epi32(1 << 4, 0, 0, 0);
53181 assert_eq_m128i(r, e);
53182 }
53183
53184 #[simd_test(enable = "avx512f,avx512vl")]
53185 unsafe fn test_mm_maskz_srav_epi32() {
53186 let a = _mm_set_epi32(1 << 5, 0, 0, 0);
53187 let count = _mm_set1_epi32(1);
53188 let r = _mm_maskz_srav_epi32(0, a, count);
53189 assert_eq_m128i(r, _mm_setzero_si128());
53190 let r = _mm_maskz_srav_epi32(0b00001111, a, count);
53191 let e = _mm_set_epi32(1 << 4, 0, 0, 0);
53192 assert_eq_m128i(r, e);
53193 }
53194
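    // srai takes the shift count as a const generic immediate instead of a
    // vector operand.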
53195 #[simd_test(enable = "avx512f")]
53196 unsafe fn test_mm512_srai_epi32() {
53197 let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, -15);
53198 let r = _mm512_srai_epi32::<2>(a);
53199 let e = _mm512_set_epi32(2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, -4);
53200 assert_eq_m512i(r, e);
53201 }
53202
53203 #[simd_test(enable = "avx512f")]
53204 unsafe fn test_mm512_mask_srai_epi32() {
53205 let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, -15);
53206 let r = _mm512_mask_srai_epi32::<2>(a, 0, a);
53207 assert_eq_m512i(r, a);
53208 let r = _mm512_mask_srai_epi32::<2>(a, 0b11111111_11111111, a);
53209 let e = _mm512_set_epi32(2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, -4);
53210 assert_eq_m512i(r, e);
53211 }
53212
53213 #[simd_test(enable = "avx512f")]
53214 unsafe fn test_mm512_maskz_srai_epi32() {
53215 let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, -15);
53216 let r = _mm512_maskz_srai_epi32::<2>(0, a);
53217 assert_eq_m512i(r, _mm512_setzero_si512());
53218 let r = _mm512_maskz_srai_epi32::<2>(0b00000000_11111111, a);
53219 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, -4);
53220 assert_eq_m512i(r, e);
53221 }
53222
53223 #[simd_test(enable = "avx512f,avx512vl")]
53224 unsafe fn test_mm256_mask_srai_epi32() {
53225 let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
53226 let r = _mm256_mask_srai_epi32::<1>(a, 0, a);
53227 assert_eq_m256i(r, a);
53228 let r = _mm256_mask_srai_epi32::<1>(a, 0b11111111, a);
53229 let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
53230 assert_eq_m256i(r, e);
53231 }
53232
53233 #[simd_test(enable = "avx512f,avx512vl")]
53234 unsafe fn test_mm256_maskz_srai_epi32() {
53235 let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
53236 let r = _mm256_maskz_srai_epi32::<1>(0, a);
53237 assert_eq_m256i(r, _mm256_setzero_si256());
53238 let r = _mm256_maskz_srai_epi32::<1>(0b11111111, a);
53239 let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
53240 assert_eq_m256i(r, e);
53241 }
53242
53243 #[simd_test(enable = "avx512f,avx512vl")]
53244 unsafe fn test_mm_mask_srai_epi32() {
53245 let a = _mm_set_epi32(1 << 5, 0, 0, 0);
53246 let r = _mm_mask_srai_epi32::<1>(a, 0, a);
53247 assert_eq_m128i(r, a);
53248 let r = _mm_mask_srai_epi32::<1>(a, 0b00001111, a);
53249 let e = _mm_set_epi32(1 << 4, 0, 0, 0);
53250 assert_eq_m128i(r, e);
53251 }
53252
53253 #[simd_test(enable = "avx512f,avx512vl")]
53254 unsafe fn test_mm_maskz_srai_epi32() {
53255 let a = _mm_set_epi32(1 << 5, 0, 0, 0);
53256 let r = _mm_maskz_srai_epi32::<1>(0, a);
53257 assert_eq_m128i(r, _mm_setzero_si128());
53258 let r = _mm_maskz_srai_epi32::<1>(0b00001111, a);
53259 let e = _mm_set_epi32(1 << 4, 0, 0, 0);
53260 assert_eq_m128i(r, e);
53261 }
53262
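    // permute_ps reorders elements within each 128-bit lane, applying the same
    // 2-bit index per result position in every lane; 0b11_11_11_11 broadcasts
    // lane element 3 across each lane.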
53263 #[simd_test(enable = "avx512f")]
53264 unsafe fn test_mm512_permute_ps() {
53265 let a = _mm512_setr_ps(
53266 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53267 );
53268 let r = _mm512_permute_ps::<0b11_11_11_11>(a);
53269 let e = _mm512_setr_ps(
53270 3., 3., 3., 3., 7., 7., 7., 7., 11., 11., 11., 11., 15., 15., 15., 15.,
53271 );
53272 assert_eq_m512(r, e);
53273 }
53274
53275 #[simd_test(enable = "avx512f")]
53276 unsafe fn test_mm512_mask_permute_ps() {
53277 let a = _mm512_setr_ps(
53278 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53279 );
53280 let r = _mm512_mask_permute_ps::<0b11_11_11_11>(a, 0, a);
53281 assert_eq_m512(r, a);
53282 let r = _mm512_mask_permute_ps::<0b11_11_11_11>(a, 0b11111111_11111111, a);
53283 let e = _mm512_setr_ps(
53284 3., 3., 3., 3., 7., 7., 7., 7., 11., 11., 11., 11., 15., 15., 15., 15.,
53285 );
53286 assert_eq_m512(r, e);
53287 }
53288
53289 #[simd_test(enable = "avx512f")]
53290 unsafe fn test_mm512_maskz_permute_ps() {
53291 let a = _mm512_setr_ps(
53292 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53293 );
53294 let r = _mm512_maskz_permute_ps::<0b11_11_11_11>(0, a);
53295 assert_eq_m512(r, _mm512_setzero_ps());
53296 let r = _mm512_maskz_permute_ps::<0b11_11_11_11>(0b11111111_11111111, a);
53297 let e = _mm512_setr_ps(
53298 3., 3., 3., 3., 7., 7., 7., 7., 11., 11., 11., 11., 15., 15., 15., 15.,
53299 );
53300 assert_eq_m512(r, e);
53301 }
53302
53303 #[simd_test(enable = "avx512f,avx512vl")]
53304 unsafe fn test_mm256_mask_permute_ps() {
53305 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53306 let r = _mm256_mask_permute_ps::<0b11_11_11_11>(a, 0, a);
53307 assert_eq_m256(r, a);
53308 let r = _mm256_mask_permute_ps::<0b11_11_11_11>(a, 0b11111111, a);
53309 let e = _mm256_set_ps(0., 0., 0., 0., 4., 4., 4., 4.);
53310 assert_eq_m256(r, e);
53311 }
53312
53313 #[simd_test(enable = "avx512f,avx512vl")]
53314 unsafe fn test_mm256_maskz_permute_ps() {
53315 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53316 let r = _mm256_maskz_permute_ps::<0b11_11_11_11>(0, a);
53317 assert_eq_m256(r, _mm256_setzero_ps());
53318 let r = _mm256_maskz_permute_ps::<0b11_11_11_11>(0b11111111, a);
53319 let e = _mm256_set_ps(0., 0., 0., 0., 4., 4., 4., 4.);
53320 assert_eq_m256(r, e);
53321 }
53322
53323 #[simd_test(enable = "avx512f,avx512vl")]
53324 unsafe fn test_mm_mask_permute_ps() {
53325 let a = _mm_set_ps(0., 1., 2., 3.);
53326 let r = _mm_mask_permute_ps::<0b11_11_11_11>(a, 0, a);
53327 assert_eq_m128(r, a);
53328 let r = _mm_mask_permute_ps::<0b11_11_11_11>(a, 0b00001111, a);
53329 let e = _mm_set_ps(0., 0., 0., 0.);
53330 assert_eq_m128(r, e);
53331 }
53332
53333 #[simd_test(enable = "avx512f,avx512vl")]
53334 unsafe fn test_mm_maskz_permute_ps() {
53335 let a = _mm_set_ps(0., 1., 2., 3.);
53336 let r = _mm_maskz_permute_ps::<0b11_11_11_11>(0, a);
53337 assert_eq_m128(r, _mm_setzero_ps());
53338 let r = _mm_maskz_permute_ps::<0b11_11_11_11>(0b00001111, a);
53339 let e = _mm_set_ps(0., 0., 0., 0.);
53340 assert_eq_m128(r, e);
53341 }
53342
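    // Despite its name, `_mm512_permutevar_epi32` permutes across the whole
    // vector: index 1 selects element 1 of `a`, which is 14 here because
    // `_mm512_set_epi32` lists element 15 first.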
53343 #[simd_test(enable = "avx512f")]
53344 unsafe fn test_mm512_permutevar_epi32() {
53345 let idx = _mm512_set1_epi32(1);
53346 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53347 let r = _mm512_permutevar_epi32(idx, a);
53348 let e = _mm512_set1_epi32(14);
53349 assert_eq_m512i(r, e);
53350 }
53351
53352 #[simd_test(enable = "avx512f")]
53353 unsafe fn test_mm512_mask_permutevar_epi32() {
53354 let idx = _mm512_set1_epi32(1);
53355 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53356 let r = _mm512_mask_permutevar_epi32(a, 0, idx, a);
53357 assert_eq_m512i(r, a);
53358 let r = _mm512_mask_permutevar_epi32(a, 0b11111111_11111111, idx, a);
53359 let e = _mm512_set1_epi32(14);
53360 assert_eq_m512i(r, e);
53361 }
53362
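    // permutevar_ps, by contrast, uses only the low 2 bits of each index and
    // selects within each 128-bit lane.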
53363 #[simd_test(enable = "avx512f")]
53364 unsafe fn test_mm512_permutevar_ps() {
53365 let a = _mm512_set_ps(
53366 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53367 );
53368 let b = _mm512_set1_epi32(0b01);
53369 let r = _mm512_permutevar_ps(a, b);
53370 let e = _mm512_set_ps(
53371 2., 2., 2., 2., 6., 6., 6., 6., 10., 10., 10., 10., 14., 14., 14., 14.,
53372 );
53373 assert_eq_m512(r, e);
53374 }
53375
53376 #[simd_test(enable = "avx512f")]
53377 unsafe fn test_mm512_mask_permutevar_ps() {
53378 let a = _mm512_set_ps(
53379 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53380 );
53381 let b = _mm512_set1_epi32(0b01);
53382 let r = _mm512_mask_permutevar_ps(a, 0, a, b);
53383 assert_eq_m512(r, a);
53384 let r = _mm512_mask_permutevar_ps(a, 0b11111111_11111111, a, b);
53385 let e = _mm512_set_ps(
53386 2., 2., 2., 2., 6., 6., 6., 6., 10., 10., 10., 10., 14., 14., 14., 14.,
53387 );
53388 assert_eq_m512(r, e);
53389 }
53390
53391 #[simd_test(enable = "avx512f")]
53392 unsafe fn test_mm512_maskz_permutevar_ps() {
53393 let a = _mm512_set_ps(
53394 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53395 );
53396 let b = _mm512_set1_epi32(0b01);
53397 let r = _mm512_maskz_permutevar_ps(0, a, b);
53398 assert_eq_m512(r, _mm512_setzero_ps());
53399 let r = _mm512_maskz_permutevar_ps(0b00000000_11111111, a, b);
53400 let e = _mm512_set_ps(
53401 0., 0., 0., 0., 0., 0., 0., 0., 10., 10., 10., 10., 14., 14., 14., 14.,
53402 );
53403 assert_eq_m512(r, e);
53404 }
53405
53406 #[simd_test(enable = "avx512f,avx512vl")]
53407 unsafe fn test_mm256_mask_permutevar_ps() {
53408 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53409 let b = _mm256_set1_epi32(0b01);
53410 let r = _mm256_mask_permutevar_ps(a, 0, a, b);
53411 assert_eq_m256(r, a);
53412 let r = _mm256_mask_permutevar_ps(a, 0b11111111, a, b);
53413 let e = _mm256_set_ps(2., 2., 2., 2., 6., 6., 6., 6.);
53414 assert_eq_m256(r, e);
53415 }
53416
53417 #[simd_test(enable = "avx512f,avx512vl")]
53418 unsafe fn test_mm256_maskz_permutevar_ps() {
53419 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53420 let b = _mm256_set1_epi32(0b01);
53421 let r = _mm256_maskz_permutevar_ps(0, a, b);
53422 assert_eq_m256(r, _mm256_setzero_ps());
53423 let r = _mm256_maskz_permutevar_ps(0b11111111, a, b);
53424 let e = _mm256_set_ps(2., 2., 2., 2., 6., 6., 6., 6.);
53425 assert_eq_m256(r, e);
53426 }
53427
53428 #[simd_test(enable = "avx512f,avx512vl")]
53429 unsafe fn test_mm_mask_permutevar_ps() {
53430 let a = _mm_set_ps(0., 1., 2., 3.);
53431 let b = _mm_set1_epi32(0b01);
53432 let r = _mm_mask_permutevar_ps(a, 0, a, b);
53433 assert_eq_m128(r, a);
53434 let r = _mm_mask_permutevar_ps(a, 0b00001111, a, b);
53435 let e = _mm_set_ps(2., 2., 2., 2.);
53436 assert_eq_m128(r, e);
53437 }
53438
53439 #[simd_test(enable = "avx512f,avx512vl")]
53440 unsafe fn test_mm_maskz_permutevar_ps() {
53441 let a = _mm_set_ps(0., 1., 2., 3.);
53442 let b = _mm_set1_epi32(0b01);
53443 let r = _mm_maskz_permutevar_ps(0, a, b);
53444 assert_eq_m128(r, _mm_setzero_ps());
53445 let r = _mm_maskz_permutevar_ps(0b00001111, a, b);
53446 let e = _mm_set_ps(2., 2., 2., 2.);
53447 assert_eq_m128(r, e);
53448 }
53449
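    // permutexvar is the full-width, cross-lane permute: an all-ones index
    // vector broadcasts element 1 of `a` to every position.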
53450 #[simd_test(enable = "avx512f")]
53451 unsafe fn test_mm512_permutexvar_epi32() {
53452 let idx = _mm512_set1_epi32(1);
53453 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53454 let r = _mm512_permutexvar_epi32(idx, a);
53455 let e = _mm512_set1_epi32(14);
53456 assert_eq_m512i(r, e);
53457 }
53458
53459 #[simd_test(enable = "avx512f")]
53460 unsafe fn test_mm512_mask_permutexvar_epi32() {
53461 let idx = _mm512_set1_epi32(1);
53462 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53463 let r = _mm512_mask_permutexvar_epi32(a, 0, idx, a);
53464 assert_eq_m512i(r, a);
53465 let r = _mm512_mask_permutexvar_epi32(a, 0b11111111_11111111, idx, a);
53466 let e = _mm512_set1_epi32(14);
53467 assert_eq_m512i(r, e);
53468 }
53469
53470 #[simd_test(enable = "avx512f")]
53471 unsafe fn test_mm512_maskz_permutexvar_epi32() {
53472 let idx = _mm512_set1_epi32(1);
53473 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53474 let r = _mm512_maskz_permutexvar_epi32(0, idx, a);
53475 assert_eq_m512i(r, _mm512_setzero_si512());
53476 let r = _mm512_maskz_permutexvar_epi32(0b00000000_11111111, idx, a);
53477 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14);
53478 assert_eq_m512i(r, e);
53479 }
53480
53481 #[simd_test(enable = "avx512f,avx512vl")]
53482 unsafe fn test_mm256_permutexvar_epi32() {
53483 let idx = _mm256_set1_epi32(1);
53484 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
53485 let r = _mm256_permutexvar_epi32(idx, a);
53486 let e = _mm256_set1_epi32(6);
53487 assert_eq_m256i(r, e);
53488 }
53489
53490 #[simd_test(enable = "avx512f,avx512vl")]
53491 unsafe fn test_mm256_mask_permutexvar_epi32() {
53492 let idx = _mm256_set1_epi32(1);
53493 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
53494 let r = _mm256_mask_permutexvar_epi32(a, 0, idx, a);
53495 assert_eq_m256i(r, a);
53496 let r = _mm256_mask_permutexvar_epi32(a, 0b11111111, idx, a);
53497 let e = _mm256_set1_epi32(6);
53498 assert_eq_m256i(r, e);
53499 }
53500
53501 #[simd_test(enable = "avx512f,avx512vl")]
53502 unsafe fn test_mm256_maskz_permutexvar_epi32() {
53503 let idx = _mm256_set1_epi32(1);
53504 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
53505 let r = _mm256_maskz_permutexvar_epi32(0, idx, a);
53506 assert_eq_m256i(r, _mm256_setzero_si256());
53507 let r = _mm256_maskz_permutexvar_epi32(0b11111111, idx, a);
53508 let e = _mm256_set1_epi32(6);
53509 assert_eq_m256i(r, e);
53510 }
53511
53512 #[simd_test(enable = "avx512f")]
53513 unsafe fn test_mm512_permutexvar_ps() {
53514 let idx = _mm512_set1_epi32(1);
53515 let a = _mm512_set_ps(
53516 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53517 );
53518 let r = _mm512_permutexvar_ps(idx, a);
53519 let e = _mm512_set1_ps(14.);
53520 assert_eq_m512(r, e);
53521 }
53522
53523 #[simd_test(enable = "avx512f")]
53524 unsafe fn test_mm512_mask_permutexvar_ps() {
53525 let idx = _mm512_set1_epi32(1);
53526 let a = _mm512_set_ps(
53527 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53528 );
53529 let r = _mm512_mask_permutexvar_ps(a, 0, idx, a);
53530 assert_eq_m512(r, a);
53531 let r = _mm512_mask_permutexvar_ps(a, 0b11111111_11111111, idx, a);
53532 let e = _mm512_set1_ps(14.);
53533 assert_eq_m512(r, e);
53534 }
53535
53536 #[simd_test(enable = "avx512f")]
53537 unsafe fn test_mm512_maskz_permutexvar_ps() {
53538 let idx = _mm512_set1_epi32(1);
53539 let a = _mm512_set_ps(
53540 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53541 );
53542 let r = _mm512_maskz_permutexvar_ps(0, idx, a);
53543 assert_eq_m512(r, _mm512_setzero_ps());
53544 let r = _mm512_maskz_permutexvar_ps(0b00000000_11111111, idx, a);
53545 let e = _mm512_set_ps(
53546 0., 0., 0., 0., 0., 0., 0., 0., 14., 14., 14., 14., 14., 14., 14., 14.,
53547 );
53548 assert_eq_m512(r, e);
53549 }
53550
53551 #[simd_test(enable = "avx512f,avx512vl")]
53552 unsafe fn test_mm256_permutexvar_ps() {
53553 let idx = _mm256_set1_epi32(1);
53554 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53555 let r = _mm256_permutexvar_ps(idx, a);
53556 let e = _mm256_set1_ps(6.);
53557 assert_eq_m256(r, e);
53558 }
53559
53560 #[simd_test(enable = "avx512f,avx512vl")]
53561 unsafe fn test_mm256_mask_permutexvar_ps() {
53562 let idx = _mm256_set1_epi32(1);
53563 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53564 let r = _mm256_mask_permutexvar_ps(a, 0, idx, a);
53565 assert_eq_m256(r, a);
53566 let r = _mm256_mask_permutexvar_ps(a, 0b11111111, idx, a);
53567 let e = _mm256_set1_ps(6.);
53568 assert_eq_m256(r, e);
53569 }
53570
53571 #[simd_test(enable = "avx512f,avx512vl")]
53572 unsafe fn test_mm256_maskz_permutexvar_ps() {
53573 let idx = _mm256_set1_epi32(1);
53574 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53575 let r = _mm256_maskz_permutexvar_ps(0, idx, a);
53576 assert_eq_m256(r, _mm256_setzero_ps());
53577 let r = _mm256_maskz_permutexvar_ps(0b11111111, idx, a);
53578 let e = _mm256_set1_ps(6.);
53579 assert_eq_m256(r, e);
53580 }
53581
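    // permutex2var indexes the concatenation of `a` and `b`: with 16 elements
    // per vector, bit 4 of an index (`1 << 4`) selects from `b`; the 256-bit
    // and 128-bit tests below use `1 << 3` and `1 << 2` respectively.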
53582 #[simd_test(enable = "avx512f")]
53583 unsafe fn test_mm512_permutex2var_epi32() {
53584 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53585 #[rustfmt::skip]
53586 let idx = _mm512_set_epi32(
53587 1, 1 << 4, 2, 1 << 4,
53588 3, 1 << 4, 4, 1 << 4,
53589 5, 1 << 4, 6, 1 << 4,
53590 7, 1 << 4, 8, 1 << 4,
53591 );
53592 let b = _mm512_set1_epi32(100);
53593 let r = _mm512_permutex2var_epi32(a, idx, b);
53594 let e = _mm512_set_epi32(
53595 14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
53596 );
53597 assert_eq_m512i(r, e);
53598 }
53599
53600 #[simd_test(enable = "avx512f")]
53601 unsafe fn test_mm512_mask_permutex2var_epi32() {
53602 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53603 #[rustfmt::skip]
53604 let idx = _mm512_set_epi32(
53605 1, 1 << 4, 2, 1 << 4,
53606 3, 1 << 4, 4, 1 << 4,
53607 5, 1 << 4, 6, 1 << 4,
53608 7, 1 << 4, 8, 1 << 4,
53609 );
53610 let b = _mm512_set1_epi32(100);
53611 let r = _mm512_mask_permutex2var_epi32(a, 0, idx, b);
53612 assert_eq_m512i(r, a);
53613 let r = _mm512_mask_permutex2var_epi32(a, 0b11111111_11111111, idx, b);
53614 let e = _mm512_set_epi32(
53615 14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
53616 );
53617 assert_eq_m512i(r, e);
53618 }
53619
53620 #[simd_test(enable = "avx512f")]
53621 unsafe fn test_mm512_maskz_permutex2var_epi32() {
53622 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53623 #[rustfmt::skip]
53624 let idx = _mm512_set_epi32(
53625 1, 1 << 4, 2, 1 << 4,
53626 3, 1 << 4, 4, 1 << 4,
53627 5, 1 << 4, 6, 1 << 4,
53628 7, 1 << 4, 8, 1 << 4,
53629 );
53630 let b = _mm512_set1_epi32(100);
53631 let r = _mm512_maskz_permutex2var_epi32(0, a, idx, b);
53632 assert_eq_m512i(r, _mm512_setzero_si512());
53633 let r = _mm512_maskz_permutex2var_epi32(0b00000000_11111111, a, idx, b);
53634 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 10, 100, 9, 100, 8, 100, 7, 100);
53635 assert_eq_m512i(r, e);
53636 }
53637
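    // The mask2 variant copies elements from `idx` (not `a`) where the mask
    // bit is clear, which is why an all-zero mask returns `idx` unchanged.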
53638 #[simd_test(enable = "avx512f")]
53639 unsafe fn test_mm512_mask2_permutex2var_epi32() {
53640 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53641 #[rustfmt::skip]
53642 let idx = _mm512_set_epi32(
53643 1000, 1 << 4, 2000, 1 << 4,
53644 3000, 1 << 4, 4000, 1 << 4,
53645 5, 1 << 4, 6, 1 << 4,
53646 7, 1 << 4, 8, 1 << 4,
53647 );
53648 let b = _mm512_set1_epi32(100);
53649 let r = _mm512_mask2_permutex2var_epi32(a, idx, 0, b);
53650 assert_eq_m512i(r, idx);
53651 let r = _mm512_mask2_permutex2var_epi32(a, idx, 0b00000000_11111111, b);
53652 #[rustfmt::skip]
53653 let e = _mm512_set_epi32(
53654 1000, 1 << 4, 2000, 1 << 4,
53655 3000, 1 << 4, 4000, 1 << 4,
53656 10, 100, 9, 100,
53657 8, 100, 7, 100,
53658 );
53659 assert_eq_m512i(r, e);
53660 }
53661
53662 #[simd_test(enable = "avx512f,avx512vl")]
53663 unsafe fn test_mm256_permutex2var_epi32() {
53664 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
53665 let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
53666 let b = _mm256_set1_epi32(100);
53667 let r = _mm256_permutex2var_epi32(a, idx, b);
53668 let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100);
53669 assert_eq_m256i(r, e);
53670 }
53671
53672 #[simd_test(enable = "avx512f,avx512vl")]
53673 unsafe fn test_mm256_mask_permutex2var_epi32() {
53674 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
53675 let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
53676 let b = _mm256_set1_epi32(100);
53677 let r = _mm256_mask_permutex2var_epi32(a, 0, idx, b);
53678 assert_eq_m256i(r, a);
53679 let r = _mm256_mask_permutex2var_epi32(a, 0b11111111, idx, b);
53680 let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100);
53681 assert_eq_m256i(r, e);
53682 }
53683
53684 #[simd_test(enable = "avx512f,avx512vl")]
53685 unsafe fn test_mm256_maskz_permutex2var_epi32() {
53686 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
53687 let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
53688 let b = _mm256_set1_epi32(100);
53689 let r = _mm256_maskz_permutex2var_epi32(0, a, idx, b);
53690 assert_eq_m256i(r, _mm256_setzero_si256());
53691 let r = _mm256_maskz_permutex2var_epi32(0b11111111, a, idx, b);
53692 let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100);
53693 assert_eq_m256i(r, e);
53694 }
53695
53696 #[simd_test(enable = "avx512f,avx512vl")]
53697 unsafe fn test_mm256_mask2_permutex2var_epi32() {
53698 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
53699 let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
53700 let b = _mm256_set1_epi32(100);
53701 let r = _mm256_mask2_permutex2var_epi32(a, idx, 0, b);
53702 assert_eq_m256i(r, idx);
53703 let r = _mm256_mask2_permutex2var_epi32(a, idx, 0b11111111, b);
53704 let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100);
53705 assert_eq_m256i(r, e);
53706 }
53707
53708 #[simd_test(enable = "avx512f,avx512vl")]
53709 unsafe fn test_mm_permutex2var_epi32() {
53710 let a = _mm_set_epi32(0, 1, 2, 3);
53711 let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
53712 let b = _mm_set1_epi32(100);
53713 let r = _mm_permutex2var_epi32(a, idx, b);
53714 let e = _mm_set_epi32(2, 100, 1, 100);
53715 assert_eq_m128i(r, e);
53716 }
53717
53718 #[simd_test(enable = "avx512f,avx512vl")]
53719 unsafe fn test_mm_mask_permutex2var_epi32() {
53720 let a = _mm_set_epi32(0, 1, 2, 3);
53721 let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
53722 let b = _mm_set1_epi32(100);
53723 let r = _mm_mask_permutex2var_epi32(a, 0, idx, b);
53724 assert_eq_m128i(r, a);
53725 let r = _mm_mask_permutex2var_epi32(a, 0b00001111, idx, b);
53726 let e = _mm_set_epi32(2, 100, 1, 100);
53727 assert_eq_m128i(r, e);
53728 }
53729
53730 #[simd_test(enable = "avx512f,avx512vl")]
53731 unsafe fn test_mm_maskz_permutex2var_epi32() {
53732 let a = _mm_set_epi32(0, 1, 2, 3);
53733 let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
53734 let b = _mm_set1_epi32(100);
53735 let r = _mm_maskz_permutex2var_epi32(0, a, idx, b);
53736 assert_eq_m128i(r, _mm_setzero_si128());
53737 let r = _mm_maskz_permutex2var_epi32(0b00001111, a, idx, b);
53738 let e = _mm_set_epi32(2, 100, 1, 100);
53739 assert_eq_m128i(r, e);
53740 }
53741
53742 #[simd_test(enable = "avx512f,avx512vl")]
53743 unsafe fn test_mm_mask2_permutex2var_epi32() {
53744 let a = _mm_set_epi32(0, 1, 2, 3);
53745 let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
53746 let b = _mm_set1_epi32(100);
53747 let r = _mm_mask2_permutex2var_epi32(a, idx, 0, b);
53748 assert_eq_m128i(r, idx);
53749 let r = _mm_mask2_permutex2var_epi32(a, idx, 0b00001111, b);
53750 let e = _mm_set_epi32(2, 100, 1, 100);
53751 assert_eq_m128i(r, e);
53752 }
53753
53754 #[simd_test(enable = "avx512f")]
53755 unsafe fn test_mm512_permutex2var_ps() {
53756 let a = _mm512_set_ps(
53757 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53758 );
53759 #[rustfmt::skip]
53760 let idx = _mm512_set_epi32(
53761 1, 1 << 4, 2, 1 << 4,
53762 3, 1 << 4, 4, 1 << 4,
53763 5, 1 << 4, 6, 1 << 4,
53764 7, 1 << 4, 8, 1 << 4,
53765 );
53766 let b = _mm512_set1_ps(100.);
53767 let r = _mm512_permutex2var_ps(a, idx, b);
53768 let e = _mm512_set_ps(
53769 14., 100., 13., 100., 12., 100., 11., 100., 10., 100., 9., 100., 8., 100., 7., 100.,
53770 );
53771 assert_eq_m512(r, e);
53772 }
53773
53774 #[simd_test(enable = "avx512f")]
53775 unsafe fn test_mm512_mask_permutex2var_ps() {
53776 let a = _mm512_set_ps(
53777 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53778 );
53779 #[rustfmt::skip]
53780 let idx = _mm512_set_epi32(
53781 1, 1 << 4, 2, 1 << 4,
53782 3, 1 << 4, 4, 1 << 4,
53783 5, 1 << 4, 6, 1 << 4,
53784 7, 1 << 4, 8, 1 << 4,
53785 );
53786 let b = _mm512_set1_ps(100.);
53787 let r = _mm512_mask_permutex2var_ps(a, 0, idx, b);
53788 assert_eq_m512(r, a);
53789 let r = _mm512_mask_permutex2var_ps(a, 0b11111111_11111111, idx, b);
53790 let e = _mm512_set_ps(
53791 14., 100., 13., 100., 12., 100., 11., 100., 10., 100., 9., 100., 8., 100., 7., 100.,
53792 );
53793 assert_eq_m512(r, e);
53794 }
53795
53796 #[simd_test(enable = "avx512f")]
53797 unsafe fn test_mm512_maskz_permutex2var_ps() {
53798 let a = _mm512_set_ps(
53799 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53800 );
53801 #[rustfmt::skip]
53802 let idx = _mm512_set_epi32(
53803 1, 1 << 4, 2, 1 << 4,
53804 3, 1 << 4, 4, 1 << 4,
53805 5, 1 << 4, 6, 1 << 4,
53806 7, 1 << 4, 8, 1 << 4,
53807 );
53808 let b = _mm512_set1_ps(100.);
53809 let r = _mm512_maskz_permutex2var_ps(0, a, idx, b);
53810 assert_eq_m512(r, _mm512_setzero_ps());
53811 let r = _mm512_maskz_permutex2var_ps(0b00000000_11111111, a, idx, b);
53812 let e = _mm512_set_ps(
53813 0., 0., 0., 0., 0., 0., 0., 0., 10., 100., 9., 100., 8., 100., 7., 100.,
53814 );
53815 assert_eq_m512(r, e);
53816 }
53817
53818 #[simd_test(enable = "avx512f")]
53819 unsafe fn test_mm512_mask2_permutex2var_ps() {
53820 let a = _mm512_set_ps(
53821 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53822 );
53823 #[rustfmt::skip]
53824 let idx = _mm512_set_epi32(
53825 1, 1 << 4, 2, 1 << 4,
53826 3, 1 << 4, 4, 1 << 4,
53827 5, 1 << 4, 6, 1 << 4,
53828 7, 1 << 4, 8, 1 << 4,
53829 );
53830 let b = _mm512_set1_ps(100.);
53831 let r = _mm512_mask2_permutex2var_ps(a, idx, 0, b);
53832 assert_eq_m512(r, _mm512_castsi512_ps(idx));
53833 let r = _mm512_mask2_permutex2var_ps(a, idx, 0b11111111_11111111, b);
53834 let e = _mm512_set_ps(
53835 14., 100., 13., 100., 12., 100., 11., 100., 10., 100., 9., 100., 8., 100., 7., 100.,
53836 );
53837 assert_eq_m512(r, e);
53838 }
53839
53840 #[simd_test(enable = "avx512f,avx512vl")]
53841 unsafe fn test_mm256_permutex2var_ps() {
53842 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53843 let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
53844 let b = _mm256_set1_ps(100.);
53845 let r = _mm256_permutex2var_ps(a, idx, b);
53846 let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.);
53847 assert_eq_m256(r, e);
53848 }
53849
53850 #[simd_test(enable = "avx512f,avx512vl")]
53851 unsafe fn test_mm256_mask_permutex2var_ps() {
53852 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53853 let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
53854 let b = _mm256_set1_ps(100.);
53855 let r = _mm256_mask_permutex2var_ps(a, 0, idx, b);
53856 assert_eq_m256(r, a);
53857 let r = _mm256_mask_permutex2var_ps(a, 0b11111111, idx, b);
53858 let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.);
53859 assert_eq_m256(r, e);
53860 }
53861
53862 #[simd_test(enable = "avx512f,avx512vl")]
53863 unsafe fn test_mm256_maskz_permutex2var_ps() {
53864 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53865 let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
53866 let b = _mm256_set1_ps(100.);
53867 let r = _mm256_maskz_permutex2var_ps(0, a, idx, b);
53868 assert_eq_m256(r, _mm256_setzero_ps());
53869 let r = _mm256_maskz_permutex2var_ps(0b11111111, a, idx, b);
53870 let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.);
53871 assert_eq_m256(r, e);
53872 }
53873
53874 #[simd_test(enable = "avx512f,avx512vl")]
53875 unsafe fn test_mm256_mask2_permutex2var_ps() {
53876 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53877 let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
53878 let b = _mm256_set1_ps(100.);
53879 let r = _mm256_mask2_permutex2var_ps(a, idx, 0, b);
53880 assert_eq_m256(r, _mm256_castsi256_ps(idx));
53881 let r = _mm256_mask2_permutex2var_ps(a, idx, 0b11111111, b);
53882 let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.);
53883 assert_eq_m256(r, e);
53884 }
53885
53886 #[simd_test(enable = "avx512f,avx512vl")]
53887 unsafe fn test_mm_permutex2var_ps() {
53888 let a = _mm_set_ps(0., 1., 2., 3.);
53889 let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
53890 let b = _mm_set1_ps(100.);
53891 let r = _mm_permutex2var_ps(a, idx, b);
53892 let e = _mm_set_ps(2., 100., 1., 100.);
53893 assert_eq_m128(r, e);
53894 }
53895
53896 #[simd_test(enable = "avx512f,avx512vl")]
53897 unsafe fn test_mm_mask_permutex2var_ps() {
53898 let a = _mm_set_ps(0., 1., 2., 3.);
53899 let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
53900 let b = _mm_set1_ps(100.);
53901 let r = _mm_mask_permutex2var_ps(a, 0, idx, b);
53902 assert_eq_m128(r, a);
53903 let r = _mm_mask_permutex2var_ps(a, 0b00001111, idx, b);
53904 let e = _mm_set_ps(2., 100., 1., 100.);
53905 assert_eq_m128(r, e);
53906 }
53907
53908 #[simd_test(enable = "avx512f,avx512vl")]
53909 unsafe fn test_mm_maskz_permutex2var_ps() {
53910 let a = _mm_set_ps(0., 1., 2., 3.);
53911 let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
53912 let b = _mm_set1_ps(100.);
53913 let r = _mm_maskz_permutex2var_ps(0, a, idx, b);
53914 assert_eq_m128(r, _mm_setzero_ps());
53915 let r = _mm_maskz_permutex2var_ps(0b00001111, a, idx, b);
53916 let e = _mm_set_ps(2., 100., 1., 100.);
53917 assert_eq_m128(r, e);
53918 }
53919
53920 #[simd_test(enable = "avx512f,avx512vl")]
53921 unsafe fn test_mm_mask2_permutex2var_ps() {
53922 let a = _mm_set_ps(0., 1., 2., 3.);
53923 let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
53924 let b = _mm_set1_ps(100.);
53925 let r = _mm_mask2_permutex2var_ps(a, idx, 0, b);
53926 assert_eq_m128(r, _mm_castsi128_ps(idx));
53927 let r = _mm_mask2_permutex2var_ps(a, idx, 0b00001111, b);
53928 let e = _mm_set_ps(2., 100., 1., 100.);
53929 assert_eq_m128(r, e);
53930 }
53931
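    // shuffle_epi32 applies the `_MM_PERM_*` control to each 128-bit lane
    // independently; `_MM_PERM_AADD` writes lane element 3 to the two low
    // positions and lane element 0 to the two high positions of each lane.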
53932 #[simd_test(enable = "avx512f")]
53933 unsafe fn test_mm512_shuffle_epi32() {
53934 let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
53935 let r = _mm512_shuffle_epi32::<_MM_PERM_AADD>(a);
53936 let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 8, 8, 1, 1, 16, 16, 9, 9);
53937 assert_eq_m512i(r, e);
53938 }
53939
53940 #[simd_test(enable = "avx512f")]
53941 unsafe fn test_mm512_mask_shuffle_epi32() {
53942 let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
53943 let r = _mm512_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0, a);
53944 assert_eq_m512i(r, a);
53945 let r = _mm512_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0b11111111_11111111, a);
53946 let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 8, 8, 1, 1, 16, 16, 9, 9);
53947 assert_eq_m512i(r, e);
53948 }
53949
53950 #[simd_test(enable = "avx512f")]
53951 unsafe fn test_mm512_maskz_shuffle_epi32() {
53952 let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
53953 let r = _mm512_maskz_shuffle_epi32::<_MM_PERM_AADD>(0, a);
53954 assert_eq_m512i(r, _mm512_setzero_si512());
53955 let r = _mm512_maskz_shuffle_epi32::<_MM_PERM_AADD>(0b00000000_11111111, a);
53956 let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 0, 0, 0, 0, 0, 0, 0, 0);
53957 assert_eq_m512i(r, e);
53958 }
53959
53960 #[simd_test(enable = "avx512f,avx512vl")]
53961 unsafe fn test_mm256_mask_shuffle_epi32() {
53962 let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
53963 let r = _mm256_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0, a);
53964 assert_eq_m256i(r, a);
53965 let r = _mm256_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0b11111111, a);
53966 let e = _mm256_set_epi32(8, 8, 1, 1, 16, 16, 9, 9);
53967 assert_eq_m256i(r, e);
53968 }
53969
53970 #[simd_test(enable = "avx512f,avx512vl")]
53971 unsafe fn test_mm256_maskz_shuffle_epi32() {
53972 let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
53973 let r = _mm256_maskz_shuffle_epi32::<_MM_PERM_AADD>(0, a);
53974 assert_eq_m256i(r, _mm256_setzero_si256());
53975 let r = _mm256_maskz_shuffle_epi32::<_MM_PERM_AADD>(0b11111111, a);
53976 let e = _mm256_set_epi32(8, 8, 1, 1, 16, 16, 9, 9);
53977 assert_eq_m256i(r, e);
53978 }
53979
53980 #[simd_test(enable = "avx512f,avx512vl")]
53981 unsafe fn test_mm_mask_shuffle_epi32() {
53982 let a = _mm_set_epi32(1, 4, 5, 8);
53983 let r = _mm_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0, a);
53984 assert_eq_m128i(r, a);
53985 let r = _mm_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0b00001111, a);
53986 let e = _mm_set_epi32(8, 8, 1, 1);
53987 assert_eq_m128i(r, e);
53988 }
53989
53990 #[simd_test(enable = "avx512f,avx512vl")]
53991 unsafe fn test_mm_maskz_shuffle_epi32() {
53992 let a = _mm_set_epi32(1, 4, 5, 8);
53993 let r = _mm_maskz_shuffle_epi32::<_MM_PERM_AADD>(0, a);
53994 assert_eq_m128i(r, _mm_setzero_si128());
53995 let r = _mm_maskz_shuffle_epi32::<_MM_PERM_AADD>(0b00001111, a);
53996 let e = _mm_set_epi32(8, 8, 1, 1);
53997 assert_eq_m128i(r, e);
53998 }
53999
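    // In shuffle_ps the two low 2-bit fields of the control select from `a`
    // and the two high fields select from `b`, per 128-bit lane, so
    // 0b00_00_11_11 produces (a3, a3, b0, b0) in each lane.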
54000 #[simd_test(enable = "avx512f")]
54001 unsafe fn test_mm512_shuffle_ps() {
54002 let a = _mm512_setr_ps(
54003 1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
54004 );
54005 let b = _mm512_setr_ps(
54006 2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
54007 );
54008 let r = _mm512_shuffle_ps::<0b00_00_11_11>(a, b);
54009 let e = _mm512_setr_ps(
54010 8., 8., 2., 2., 16., 16., 10., 10., 8., 8., 2., 2., 16., 16., 10., 10.,
54011 );
54012 assert_eq_m512(r, e);
54013 }
54014
54015 #[simd_test(enable = "avx512f")]
54016 unsafe fn test_mm512_mask_shuffle_ps() {
54017 let a = _mm512_setr_ps(
54018 1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
54019 );
54020 let b = _mm512_setr_ps(
54021 2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
54022 );
54023 let r = _mm512_mask_shuffle_ps::<0b00_00_11_11>(a, 0, a, b);
54024 assert_eq_m512(r, a);
54025 let r = _mm512_mask_shuffle_ps::<0b00_00_11_11>(a, 0b11111111_11111111, a, b);
54026 let e = _mm512_setr_ps(
54027 8., 8., 2., 2., 16., 16., 10., 10., 8., 8., 2., 2., 16., 16., 10., 10.,
54028 );
54029 assert_eq_m512(r, e);
54030 }
54031
54032 #[simd_test(enable = "avx512f")]
54033 unsafe fn test_mm512_maskz_shuffle_ps() {
54034 let a = _mm512_setr_ps(
54035 1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
54036 );
54037 let b = _mm512_setr_ps(
54038 2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
54039 );
54040 let r = _mm512_maskz_shuffle_ps::<0b00_00_11_11>(0, a, b);
54041 assert_eq_m512(r, _mm512_setzero_ps());
54042 let r = _mm512_maskz_shuffle_ps::<0b00_00_11_11>(0b00000000_11111111, a, b);
54043 let e = _mm512_setr_ps(
54044 8., 8., 2., 2., 16., 16., 10., 10., 0., 0., 0., 0., 0., 0., 0., 0.,
54045 );
54046 assert_eq_m512(r, e);
54047 }
54048
54049 #[simd_test(enable = "avx512f,avx512vl")]
54050 unsafe fn test_mm256_mask_shuffle_ps() {
54051 let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
54052 let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
54053 let r = _mm256_mask_shuffle_ps::<0b11_11_11_11>(a, 0, a, b);
54054 assert_eq_m256(r, a);
54055 let r = _mm256_mask_shuffle_ps::<0b00_00_11_11>(a, 0b11111111, a, b);
54056 let e = _mm256_set_ps(7., 7., 1., 1., 15., 15., 9., 9.);
54057 assert_eq_m256(r, e);
54058 }
54059
54060 #[simd_test(enable = "avx512f,avx512vl")]
54061 unsafe fn test_mm256_maskz_shuffle_ps() {
54062 let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
54063 let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
54064 let r = _mm256_maskz_shuffle_ps::<0b11_11_11_11>(0, a, b);
54065 assert_eq_m256(r, _mm256_setzero_ps());
54066 let r = _mm256_maskz_shuffle_ps::<0b00_00_11_11>(0b11111111, a, b);
54067 let e = _mm256_set_ps(7., 7., 1., 1., 15., 15., 9., 9.);
54068 assert_eq_m256(r, e);
54069 }
54070
54071 #[simd_test(enable = "avx512f,avx512vl")]
54072 unsafe fn test_mm_mask_shuffle_ps() {
54073 let a = _mm_set_ps(1., 4., 5., 8.);
54074 let b = _mm_set_ps(2., 3., 6., 7.);
54075 let r = _mm_mask_shuffle_ps::<0b11_11_11_11>(a, 0, a, b);
54076 assert_eq_m128(r, a);
54077 let r = _mm_mask_shuffle_ps::<0b00_00_11_11>(a, 0b00001111, a, b);
54078 let e = _mm_set_ps(7., 7., 1., 1.);
54079 assert_eq_m128(r, e);
54080 }
54081
54082 #[simd_test(enable = "avx512f,avx512vl")]
54083 unsafe fn test_mm_maskz_shuffle_ps() {
54084 let a = _mm_set_ps(1., 4., 5., 8.);
54085 let b = _mm_set_ps(2., 3., 6., 7.);
54086 let r = _mm_maskz_shuffle_ps::<0b11_11_11_11>(0, a, b);
54087 assert_eq_m128(r, _mm_setzero_ps());
54088 let r = _mm_maskz_shuffle_ps::<0b00_00_11_11>(0b00001111, a, b);
54089 let e = _mm_set_ps(7., 7., 1., 1.);
54090 assert_eq_m128(r, e);
54091 }
54092
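    // shuffle_i32x4/f32x4 move whole 128-bit lanes: for the 512-bit form the
    // two low result lanes are picked from `a` and the two high lanes from
    // `b`, while the 256-bit form picks one lane from `a` and one from `b`.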
54093 #[simd_test(enable = "avx512f")]
54094 unsafe fn test_mm512_shuffle_i32x4() {
54095 let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
54096 let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15);
54097 let r = _mm512_shuffle_i32x4::<0b00_00_00_00>(a, b);
54098 let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 2, 3, 6, 7, 2, 3, 6, 7);
54099 assert_eq_m512i(r, e);
54100 }
54101
54102 #[simd_test(enable = "avx512f")]
54103 unsafe fn test_mm512_mask_shuffle_i32x4() {
54104 let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
54105 let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15);
54106 let r = _mm512_mask_shuffle_i32x4::<0b00_00_00_00>(a, 0, a, b);
54107 assert_eq_m512i(r, a);
54108 let r = _mm512_mask_shuffle_i32x4::<0b00_00_00_00>(a, 0b11111111_11111111, a, b);
54109 let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 2, 3, 6, 7, 2, 3, 6, 7);
54110 assert_eq_m512i(r, e);
54111 }
54112
54113 #[simd_test(enable = "avx512f")]
54114 unsafe fn test_mm512_maskz_shuffle_i32x4() {
54115 let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
54116 let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15);
54117 let r = _mm512_maskz_shuffle_i32x4::<0b00_00_00_00>(0, a, b);
54118 assert_eq_m512i(r, _mm512_setzero_si512());
54119 let r = _mm512_maskz_shuffle_i32x4::<0b00_00_00_00>(0b00000000_11111111, a, b);
54120 let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 0, 0, 0, 0, 0, 0, 0, 0);
54121 assert_eq_m512i(r, e);
54122 }
54123
54124 #[simd_test(enable = "avx512f,avx512vl")]
54125 unsafe fn test_mm256_shuffle_i32x4() {
54126 let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
54127 let b = _mm256_set_epi32(2, 3, 6, 7, 10, 11, 14, 15);
54128 let r = _mm256_shuffle_i32x4::<0b00>(a, b);
54129 let e = _mm256_set_epi32(10, 11, 14, 15, 9, 12, 13, 16);
54130 assert_eq_m256i(r, e);
54131 }
54132
54133 #[simd_test(enable = "avx512f,avx512vl")]
54134 unsafe fn test_mm256_mask_shuffle_i32x4() {
54135 let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
54136 let b = _mm256_set_epi32(2, 3, 6, 7, 10, 11, 14, 15);
54137 let r = _mm256_mask_shuffle_i32x4::<0b00>(a, 0, a, b);
54138 assert_eq_m256i(r, a);
54139 let r = _mm256_mask_shuffle_i32x4::<0b00>(a, 0b11111111, a, b);
54140 let e = _mm256_set_epi32(10, 11, 14, 15, 9, 12, 13, 16);
54141 assert_eq_m256i(r, e);
54142 }
54143
54144 #[simd_test(enable = "avx512f,avx512vl")]
54145 unsafe fn test_mm256_maskz_shuffle_i32x4() {
54146 let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
54147 let b = _mm256_set_epi32(2, 3, 6, 7, 10, 11, 14, 15);
54148 let r = _mm256_maskz_shuffle_i32x4::<0b00>(0, a, b);
54149 assert_eq_m256i(r, _mm256_setzero_si256());
54150 let r = _mm256_maskz_shuffle_i32x4::<0b00>(0b11111111, a, b);
54151 let e = _mm256_set_epi32(10, 11, 14, 15, 9, 12, 13, 16);
54152 assert_eq_m256i(r, e);
54153 }
54154
54155 #[simd_test(enable = "avx512f")]
54156 unsafe fn test_mm512_shuffle_f32x4() {
54157 let a = _mm512_setr_ps(
54158 1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
54159 );
54160 let b = _mm512_setr_ps(
54161 2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
54162 );
54163 let r = _mm512_shuffle_f32x4::<0b00_00_00_00>(a, b);
54164 let e = _mm512_setr_ps(
54165 1., 4., 5., 8., 1., 4., 5., 8., 2., 3., 6., 7., 2., 3., 6., 7.,
54166 );
54167 assert_eq_m512(r, e);
54168 }
54169
54170 #[simd_test(enable = "avx512f")]
54171 unsafe fn test_mm512_mask_shuffle_f32x4() {
54172 let a = _mm512_setr_ps(
54173 1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
54174 );
54175 let b = _mm512_setr_ps(
54176 2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
54177 );
54178 let r = _mm512_mask_shuffle_f32x4::<0b00_00_00_00>(a, 0, a, b);
54179 assert_eq_m512(r, a);
54180 let r = _mm512_mask_shuffle_f32x4::<0b00_00_00_00>(a, 0b11111111_11111111, a, b);
54181 let e = _mm512_setr_ps(
54182 1., 4., 5., 8., 1., 4., 5., 8., 2., 3., 6., 7., 2., 3., 6., 7.,
54183 );
54184 assert_eq_m512(r, e);
54185 }
54186
54187 #[simd_test(enable = "avx512f")]
54188 unsafe fn test_mm512_maskz_shuffle_f32x4() {
54189 let a = _mm512_setr_ps(
54190 1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
54191 );
54192 let b = _mm512_setr_ps(
54193 2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
54194 );
54195 let r = _mm512_maskz_shuffle_f32x4::<0b00_00_00_00>(0, a, b);
54196 assert_eq_m512(r, _mm512_setzero_ps());
54197 let r = _mm512_maskz_shuffle_f32x4::<0b00_00_00_00>(0b00000000_11111111, a, b);
54198 let e = _mm512_setr_ps(
54199 1., 4., 5., 8., 1., 4., 5., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
54200 );
54201 assert_eq_m512(r, e);
54202 }
54203
54204 #[simd_test(enable = "avx512f,avx512vl")]
54205 unsafe fn test_mm256_shuffle_f32x4() {
54206 let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
54207 let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
54208 let r = _mm256_shuffle_f32x4::<0b00>(a, b);
54209 let e = _mm256_set_ps(10., 11., 14., 15., 9., 12., 13., 16.);
54210 assert_eq_m256(r, e);
54211 }
54212
54213 #[simd_test(enable = "avx512f,avx512vl")]
54214 unsafe fn test_mm256_mask_shuffle_f32x4() {
54215 let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
54216 let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
54217 let r = _mm256_mask_shuffle_f32x4::<0b00>(a, 0, a, b);
54218 assert_eq_m256(r, a);
54219 let r = _mm256_mask_shuffle_f32x4::<0b00>(a, 0b11111111, a, b);
54220 let e = _mm256_set_ps(10., 11., 14., 15., 9., 12., 13., 16.);
54221 assert_eq_m256(r, e);
54222 }
54223
54224 #[simd_test(enable = "avx512f,avx512vl")]
54225 unsafe fn test_mm256_maskz_shuffle_f32x4() {
54226 let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
54227 let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
54228 let r = _mm256_maskz_shuffle_f32x4::<0b00>(0, a, b);
54229 assert_eq_m256(r, _mm256_setzero_ps());
54230 let r = _mm256_maskz_shuffle_f32x4::<0b00>(0b11111111, a, b);
54231 let e = _mm256_set_ps(10., 11., 14., 15., 9., 12., 13., 16.);
54232 assert_eq_m256(r, e);
54233 }
54234
54235 #[simd_test(enable = "avx512f")]
54236 unsafe fn test_mm512_extractf32x4_ps() {
54237 let a = _mm512_setr_ps(
54238 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54239 );
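        // Index 1 extracts the second 128-bit lane, i.e. elements 4..=7.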
54240 let r = _mm512_extractf32x4_ps::<1>(a);
54241 let e = _mm_setr_ps(5., 6., 7., 8.);
54242 assert_eq_m128(r, e);
54243 }
54244
54245 #[simd_test(enable = "avx512f")]
54246 unsafe fn test_mm512_mask_extractf32x4_ps() {
54247 let a = _mm512_setr_ps(
54248 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54249 );
54250 let src = _mm_set1_ps(100.);
54251 let r = _mm512_mask_extractf32x4_ps::<1>(src, 0, a);
54252 assert_eq_m128(r, src);
54253 let r = _mm512_mask_extractf32x4_ps::<1>(src, 0b11111111, a);
54254 let e = _mm_setr_ps(5., 6., 7., 8.);
54255 assert_eq_m128(r, e);
54256 }
54257
54258 #[simd_test(enable = "avx512f")]
54259 unsafe fn test_mm512_maskz_extractf32x4_ps() {
54260 let a = _mm512_setr_ps(
54261 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54262 );
54263 let r = _mm512_maskz_extractf32x4_ps::<1>(0, a);
54264 assert_eq_m128(r, _mm_setzero_ps());
54265 let r = _mm512_maskz_extractf32x4_ps::<1>(0b00000001, a);
54266 let e = _mm_setr_ps(5., 0., 0., 0.);
54267 assert_eq_m128(r, e);
54268 }
54269
54270 #[simd_test(enable = "avx512f,avx512vl")]
54271 unsafe fn test_mm256_extractf32x4_ps() {
54272 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54273 let r = _mm256_extractf32x4_ps::<1>(a);
54274 let e = _mm_set_ps(1., 2., 3., 4.);
54275 assert_eq_m128(r, e);
54276 }
54277
54278 #[simd_test(enable = "avx512f,avx512vl")]
54279 unsafe fn test_mm256_mask_extractf32x4_ps() {
54280 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54281 let src = _mm_set1_ps(100.);
54282 let r = _mm256_mask_extractf32x4_ps::<1>(src, 0, a);
54283 assert_eq_m128(r, src);
54284 let r = _mm256_mask_extractf32x4_ps::<1>(src, 0b00001111, a);
54285 let e = _mm_set_ps(1., 2., 3., 4.);
54286 assert_eq_m128(r, e);
54287 }
54288
54289 #[simd_test(enable = "avx512f,avx512vl")]
54290 unsafe fn test_mm256_maskz_extractf32x4_ps() {
54291 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54292 let r = _mm256_maskz_extractf32x4_ps::<1>(0, a);
54293 assert_eq_m128(r, _mm_setzero_ps());
54294 let r = _mm256_maskz_extractf32x4_ps::<1>(0b00001111, a);
54295 let e = _mm_set_ps(1., 2., 3., 4.);
54296 assert_eq_m128(r, e);
54297 }
54298
54299 #[simd_test(enable = "avx512f")]
54300 unsafe fn test_mm512_extracti32x4_epi32() {
54301 let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54302 let r = _mm512_extracti32x4_epi32::<1>(a);
54303 let e = _mm_setr_epi32(5, 6, 7, 8);
54304 assert_eq_m128i(r, e);
54305 }
54306
54307 #[simd_test(enable = "avx512f")]
54308 unsafe fn test_mm512_mask_extracti32x4_epi32() {
54309 let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54310 let src = _mm_set1_epi32(100);
54311 let r = _mm512_mask_extracti32x4_epi32::<1>(src, 0, a);
54312 assert_eq_m128i(r, src);
54313 let r = _mm512_mask_extracti32x4_epi32::<1>(src, 0b11111111, a);
54314 let e = _mm_setr_epi32(5, 6, 7, 8);
54315 assert_eq_m128i(r, e);
54316 }
54317
    #[simd_test(enable = "avx512f")]

54319 unsafe fn test_mm512_maskz_extracti32x4_epi32() {
54320 let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54321 let r = _mm512_maskz_extracti32x4_epi32::<1>(0, a);
54322 assert_eq_m128i(r, _mm_setzero_si128());
54323 let r = _mm512_maskz_extracti32x4_epi32::<1>(0b00000001, a);
54324 let e = _mm_setr_epi32(5, 0, 0, 0);
54325 assert_eq_m128i(r, e);
54326 }
54327
54328 #[simd_test(enable = "avx512f,avx512vl")]
54329 unsafe fn test_mm256_extracti32x4_epi32() {
54330 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
54331 let r = _mm256_extracti32x4_epi32::<1>(a);
54332 let e = _mm_set_epi32(1, 2, 3, 4);
54333 assert_eq_m128i(r, e);
54334 }
54335
54336 #[simd_test(enable = "avx512f,avx512vl")]
54337 unsafe fn test_mm256_mask_extracti32x4_epi32() {
54338 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
54339 let src = _mm_set1_epi32(100);
54340 let r = _mm256_mask_extracti32x4_epi32::<1>(src, 0, a);
54341 assert_eq_m128i(r, src);
54342 let r = _mm256_mask_extracti32x4_epi32::<1>(src, 0b00001111, a);
54343 let e = _mm_set_epi32(1, 2, 3, 4);
54344 assert_eq_m128i(r, e);
54345 }
54346
54347 #[simd_test(enable = "avx512f,avx512vl")]
54348 unsafe fn test_mm256_maskz_extracti32x4_epi32() {
54349 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
54350 let r = _mm256_maskz_extracti32x4_epi32::<1>(0, a);
54351 assert_eq_m128i(r, _mm_setzero_si128());
54352 let r = _mm256_maskz_extracti32x4_epi32::<1>(0b00001111, a);
54353 let e = _mm_set_epi32(1, 2, 3, 4);
54354 assert_eq_m128i(r, e);
54355 }
54356
54357 #[simd_test(enable = "avx512f")]
54358 unsafe fn test_mm512_moveldup_ps() {
54359 let a = _mm512_setr_ps(
54360 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54361 );
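        // Duplicates the even-indexed (lower) element of each pair of lanes.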
54362 let r = _mm512_moveldup_ps(a);
54363 let e = _mm512_setr_ps(
54364 1., 1., 3., 3., 5., 5., 7., 7., 9., 9., 11., 11., 13., 13., 15., 15.,
54365 );
54366 assert_eq_m512(r, e);
54367 }
54368
54369 #[simd_test(enable = "avx512f")]
54370 unsafe fn test_mm512_mask_moveldup_ps() {
54371 let a = _mm512_setr_ps(
54372 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54373 );
54374 let r = _mm512_mask_moveldup_ps(a, 0, a);
54375 assert_eq_m512(r, a);
54376 let r = _mm512_mask_moveldup_ps(a, 0b11111111_11111111, a);
54377 let e = _mm512_setr_ps(
54378 1., 1., 3., 3., 5., 5., 7., 7., 9., 9., 11., 11., 13., 13., 15., 15.,
54379 );
54380 assert_eq_m512(r, e);
54381 }
54382
54383 #[simd_test(enable = "avx512f")]
54384 unsafe fn test_mm512_maskz_moveldup_ps() {
54385 let a = _mm512_setr_ps(
54386 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54387 );
54388 let r = _mm512_maskz_moveldup_ps(0, a);
54389 assert_eq_m512(r, _mm512_setzero_ps());
54390 let r = _mm512_maskz_moveldup_ps(0b00000000_11111111, a);
54391 let e = _mm512_setr_ps(
54392 1., 1., 3., 3., 5., 5., 7., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
54393 );
54394 assert_eq_m512(r, e);
54395 }
54396
54397 #[simd_test(enable = "avx512f,avx512vl")]
54398 unsafe fn test_mm256_mask_moveldup_ps() {
54399 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54400 let r = _mm256_mask_moveldup_ps(a, 0, a);
54401 assert_eq_m256(r, a);
54402 let r = _mm256_mask_moveldup_ps(a, 0b11111111, a);
54403 let e = _mm256_set_ps(2., 2., 4., 4., 6., 6., 8., 8.);
54404 assert_eq_m256(r, e);
54405 }
54406
54407 #[simd_test(enable = "avx512f,avx512vl")]
54408 unsafe fn test_mm256_maskz_moveldup_ps() {
54409 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54410 let r = _mm256_maskz_moveldup_ps(0, a);
54411 assert_eq_m256(r, _mm256_setzero_ps());
54412 let r = _mm256_maskz_moveldup_ps(0b11111111, a);
54413 let e = _mm256_set_ps(2., 2., 4., 4., 6., 6., 8., 8.);
54414 assert_eq_m256(r, e);
54415 }
54416
54417 #[simd_test(enable = "avx512f,avx512vl")]
54418 unsafe fn test_mm_mask_moveldup_ps() {
54419 let a = _mm_set_ps(1., 2., 3., 4.);
54420 let r = _mm_mask_moveldup_ps(a, 0, a);
54421 assert_eq_m128(r, a);
54422 let r = _mm_mask_moveldup_ps(a, 0b00001111, a);
54423 let e = _mm_set_ps(2., 2., 4., 4.);
54424 assert_eq_m128(r, e);
54425 }
54426
54427 #[simd_test(enable = "avx512f,avx512vl")]
54428 unsafe fn test_mm_maskz_moveldup_ps() {
54429 let a = _mm_set_ps(1., 2., 3., 4.);
54430 let r = _mm_maskz_moveldup_ps(0, a);
54431 assert_eq_m128(r, _mm_setzero_ps());
54432 let r = _mm_maskz_moveldup_ps(0b00001111, a);
54433 let e = _mm_set_ps(2., 2., 4., 4.);
54434 assert_eq_m128(r, e);
54435 }
54436
54437 #[simd_test(enable = "avx512f")]
54438 unsafe fn test_mm512_movehdup_ps() {
54439 let a = _mm512_setr_ps(
54440 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54441 );
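        // Duplicates the odd-indexed (upper) element of each pair of lanes.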
54442 let r = _mm512_movehdup_ps(a);
54443 let e = _mm512_setr_ps(
54444 2., 2., 4., 4., 6., 6., 8., 8., 10., 10., 12., 12., 14., 14., 16., 16.,
54445 );
54446 assert_eq_m512(r, e);
54447 }
54448
54449 #[simd_test(enable = "avx512f")]
54450 unsafe fn test_mm512_mask_movehdup_ps() {
54451 let a = _mm512_setr_ps(
54452 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54453 );
54454 let r = _mm512_mask_movehdup_ps(a, 0, a);
54455 assert_eq_m512(r, a);
54456 let r = _mm512_mask_movehdup_ps(a, 0b11111111_11111111, a);
54457 let e = _mm512_setr_ps(
54458 2., 2., 4., 4., 6., 6., 8., 8., 10., 10., 12., 12., 14., 14., 16., 16.,
54459 );
54460 assert_eq_m512(r, e);
54461 }
54462
54463 #[simd_test(enable = "avx512f")]
54464 unsafe fn test_mm512_maskz_movehdup_ps() {
54465 let a = _mm512_setr_ps(
54466 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54467 );
54468 let r = _mm512_maskz_movehdup_ps(0, a);
54469 assert_eq_m512(r, _mm512_setzero_ps());
54470 let r = _mm512_maskz_movehdup_ps(0b00000000_11111111, a);
54471 let e = _mm512_setr_ps(
54472 2., 2., 4., 4., 6., 6., 8., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
54473 );
54474 assert_eq_m512(r, e);
54475 }
54476
54477 #[simd_test(enable = "avx512f,avx512vl")]
54478 unsafe fn test_mm256_mask_movehdup_ps() {
54479 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54480 let r = _mm256_mask_movehdup_ps(a, 0, a);
54481 assert_eq_m256(r, a);
54482 let r = _mm256_mask_movehdup_ps(a, 0b11111111, a);
54483 let e = _mm256_set_ps(1., 1., 3., 3., 5., 5., 7., 7.);
54484 assert_eq_m256(r, e);
54485 }
54486
54487 #[simd_test(enable = "avx512f,avx512vl")]
54488 unsafe fn test_mm256_maskz_movehdup_ps() {
54489 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54490 let r = _mm256_maskz_movehdup_ps(0, a);
54491 assert_eq_m256(r, _mm256_setzero_ps());
54492 let r = _mm256_maskz_movehdup_ps(0b11111111, a);
54493 let e = _mm256_set_ps(1., 1., 3., 3., 5., 5., 7., 7.);
54494 assert_eq_m256(r, e);
54495 }
54496
54497 #[simd_test(enable = "avx512f,avx512vl")]
54498 unsafe fn test_mm_mask_movehdup_ps() {
54499 let a = _mm_set_ps(1., 2., 3., 4.);
54500 let r = _mm_mask_movehdup_ps(a, 0, a);
54501 assert_eq_m128(r, a);
54502 let r = _mm_mask_movehdup_ps(a, 0b00001111, a);
54503 let e = _mm_set_ps(1., 1., 3., 3.);
54504 assert_eq_m128(r, e);
54505 }
54506
54507 #[simd_test(enable = "avx512f,avx512vl")]
54508 unsafe fn test_mm_maskz_movehdup_ps() {
54509 let a = _mm_set_ps(1., 2., 3., 4.);
54510 let r = _mm_maskz_movehdup_ps(0, a);
54511 assert_eq_m128(r, _mm_setzero_ps());
54512 let r = _mm_maskz_movehdup_ps(0b00001111, a);
54513 let e = _mm_set_ps(1., 1., 3., 3.);
54514 assert_eq_m128(r, e);
54515 }
54516
54517 #[simd_test(enable = "avx512f")]
54518 unsafe fn test_mm512_inserti32x4() {
54519 let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54520 let b = _mm_setr_epi32(17, 18, 19, 20);
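        // Index 0 replaces the lowest 128-bit lane (elements 0..=3) of `a` with `b`.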
54521 let r = _mm512_inserti32x4::<0>(a, b);
54522 let e = _mm512_setr_epi32(17, 18, 19, 20, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54523 assert_eq_m512i(r, e);
54524 }
54525
54526 #[simd_test(enable = "avx512f")]
54527 unsafe fn test_mm512_mask_inserti32x4() {
54528 let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54529 let b = _mm_setr_epi32(17, 18, 19, 20);
54530 let r = _mm512_mask_inserti32x4::<0>(a, 0, a, b);
54531 assert_eq_m512i(r, a);
54532 let r = _mm512_mask_inserti32x4::<0>(a, 0b11111111_11111111, a, b);
54533 let e = _mm512_setr_epi32(17, 18, 19, 20, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54534 assert_eq_m512i(r, e);
54535 }
54536
54537 #[simd_test(enable = "avx512f")]
54538 unsafe fn test_mm512_maskz_inserti32x4() {
54539 let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54540 let b = _mm_setr_epi32(17, 18, 19, 20);
54541 let r = _mm512_maskz_inserti32x4::<0>(0, a, b);
54542 assert_eq_m512i(r, _mm512_setzero_si512());
54543 let r = _mm512_maskz_inserti32x4::<0>(0b00000000_11111111, a, b);
54544 let e = _mm512_setr_epi32(17, 18, 19, 20, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0);
54545 assert_eq_m512i(r, e);
54546 }
54547
54548 #[simd_test(enable = "avx512f,avx512vl")]
54549 unsafe fn test_mm256_inserti32x4() {
54550 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
54551 let b = _mm_set_epi32(17, 18, 19, 20);
54552 let r = _mm256_inserti32x4::<1>(a, b);
54553 let e = _mm256_set_epi32(17, 18, 19, 20, 5, 6, 7, 8);
54554 assert_eq_m256i(r, e);
54555 }
54556
54557 #[simd_test(enable = "avx512f,avx512vl")]
54558 unsafe fn test_mm256_mask_inserti32x4() {
54559 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
54560 let b = _mm_set_epi32(17, 18, 19, 20);
54561 let r = _mm256_mask_inserti32x4::<0>(a, 0, a, b);
54562 assert_eq_m256i(r, a);
54563 let r = _mm256_mask_inserti32x4::<1>(a, 0b11111111, a, b);
54564 let e = _mm256_set_epi32(17, 18, 19, 20, 5, 6, 7, 8);
54565 assert_eq_m256i(r, e);
54566 }
54567
54568 #[simd_test(enable = "avx512f,avx512vl")]
54569 unsafe fn test_mm256_maskz_inserti32x4() {
54570 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
54571 let b = _mm_set_epi32(17, 18, 19, 20);
54572 let r = _mm256_maskz_inserti32x4::<0>(0, a, b);
54573 assert_eq_m256i(r, _mm256_setzero_si256());
54574 let r = _mm256_maskz_inserti32x4::<1>(0b11111111, a, b);
54575 let e = _mm256_set_epi32(17, 18, 19, 20, 5, 6, 7, 8);
54576 assert_eq_m256i(r, e);
54577 }
54578
54579 #[simd_test(enable = "avx512f")]
54580 unsafe fn test_mm512_insertf32x4() {
54581 let a = _mm512_setr_ps(
54582 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54583 );
54584 let b = _mm_setr_ps(17., 18., 19., 20.);
54585 let r = _mm512_insertf32x4::<0>(a, b);
54586 let e = _mm512_setr_ps(
54587 17., 18., 19., 20., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54588 );
54589 assert_eq_m512(r, e);
54590 }
54591
54592 #[simd_test(enable = "avx512f")]
54593 unsafe fn test_mm512_mask_insertf32x4() {
54594 let a = _mm512_setr_ps(
54595 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54596 );
54597 let b = _mm_setr_ps(17., 18., 19., 20.);
54598 let r = _mm512_mask_insertf32x4::<0>(a, 0, a, b);
54599 assert_eq_m512(r, a);
54600 let r = _mm512_mask_insertf32x4::<0>(a, 0b11111111_11111111, a, b);
54601 let e = _mm512_setr_ps(
54602 17., 18., 19., 20., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54603 );
54604 assert_eq_m512(r, e);
54605 }
54606
54607 #[simd_test(enable = "avx512f")]
54608 unsafe fn test_mm512_maskz_insertf32x4() {
54609 let a = _mm512_setr_ps(
54610 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54611 );
54612 let b = _mm_setr_ps(17., 18., 19., 20.);
54613 let r = _mm512_maskz_insertf32x4::<0>(0, a, b);
54614 assert_eq_m512(r, _mm512_setzero_ps());
54615 let r = _mm512_maskz_insertf32x4::<0>(0b00000000_11111111, a, b);
54616 let e = _mm512_setr_ps(
54617 17., 18., 19., 20., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
54618 );
54619 assert_eq_m512(r, e);
54620 }
54621
54622 #[simd_test(enable = "avx512f,avx512vl")]
54623 unsafe fn test_mm256_insertf32x4() {
54624 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54625 let b = _mm_set_ps(17., 18., 19., 20.);
54626 let r = _mm256_insertf32x4::<1>(a, b);
54627 let e = _mm256_set_ps(17., 18., 19., 20., 5., 6., 7., 8.);
54628 assert_eq_m256(r, e);
54629 }
54630
54631 #[simd_test(enable = "avx512f,avx512vl")]
54632 unsafe fn test_mm256_mask_insertf32x4() {
54633 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54634 let b = _mm_set_ps(17., 18., 19., 20.);
54635 let r = _mm256_mask_insertf32x4::<0>(a, 0, a, b);
54636 assert_eq_m256(r, a);
54637 let r = _mm256_mask_insertf32x4::<1>(a, 0b11111111, a, b);
54638 let e = _mm256_set_ps(17., 18., 19., 20., 5., 6., 7., 8.);
54639 assert_eq_m256(r, e);
54640 }
54641
54642 #[simd_test(enable = "avx512f,avx512vl")]
54643 unsafe fn test_mm256_maskz_insertf32x4() {
54644 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54645 let b = _mm_set_ps(17., 18., 19., 20.);
54646 let r = _mm256_maskz_insertf32x4::<0>(0, a, b);
54647 assert_eq_m256(r, _mm256_setzero_ps());
54648 let r = _mm256_maskz_insertf32x4::<1>(0b11111111, a, b);
54649 let e = _mm256_set_ps(17., 18., 19., 20., 5., 6., 7., 8.);
54650 assert_eq_m256(r, e);
54651 }
54652
54653 #[simd_test(enable = "avx512f")]
54654 unsafe fn test_mm512_castps128_ps512() {
54655 let a = _mm_setr_ps(17., 18., 19., 20.);
54656 let r = _mm512_castps128_ps512(a);
54657 assert_eq_m128(_mm512_castps512_ps128(r), a);
54658 }
54659
54660 #[simd_test(enable = "avx512f")]
54661 unsafe fn test_mm512_castps256_ps512() {
54662 let a = _mm256_setr_ps(17., 18., 19., 20., 21., 22., 23., 24.);
54663 let r = _mm512_castps256_ps512(a);
54664 assert_eq_m256(_mm512_castps512_ps256(r), a);
54665 }
54666
54667 #[simd_test(enable = "avx512f")]
54668 unsafe fn test_mm512_zextps128_ps512() {
54669 let a = _mm_setr_ps(17., 18., 19., 20.);
54670 let r = _mm512_zextps128_ps512(a);
54671 let e = _mm512_setr_ps(
54672 17., 18., 19., 20., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
54673 );
54674 assert_eq_m512(r, e);
54675 }
54676
54677 #[simd_test(enable = "avx512f")]
54678 unsafe fn test_mm512_zextps256_ps512() {
54679 let a = _mm256_setr_ps(17., 18., 19., 20., 21., 22., 23., 24.);
54680 let r = _mm512_zextps256_ps512(a);
54681 let e = _mm512_setr_ps(
54682 17., 18., 19., 20., 21., 22., 23., 24., 0., 0., 0., 0., 0., 0., 0., 0.,
54683 );
54684 assert_eq_m512(r, e);
54685 }
54686
54687 #[simd_test(enable = "avx512f")]
54688 unsafe fn test_mm512_castps512_ps128() {
54689 let a = _mm512_setr_ps(
54690 17., 18., 19., 20., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
54691 );
54692 let r = _mm512_castps512_ps128(a);
54693 let e = _mm_setr_ps(17., 18., 19., 20.);
54694 assert_eq_m128(r, e);
54695 }
54696
54697 #[simd_test(enable = "avx512f")]
54698 unsafe fn test_mm512_castps512_ps256() {
54699 let a = _mm512_setr_ps(
54700 17., 18., 19., 20., 21., 22., 23., 24., -1., -1., -1., -1., -1., -1., -1., -1.,
54701 );
54702 let r = _mm512_castps512_ps256(a);
54703 let e = _mm256_setr_ps(17., 18., 19., 20., 21., 22., 23., 24.);
54704 assert_eq_m256(r, e);
54705 }
54706
54707 #[simd_test(enable = "avx512f")]
54708 unsafe fn test_mm512_castps_pd() {
54709 let a = _mm512_set1_ps(1.);
54710 let r = _mm512_castps_pd(a);
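        // Each f64 lane holds two 1.0f32 bit patterns (0x3F80_0000), which reinterpret
        // to roughly 0.0078125 as a double.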
54711 let e = _mm512_set1_pd(0.007812501848093234);
54712 assert_eq_m512d(r, e);
54713 }
54714
54715 #[simd_test(enable = "avx512f")]
54716 unsafe fn test_mm512_castps_si512() {
54717 let a = _mm512_set1_ps(1.);
54718 let r = _mm512_castps_si512(a);
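        // 1065353216 == 0x3F80_0000, the IEEE-754 bit pattern of 1.0f32.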
54719 let e = _mm512_set1_epi32(1065353216);
54720 assert_eq_m512i(r, e);
54721 }
54722
54723 #[simd_test(enable = "avx512f")]
54724 unsafe fn test_mm512_broadcastd_epi32() {
54725 let a = _mm_set_epi32(17, 18, 19, 20);
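        // `_mm_set_epi32` lists elements from high to low, so the lowest element
        // broadcast here is 20.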
54726 let r = _mm512_broadcastd_epi32(a);
54727 let e = _mm512_set1_epi32(20);
54728 assert_eq_m512i(r, e);
54729 }
54730
54731 #[simd_test(enable = "avx512f")]
54732 unsafe fn test_mm512_mask_broadcastd_epi32() {
54733 let src = _mm512_set1_epi32(20);
54734 let a = _mm_set_epi32(17, 18, 19, 20);
54735 let r = _mm512_mask_broadcastd_epi32(src, 0, a);
54736 assert_eq_m512i(r, src);
54737 let r = _mm512_mask_broadcastd_epi32(src, 0b11111111_11111111, a);
54738 let e = _mm512_set1_epi32(20);
54739 assert_eq_m512i(r, e);
54740 }
54741
54742 #[simd_test(enable = "avx512f")]
54743 unsafe fn test_mm512_maskz_broadcastd_epi32() {
54744 let a = _mm_set_epi32(17, 18, 19, 20);
54745 let r = _mm512_maskz_broadcastd_epi32(0, a);
54746 assert_eq_m512i(r, _mm512_setzero_si512());
54747 let r = _mm512_maskz_broadcastd_epi32(0b00000000_11111111, a);
54748 let e = _mm512_setr_epi32(20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 0, 0, 0, 0);
54749 assert_eq_m512i(r, e);
54750 }
54751
54752 #[simd_test(enable = "avx512f,avx512vl")]
54753 unsafe fn test_mm256_mask_broadcastd_epi32() {
54754 let src = _mm256_set1_epi32(20);
54755 let a = _mm_set_epi32(17, 18, 19, 20);
54756 let r = _mm256_mask_broadcastd_epi32(src, 0, a);
54757 assert_eq_m256i(r, src);
54758 let r = _mm256_mask_broadcastd_epi32(src, 0b11111111, a);
54759 let e = _mm256_set1_epi32(20);
54760 assert_eq_m256i(r, e);
54761 }
54762
54763 #[simd_test(enable = "avx512f,avx512vl")]
54764 unsafe fn test_mm256_maskz_broadcastd_epi32() {
54765 let a = _mm_set_epi32(17, 18, 19, 20);
54766 let r = _mm256_maskz_broadcastd_epi32(0, a);
54767 assert_eq_m256i(r, _mm256_setzero_si256());
54768 let r = _mm256_maskz_broadcastd_epi32(0b11111111, a);
54769 let e = _mm256_set1_epi32(20);
54770 assert_eq_m256i(r, e);
54771 }
54772
54773 #[simd_test(enable = "avx512f,avx512vl")]
54774 unsafe fn test_mm_mask_broadcastd_epi32() {
54775 let src = _mm_set1_epi32(20);
54776 let a = _mm_set_epi32(17, 18, 19, 20);
54777 let r = _mm_mask_broadcastd_epi32(src, 0, a);
54778 assert_eq_m128i(r, src);
54779 let r = _mm_mask_broadcastd_epi32(src, 0b00001111, a);
54780 let e = _mm_set1_epi32(20);
54781 assert_eq_m128i(r, e);
54782 }
54783
54784 #[simd_test(enable = "avx512f,avx512vl")]
54785 unsafe fn test_mm_maskz_broadcastd_epi32() {
54786 let a = _mm_set_epi32(17, 18, 19, 20);
54787 let r = _mm_maskz_broadcastd_epi32(0, a);
54788 assert_eq_m128i(r, _mm_setzero_si128());
54789 let r = _mm_maskz_broadcastd_epi32(0b00001111, a);
54790 let e = _mm_set1_epi32(20);
54791 assert_eq_m128i(r, e);
54792 }
54793
54794 #[simd_test(enable = "avx512f")]
54795 unsafe fn test_mm512_broadcastss_ps() {
54796 let a = _mm_set_ps(17., 18., 19., 20.);
54797 let r = _mm512_broadcastss_ps(a);
54798 let e = _mm512_set1_ps(20.);
54799 assert_eq_m512(r, e);
54800 }
54801
54802 #[simd_test(enable = "avx512f")]
54803 unsafe fn test_mm512_mask_broadcastss_ps() {
54804 let src = _mm512_set1_ps(20.);
54805 let a = _mm_set_ps(17., 18., 19., 20.);
54806 let r = _mm512_mask_broadcastss_ps(src, 0, a);
54807 assert_eq_m512(r, src);
54808 let r = _mm512_mask_broadcastss_ps(src, 0b11111111_11111111, a);
54809 let e = _mm512_set1_ps(20.);
54810 assert_eq_m512(r, e);
54811 }
54812
54813 #[simd_test(enable = "avx512f")]
54814 unsafe fn test_mm512_maskz_broadcastss_ps() {
54815 let a = _mm_set_ps(17., 18., 19., 20.);
54816 let r = _mm512_maskz_broadcastss_ps(0, a);
54817 assert_eq_m512(r, _mm512_setzero_ps());
54818 let r = _mm512_maskz_broadcastss_ps(0b00000000_11111111, a);
54819 let e = _mm512_setr_ps(
54820 20., 20., 20., 20., 20., 20., 20., 20., 0., 0., 0., 0., 0., 0., 0., 0.,
54821 );
54822 assert_eq_m512(r, e);
54823 }
54824
54825 #[simd_test(enable = "avx512f,avx512vl")]
54826 unsafe fn test_mm256_mask_broadcastss_ps() {
54827 let src = _mm256_set1_ps(20.);
54828 let a = _mm_set_ps(17., 18., 19., 20.);
54829 let r = _mm256_mask_broadcastss_ps(src, 0, a);
54830 assert_eq_m256(r, src);
54831 let r = _mm256_mask_broadcastss_ps(src, 0b11111111, a);
54832 let e = _mm256_set1_ps(20.);
54833 assert_eq_m256(r, e);
54834 }
54835
54836 #[simd_test(enable = "avx512f,avx512vl")]
54837 unsafe fn test_mm256_maskz_broadcastss_ps() {
54838 let a = _mm_set_ps(17., 18., 19., 20.);
54839 let r = _mm256_maskz_broadcastss_ps(0, a);
54840 assert_eq_m256(r, _mm256_setzero_ps());
54841 let r = _mm256_maskz_broadcastss_ps(0b11111111, a);
54842 let e = _mm256_set1_ps(20.);
54843 assert_eq_m256(r, e);
54844 }
54845
54846 #[simd_test(enable = "avx512f,avx512vl")]
54847 unsafe fn test_mm_mask_broadcastss_ps() {
54848 let src = _mm_set1_ps(20.);
54849 let a = _mm_set_ps(17., 18., 19., 20.);
54850 let r = _mm_mask_broadcastss_ps(src, 0, a);
54851 assert_eq_m128(r, src);
54852 let r = _mm_mask_broadcastss_ps(src, 0b00001111, a);
54853 let e = _mm_set1_ps(20.);
54854 assert_eq_m128(r, e);
54855 }
54856
54857 #[simd_test(enable = "avx512f,avx512vl")]
54858 unsafe fn test_mm_maskz_broadcastss_ps() {
54859 let a = _mm_set_ps(17., 18., 19., 20.);
54860 let r = _mm_maskz_broadcastss_ps(0, a);
54861 assert_eq_m128(r, _mm_setzero_ps());
54862 let r = _mm_maskz_broadcastss_ps(0b00001111, a);
54863 let e = _mm_set1_ps(20.);
54864 assert_eq_m128(r, e);
54865 }
54866
54867 #[simd_test(enable = "avx512f")]
54868 unsafe fn test_mm512_broadcast_i32x4() {
54869 let a = _mm_set_epi32(17, 18, 19, 20);
54870 let r = _mm512_broadcast_i32x4(a);
54871 let e = _mm512_set_epi32(
54872 17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20,
54873 );
54874 assert_eq_m512i(r, e);
54875 }
54876
54877 #[simd_test(enable = "avx512f")]
54878 unsafe fn test_mm512_mask_broadcast_i32x4() {
54879 let src = _mm512_set1_epi32(20);
54880 let a = _mm_set_epi32(17, 18, 19, 20);
54881 let r = _mm512_mask_broadcast_i32x4(src, 0, a);
54882 assert_eq_m512i(r, src);
54883 let r = _mm512_mask_broadcast_i32x4(src, 0b11111111_11111111, a);
54884 let e = _mm512_set_epi32(
54885 17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20,
54886 );
54887 assert_eq_m512i(r, e);
54888 }
54889
54890 #[simd_test(enable = "avx512f")]
54891 unsafe fn test_mm512_maskz_broadcast_i32x4() {
54892 let a = _mm_set_epi32(17, 18, 19, 20);
54893 let r = _mm512_maskz_broadcast_i32x4(0, a);
54894 assert_eq_m512i(r, _mm512_setzero_si512());
54895 let r = _mm512_maskz_broadcast_i32x4(0b00000000_11111111, a);
54896 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 17, 18, 19, 20, 17, 18, 19, 20);
54897 assert_eq_m512i(r, e);
54898 }
54899
54900 #[simd_test(enable = "avx512f,avx512vl")]
54901 unsafe fn test_mm256_broadcast_i32x4() {
54902 let a = _mm_set_epi32(17, 18, 19, 20);
54903 let r = _mm256_broadcast_i32x4(a);
54904 let e = _mm256_set_epi32(17, 18, 19, 20, 17, 18, 19, 20);
54905 assert_eq_m256i(r, e);
54906 }
54907
54908 #[simd_test(enable = "avx512f,avx512vl")]
54909 unsafe fn test_mm256_mask_broadcast_i32x4() {
54910 let src = _mm256_set1_epi32(20);
54911 let a = _mm_set_epi32(17, 18, 19, 20);
54912 let r = _mm256_mask_broadcast_i32x4(src, 0, a);
54913 assert_eq_m256i(r, src);
54914 let r = _mm256_mask_broadcast_i32x4(src, 0b11111111, a);
54915 let e = _mm256_set_epi32(17, 18, 19, 20, 17, 18, 19, 20);
54916 assert_eq_m256i(r, e);
54917 }
54918
54919 #[simd_test(enable = "avx512f,avx512vl")]
54920 unsafe fn test_mm256_maskz_broadcast_i32x4() {
54921 let a = _mm_set_epi32(17, 18, 19, 20);
54922 let r = _mm256_maskz_broadcast_i32x4(0, a);
54923 assert_eq_m256i(r, _mm256_setzero_si256());
54924 let r = _mm256_maskz_broadcast_i32x4(0b11111111, a);
54925 let e = _mm256_set_epi32(17, 18, 19, 20, 17, 18, 19, 20);
54926 assert_eq_m256i(r, e);
54927 }
54928
54929 #[simd_test(enable = "avx512f")]
54930 unsafe fn test_mm512_broadcast_f32x4() {
54931 let a = _mm_set_ps(17., 18., 19., 20.);
54932 let r = _mm512_broadcast_f32x4(a);
54933 let e = _mm512_set_ps(
54934 17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20.,
54935 );
54936 assert_eq_m512(r, e);
54937 }
54938
54939 #[simd_test(enable = "avx512f")]
54940 unsafe fn test_mm512_mask_broadcast_f32x4() {
54941 let src = _mm512_set1_ps(20.);
54942 let a = _mm_set_ps(17., 18., 19., 20.);
54943 let r = _mm512_mask_broadcast_f32x4(src, 0, a);
54944 assert_eq_m512(r, src);
54945 let r = _mm512_mask_broadcast_f32x4(src, 0b11111111_11111111, a);
54946 let e = _mm512_set_ps(
54947 17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20.,
54948 );
54949 assert_eq_m512(r, e);
54950 }
54951
54952 #[simd_test(enable = "avx512f")]
54953 unsafe fn test_mm512_maskz_broadcast_f32x4() {
54954 let a = _mm_set_ps(17., 18., 19., 20.);
54955 let r = _mm512_maskz_broadcast_f32x4(0, a);
54956 assert_eq_m512(r, _mm512_setzero_ps());
54957 let r = _mm512_maskz_broadcast_f32x4(0b00000000_11111111, a);
54958 let e = _mm512_set_ps(
54959 0., 0., 0., 0., 0., 0., 0., 0., 17., 18., 19., 20., 17., 18., 19., 20.,
54960 );
54961 assert_eq_m512(r, e);
54962 }
54963
54964 #[simd_test(enable = "avx512f,avx512vl")]
54965 unsafe fn test_mm256_broadcast_f32x4() {
54966 let a = _mm_set_ps(17., 18., 19., 20.);
54967 let r = _mm256_broadcast_f32x4(a);
54968 let e = _mm256_set_ps(17., 18., 19., 20., 17., 18., 19., 20.);
54969 assert_eq_m256(r, e);
54970 }
54971
54972 #[simd_test(enable = "avx512f,avx512vl")]
54973 unsafe fn test_mm256_mask_broadcast_f32x4() {
54974 let src = _mm256_set1_ps(20.);
54975 let a = _mm_set_ps(17., 18., 19., 20.);
54976 let r = _mm256_mask_broadcast_f32x4(src, 0, a);
54977 assert_eq_m256(r, src);
54978 let r = _mm256_mask_broadcast_f32x4(src, 0b11111111, a);
54979 let e = _mm256_set_ps(17., 18., 19., 20., 17., 18., 19., 20.);
54980 assert_eq_m256(r, e);
54981 }
54982
54983 #[simd_test(enable = "avx512f,avx512vl")]
54984 unsafe fn test_mm256_maskz_broadcast_f32x4() {
54985 let a = _mm_set_ps(17., 18., 19., 20.);
54986 let r = _mm256_maskz_broadcast_f32x4(0, a);
54987 assert_eq_m256(r, _mm256_setzero_ps());
54988 let r = _mm256_maskz_broadcast_f32x4(0b11111111, a);
54989 let e = _mm256_set_ps(17., 18., 19., 20., 17., 18., 19., 20.);
54990 assert_eq_m256(r, e);
54991 }
54992
54993 #[simd_test(enable = "avx512f")]
54994 unsafe fn test_mm512_mask_blend_epi32() {
54995 let a = _mm512_set1_epi32(1);
54996 let b = _mm512_set1_epi32(2);
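        // A set mask bit selects the element from `b`; a clear bit keeps the element from `a`.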
54997 let r = _mm512_mask_blend_epi32(0b11111111_00000000, a, b);
54998 let e = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
54999 assert_eq_m512i(r, e);
55000 }
55001
55002 #[simd_test(enable = "avx512f,avx512vl")]
55003 unsafe fn test_mm256_mask_blend_epi32() {
55004 let a = _mm256_set1_epi32(1);
55005 let b = _mm256_set1_epi32(2);
55006 let r = _mm256_mask_blend_epi32(0b11111111, a, b);
55007 let e = _mm256_set1_epi32(2);
55008 assert_eq_m256i(r, e);
55009 }
55010
55011 #[simd_test(enable = "avx512f,avx512vl")]
55012 unsafe fn test_mm_mask_blend_epi32() {
55013 let a = _mm_set1_epi32(1);
55014 let b = _mm_set1_epi32(2);
55015 let r = _mm_mask_blend_epi32(0b00001111, a, b);
55016 let e = _mm_set1_epi32(2);
55017 assert_eq_m128i(r, e);
55018 }
55019
55020 #[simd_test(enable = "avx512f")]
55021 unsafe fn test_mm512_mask_blend_ps() {
55022 let a = _mm512_set1_ps(1.);
55023 let b = _mm512_set1_ps(2.);
55024 let r = _mm512_mask_blend_ps(0b11111111_00000000, a, b);
55025 let e = _mm512_set_ps(
55026 2., 2., 2., 2., 2., 2., 2., 2., 1., 1., 1., 1., 1., 1., 1., 1.,
55027 );
55028 assert_eq_m512(r, e);
55029 }
55030
55031 #[simd_test(enable = "avx512f,avx512vl")]
55032 unsafe fn test_mm256_mask_blend_ps() {
55033 let a = _mm256_set1_ps(1.);
55034 let b = _mm256_set1_ps(2.);
55035 let r = _mm256_mask_blend_ps(0b11111111, a, b);
55036 let e = _mm256_set1_ps(2.);
55037 assert_eq_m256(r, e);
55038 }
55039
55040 #[simd_test(enable = "avx512f,avx512vl")]
55041 unsafe fn test_mm_mask_blend_ps() {
55042 let a = _mm_set1_ps(1.);
55043 let b = _mm_set1_ps(2.);
55044 let r = _mm_mask_blend_ps(0b00001111, a, b);
55045 let e = _mm_set1_ps(2.);
55046 assert_eq_m128(r, e);
55047 }
55048
55049 #[simd_test(enable = "avx512f")]
55050 unsafe fn test_mm512_unpackhi_epi32() {
55051 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
55052 let b = _mm512_set_epi32(
55053 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
55054 );
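        // Interleaves the upper two dwords of each 128-bit lane: a[2], b[2], a[3], b[3].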
55055 let r = _mm512_unpackhi_epi32(a, b);
55056 let e = _mm512_set_epi32(17, 1, 18, 2, 21, 5, 22, 6, 25, 9, 26, 10, 29, 13, 30, 14);
55057 assert_eq_m512i(r, e);
55058 }
55059
55060 #[simd_test(enable = "avx512f")]
55061 unsafe fn test_mm512_mask_unpackhi_epi32() {
55062 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
55063 let b = _mm512_set_epi32(
55064 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
55065 );
55066 let r = _mm512_mask_unpackhi_epi32(a, 0, a, b);
55067 assert_eq_m512i(r, a);
55068 let r = _mm512_mask_unpackhi_epi32(a, 0b11111111_11111111, a, b);
55069 let e = _mm512_set_epi32(17, 1, 18, 2, 21, 5, 22, 6, 25, 9, 26, 10, 29, 13, 30, 14);
55070 assert_eq_m512i(r, e);
55071 }
55072
55073 #[simd_test(enable = "avx512f")]
55074 unsafe fn test_mm512_maskz_unpackhi_epi32() {
55075 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
55076 let b = _mm512_set_epi32(
55077 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
55078 );
55079 let r = _mm512_maskz_unpackhi_epi32(0, a, b);
55080 assert_eq_m512i(r, _mm512_setzero_si512());
55081 let r = _mm512_maskz_unpackhi_epi32(0b00000000_11111111, a, b);
55082 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 25, 9, 26, 10, 29, 13, 30, 14);
55083 assert_eq_m512i(r, e);
55084 }
55085
55086 #[simd_test(enable = "avx512f,avx512vl")]
55087 unsafe fn test_mm256_mask_unpackhi_epi32() {
55088 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
55089 let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
55090 let r = _mm256_mask_unpackhi_epi32(a, 0, a, b);
55091 assert_eq_m256i(r, a);
55092 let r = _mm256_mask_unpackhi_epi32(a, 0b11111111, a, b);
55093 let e = _mm256_set_epi32(17, 1, 18, 2, 21, 5, 22, 6);
55094 assert_eq_m256i(r, e);
55095 }
55096
55097 #[simd_test(enable = "avx512f,avx512vl")]
55098 unsafe fn test_mm256_maskz_unpackhi_epi32() {
55099 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
55100 let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
55101 let r = _mm256_maskz_unpackhi_epi32(0, a, b);
55102 assert_eq_m256i(r, _mm256_setzero_si256());
55103 let r = _mm256_maskz_unpackhi_epi32(0b11111111, a, b);
55104 let e = _mm256_set_epi32(17, 1, 18, 2, 21, 5, 22, 6);
55105 assert_eq_m256i(r, e);
55106 }
55107
55108 #[simd_test(enable = "avx512f,avx512vl")]
55109 unsafe fn test_mm_mask_unpackhi_epi32() {
55110 let a = _mm_set_epi32(1, 2, 3, 4);
55111 let b = _mm_set_epi32(17, 18, 19, 20);
55112 let r = _mm_mask_unpackhi_epi32(a, 0, a, b);
55113 assert_eq_m128i(r, a);
55114 let r = _mm_mask_unpackhi_epi32(a, 0b00001111, a, b);
55115 let e = _mm_set_epi32(17, 1, 18, 2);
55116 assert_eq_m128i(r, e);
55117 }
55118
55119 #[simd_test(enable = "avx512f,avx512vl")]
55120 unsafe fn test_mm_maskz_unpackhi_epi32() {
55121 let a = _mm_set_epi32(1, 2, 3, 4);
55122 let b = _mm_set_epi32(17, 18, 19, 20);
55123 let r = _mm_maskz_unpackhi_epi32(0, a, b);
55124 assert_eq_m128i(r, _mm_setzero_si128());
55125 let r = _mm_maskz_unpackhi_epi32(0b00001111, a, b);
55126 let e = _mm_set_epi32(17, 1, 18, 2);
55127 assert_eq_m128i(r, e);
55128 }
55129
55130 #[simd_test(enable = "avx512f")]
55131 unsafe fn test_mm512_unpackhi_ps() {
55132 let a = _mm512_set_ps(
55133 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
55134 );
55135 let b = _mm512_set_ps(
55136 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
55137 );
55138 let r = _mm512_unpackhi_ps(a, b);
55139 let e = _mm512_set_ps(
55140 17., 1., 18., 2., 21., 5., 22., 6., 25., 9., 26., 10., 29., 13., 30., 14.,
55141 );
55142 assert_eq_m512(r, e);
55143 }
55144
55145 #[simd_test(enable = "avx512f")]
55146 unsafe fn test_mm512_mask_unpackhi_ps() {
55147 let a = _mm512_set_ps(
55148 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
55149 );
55150 let b = _mm512_set_ps(
55151 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
55152 );
55153 let r = _mm512_mask_unpackhi_ps(a, 0, a, b);
55154 assert_eq_m512(r, a);
55155 let r = _mm512_mask_unpackhi_ps(a, 0b11111111_11111111, a, b);
55156 let e = _mm512_set_ps(
55157 17., 1., 18., 2., 21., 5., 22., 6., 25., 9., 26., 10., 29., 13., 30., 14.,
55158 );
55159 assert_eq_m512(r, e);
55160 }
55161
55162 #[simd_test(enable = "avx512f")]
55163 unsafe fn test_mm512_maskz_unpackhi_ps() {
55164 let a = _mm512_set_ps(
55165 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
55166 );
55167 let b = _mm512_set_ps(
55168 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
55169 );
55170 let r = _mm512_maskz_unpackhi_ps(0, a, b);
55171 assert_eq_m512(r, _mm512_setzero_ps());
55172 let r = _mm512_maskz_unpackhi_ps(0b00000000_11111111, a, b);
55173 let e = _mm512_set_ps(
55174 0., 0., 0., 0., 0., 0., 0., 0., 25., 9., 26., 10., 29., 13., 30., 14.,
55175 );
55176 assert_eq_m512(r, e);
55177 }
55178
55179 #[simd_test(enable = "avx512f,avx512vl")]
55180 unsafe fn test_mm256_mask_unpackhi_ps() {
55181 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
55182 let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
55183 let r = _mm256_mask_unpackhi_ps(a, 0, a, b);
55184 assert_eq_m256(r, a);
55185 let r = _mm256_mask_unpackhi_ps(a, 0b11111111, a, b);
55186 let e = _mm256_set_ps(17., 1., 18., 2., 21., 5., 22., 6.);
55187 assert_eq_m256(r, e);
55188 }
55189
55190 #[simd_test(enable = "avx512f,avx512vl")]
55191 unsafe fn test_mm256_maskz_unpackhi_ps() {
55192 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
55193 let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
55194 let r = _mm256_maskz_unpackhi_ps(0, a, b);
55195 assert_eq_m256(r, _mm256_setzero_ps());
55196 let r = _mm256_maskz_unpackhi_ps(0b11111111, a, b);
55197 let e = _mm256_set_ps(17., 1., 18., 2., 21., 5., 22., 6.);
55198 assert_eq_m256(r, e);
55199 }
55200
55201 #[simd_test(enable = "avx512f,avx512vl")]
55202 unsafe fn test_mm_mask_unpackhi_ps() {
55203 let a = _mm_set_ps(1., 2., 3., 4.);
55204 let b = _mm_set_ps(17., 18., 19., 20.);
55205 let r = _mm_mask_unpackhi_ps(a, 0, a, b);
55206 assert_eq_m128(r, a);
55207 let r = _mm_mask_unpackhi_ps(a, 0b00001111, a, b);
55208 let e = _mm_set_ps(17., 1., 18., 2.);
55209 assert_eq_m128(r, e);
55210 }
55211
55212 #[simd_test(enable = "avx512f,avx512vl")]
55213 unsafe fn test_mm_maskz_unpackhi_ps() {
55214 let a = _mm_set_ps(1., 2., 3., 4.);
55215 let b = _mm_set_ps(17., 18., 19., 20.);
55216 let r = _mm_maskz_unpackhi_ps(0, a, b);
55217 assert_eq_m128(r, _mm_setzero_ps());
55218 let r = _mm_maskz_unpackhi_ps(0b00001111, a, b);
55219 let e = _mm_set_ps(17., 1., 18., 2.);
55220 assert_eq_m128(r, e);
55221 }
55222
55223 #[simd_test(enable = "avx512f")]
55224 unsafe fn test_mm512_unpacklo_epi32() {
55225 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
55226 let b = _mm512_set_epi32(
55227 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
55228 );
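        // Interleaves the lower two dwords of each 128-bit lane: a[0], b[0], a[1], b[1].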
55229 let r = _mm512_unpacklo_epi32(a, b);
55230 let e = _mm512_set_epi32(19, 3, 20, 4, 23, 7, 24, 8, 27, 11, 28, 12, 31, 15, 32, 16);
55231 assert_eq_m512i(r, e);
55232 }
55233
55234 #[simd_test(enable = "avx512f")]
55235 unsafe fn test_mm512_mask_unpacklo_epi32() {
55236 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
55237 let b = _mm512_set_epi32(
55238 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
55239 );
55240 let r = _mm512_mask_unpacklo_epi32(a, 0, a, b);
55241 assert_eq_m512i(r, a);
55242 let r = _mm512_mask_unpacklo_epi32(a, 0b11111111_11111111, a, b);
55243 let e = _mm512_set_epi32(19, 3, 20, 4, 23, 7, 24, 8, 27, 11, 28, 12, 31, 15, 32, 16);
55244 assert_eq_m512i(r, e);
55245 }
55246
55247 #[simd_test(enable = "avx512f")]
55248 unsafe fn test_mm512_maskz_unpacklo_epi32() {
55249 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
55250 let b = _mm512_set_epi32(
55251 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
55252 );
55253 let r = _mm512_maskz_unpacklo_epi32(0, a, b);
55254 assert_eq_m512i(r, _mm512_setzero_si512());
55255 let r = _mm512_maskz_unpacklo_epi32(0b00000000_11111111, a, b);
55256 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 27, 11, 28, 12, 31, 15, 32, 16);
55257 assert_eq_m512i(r, e);
55258 }
55259
55260 #[simd_test(enable = "avx512f,avx512vl")]
55261 unsafe fn test_mm256_mask_unpacklo_epi32() {
55262 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
55263 let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
55264 let r = _mm256_mask_unpacklo_epi32(a, 0, a, b);
55265 assert_eq_m256i(r, a);
55266 let r = _mm256_mask_unpacklo_epi32(a, 0b11111111, a, b);
55267 let e = _mm256_set_epi32(19, 3, 20, 4, 23, 7, 24, 8);
55268 assert_eq_m256i(r, e);
55269 }
55270
55271 #[simd_test(enable = "avx512f,avx512vl")]
55272 unsafe fn test_mm256_maskz_unpacklo_epi32() {
55273 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
55274 let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
55275 let r = _mm256_maskz_unpacklo_epi32(0, a, b);
55276 assert_eq_m256i(r, _mm256_setzero_si256());
55277 let r = _mm256_maskz_unpacklo_epi32(0b11111111, a, b);
55278 let e = _mm256_set_epi32(19, 3, 20, 4, 23, 7, 24, 8);
55279 assert_eq_m256i(r, e);
55280 }
55281
55282 #[simd_test(enable = "avx512f,avx512vl")]
55283 unsafe fn test_mm_mask_unpacklo_epi32() {
55284 let a = _mm_set_epi32(1, 2, 3, 4);
55285 let b = _mm_set_epi32(17, 18, 19, 20);
55286 let r = _mm_mask_unpacklo_epi32(a, 0, a, b);
55287 assert_eq_m128i(r, a);
55288 let r = _mm_mask_unpacklo_epi32(a, 0b00001111, a, b);
55289 let e = _mm_set_epi32(19, 3, 20, 4);
55290 assert_eq_m128i(r, e);
55291 }
55292
55293 #[simd_test(enable = "avx512f,avx512vl")]
55294 unsafe fn test_mm_maskz_unpacklo_epi32() {
55295 let a = _mm_set_epi32(1, 2, 3, 4);
55296 let b = _mm_set_epi32(17, 18, 19, 20);
55297 let r = _mm_maskz_unpacklo_epi32(0, a, b);
55298 assert_eq_m128i(r, _mm_setzero_si128());
55299 let r = _mm_maskz_unpacklo_epi32(0b00001111, a, b);
55300 let e = _mm_set_epi32(19, 3, 20, 4);
55301 assert_eq_m128i(r, e);
55302 }
55303
55304 #[simd_test(enable = "avx512f")]
55305 unsafe fn test_mm512_unpacklo_ps() {
55306 let a = _mm512_set_ps(
55307 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
55308 );
55309 let b = _mm512_set_ps(
55310 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
55311 );
55312 let r = _mm512_unpacklo_ps(a, b);
55313 let e = _mm512_set_ps(
55314 19., 3., 20., 4., 23., 7., 24., 8., 27., 11., 28., 12., 31., 15., 32., 16.,
55315 );
55316 assert_eq_m512(r, e);
55317 }
55318
55319 #[simd_test(enable = "avx512f")]
55320 unsafe fn test_mm512_mask_unpacklo_ps() {
55321 let a = _mm512_set_ps(
55322 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
55323 );
55324 let b = _mm512_set_ps(
55325 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
55326 );
55327 let r = _mm512_mask_unpacklo_ps(a, 0, a, b);
55328 assert_eq_m512(r, a);
55329 let r = _mm512_mask_unpacklo_ps(a, 0b11111111_11111111, a, b);
55330 let e = _mm512_set_ps(
55331 19., 3., 20., 4., 23., 7., 24., 8., 27., 11., 28., 12., 31., 15., 32., 16.,
55332 );
55333 assert_eq_m512(r, e);
55334 }
55335
55336 #[simd_test(enable = "avx512f")]
55337 unsafe fn test_mm512_maskz_unpacklo_ps() {
55338 let a = _mm512_set_ps(
55339 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
55340 );
55341 let b = _mm512_set_ps(
55342 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
55343 );
55344 let r = _mm512_maskz_unpacklo_ps(0, a, b);
55345 assert_eq_m512(r, _mm512_setzero_ps());
55346 let r = _mm512_maskz_unpacklo_ps(0b00000000_11111111, a, b);
55347 let e = _mm512_set_ps(
55348 0., 0., 0., 0., 0., 0., 0., 0., 27., 11., 28., 12., 31., 15., 32., 16.,
55349 );
55350 assert_eq_m512(r, e);
55351 }
55352
55353 #[simd_test(enable = "avx512f,avx512vl")]
55354 unsafe fn test_mm256_mask_unpacklo_ps() {
55355 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
55356 let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
55357 let r = _mm256_mask_unpacklo_ps(a, 0, a, b);
55358 assert_eq_m256(r, a);
55359 let r = _mm256_mask_unpacklo_ps(a, 0b11111111, a, b);
55360 let e = _mm256_set_ps(19., 3., 20., 4., 23., 7., 24., 8.);
55361 assert_eq_m256(r, e);
55362 }
55363
55364 #[simd_test(enable = "avx512f,avx512vl")]
55365 unsafe fn test_mm256_maskz_unpacklo_ps() {
55366 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
55367 let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
55368 let r = _mm256_maskz_unpacklo_ps(0, a, b);
55369 assert_eq_m256(r, _mm256_setzero_ps());
55370 let r = _mm256_maskz_unpacklo_ps(0b11111111, a, b);
55371 let e = _mm256_set_ps(19., 3., 20., 4., 23., 7., 24., 8.);
55372 assert_eq_m256(r, e);
55373 }
55374
55375 #[simd_test(enable = "avx512f,avx512vl")]
55376 unsafe fn test_mm_mask_unpacklo_ps() {
55377 let a = _mm_set_ps(1., 2., 3., 4.);
55378 let b = _mm_set_ps(17., 18., 19., 20.);
55379 let r = _mm_mask_unpacklo_ps(a, 0, a, b);
55380 assert_eq_m128(r, a);
55381 let r = _mm_mask_unpacklo_ps(a, 0b00001111, a, b);
55382 let e = _mm_set_ps(19., 3., 20., 4.);
55383 assert_eq_m128(r, e);
55384 }
55385
55386 #[simd_test(enable = "avx512f,avx512vl")]
55387 unsafe fn test_mm_maskz_unpacklo_ps() {
55388 let a = _mm_set_ps(1., 2., 3., 4.);
55389 let b = _mm_set_ps(17., 18., 19., 20.);
55390 let r = _mm_maskz_unpacklo_ps(0, a, b);
55391 assert_eq_m128(r, _mm_setzero_ps());
55392 let r = _mm_maskz_unpacklo_ps(0b00001111, a, b);
55393 let e = _mm_set_ps(19., 3., 20., 4.);
55394 assert_eq_m128(r, e);
55395 }
55396
55397 #[simd_test(enable = "avx512f")]
55398 unsafe fn test_mm512_alignr_epi32() {
55399 let a = _mm512_set_epi32(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
55400 let b = _mm512_set_epi32(
55401 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
55402 );
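        // Concatenates `a` (upper half) and `b` (lower half) into 32 dwords, shifts
        // right by IMM8 dwords, and keeps the low 16 dwords.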
55403 let r = _mm512_alignr_epi32::<0>(a, b);
55404 assert_eq_m512i(r, b);
55405 let r = _mm512_alignr_epi32::<16>(a, b);
55406 assert_eq_m512i(r, b);
55407 let r = _mm512_alignr_epi32::<1>(a, b);
55408 let e = _mm512_set_epi32(
55409 1, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18,
55410 );
55411 assert_eq_m512i(r, e);
55412 }
55413
55414 #[simd_test(enable = "avx512f")]
55415 unsafe fn test_mm512_mask_alignr_epi32() {
55416 let a = _mm512_set_epi32(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
55417 let b = _mm512_set_epi32(
55418 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
55419 );
55420 let r = _mm512_mask_alignr_epi32::<1>(a, 0, a, b);
55421 assert_eq_m512i(r, a);
55422 let r = _mm512_mask_alignr_epi32::<1>(a, 0b11111111_11111111, a, b);
55423 let e = _mm512_set_epi32(
55424 1, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18,
55425 );
55426 assert_eq_m512i(r, e);
55427 }
55428
55429 #[simd_test(enable = "avx512f")]
55430 unsafe fn test_mm512_maskz_alignr_epi32() {
55431 let a = _mm512_set_epi32(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
55432 let b = _mm512_set_epi32(
55433 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
55434 );
55435 let r = _mm512_maskz_alignr_epi32::<1>(0, a, b);
55436 assert_eq_m512i(r, _mm512_setzero_si512());
55437 let r = _mm512_maskz_alignr_epi32::<1>(0b00000000_11111111, a, b);
55438 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 25, 24, 23, 22, 21, 20, 19, 18);
55439 assert_eq_m512i(r, e);
55440 }
55441
55442 #[simd_test(enable = "avx512f,avx512vl")]
55443 unsafe fn test_mm256_alignr_epi32() {
55444 let a = _mm256_set_epi32(8, 7, 6, 5, 4, 3, 2, 1);
55445 let b = _mm256_set_epi32(16, 15, 14, 13, 12, 11, 10, 9);
55446 let r = _mm256_alignr_epi32::<0>(a, b);
55447 assert_eq_m256i(r, b);
55448 let r = _mm256_alignr_epi32::<1>(a, b);
55449 let e = _mm256_set_epi32(1, 16, 15, 14, 13, 12, 11, 10);
55450 assert_eq_m256i(r, e);
55451 }
55452
55453 #[simd_test(enable = "avx512f,avx512vl")]
55454 unsafe fn test_mm256_mask_alignr_epi32() {
55455 let a = _mm256_set_epi32(8, 7, 6, 5, 4, 3, 2, 1);
55456 let b = _mm256_set_epi32(16, 15, 14, 13, 12, 11, 10, 9);
55457 let r = _mm256_mask_alignr_epi32::<1>(a, 0, a, b);
55458 assert_eq_m256i(r, a);
55459 let r = _mm256_mask_alignr_epi32::<1>(a, 0b11111111, a, b);
55460 let e = _mm256_set_epi32(1, 16, 15, 14, 13, 12, 11, 10);
55461 assert_eq_m256i(r, e);
55462 }
55463
55464 #[simd_test(enable = "avx512f,avx512vl")]
55465 unsafe fn test_mm256_maskz_alignr_epi32() {
55466 let a = _mm256_set_epi32(8, 7, 6, 5, 4, 3, 2, 1);
55467 let b = _mm256_set_epi32(16, 15, 14, 13, 12, 11, 10, 9);
55468 let r = _mm256_maskz_alignr_epi32::<1>(0, a, b);
55469 assert_eq_m256i(r, _mm256_setzero_si256());
55470 let r = _mm256_maskz_alignr_epi32::<1>(0b11111111, a, b);
55471 let e = _mm256_set_epi32(1, 16, 15, 14, 13, 12, 11, 10);
55472 assert_eq_m256i(r, e);
55473 }
55474
55475 #[simd_test(enable = "avx512f,avx512vl")]
55476 unsafe fn test_mm_alignr_epi32() {
55477 let a = _mm_set_epi32(4, 3, 2, 1);
55478 let b = _mm_set_epi32(8, 7, 6, 5);
55479 let r = _mm_alignr_epi32::<0>(a, b);
55480 assert_eq_m128i(r, b);
55481 let r = _mm_alignr_epi32::<1>(a, b);
55482 let e = _mm_set_epi32(1, 8, 7, 6);
55483 assert_eq_m128i(r, e);
55484 }
55485
55486 #[simd_test(enable = "avx512f,avx512vl")]
55487 unsafe fn test_mm_mask_alignr_epi32() {
55488 let a = _mm_set_epi32(4, 3, 2, 1);
55489 let b = _mm_set_epi32(8, 7, 6, 5);
55490 let r = _mm_mask_alignr_epi32::<1>(a, 0, a, b);
55491 assert_eq_m128i(r, a);
55492 let r = _mm_mask_alignr_epi32::<1>(a, 0b00001111, a, b);
55493 let e = _mm_set_epi32(1, 8, 7, 6);
55494 assert_eq_m128i(r, e);
55495 }
55496
55497 #[simd_test(enable = "avx512f,avx512vl")]
55498 unsafe fn test_mm_maskz_alignr_epi32() {
55499 let a = _mm_set_epi32(4, 3, 2, 1);
55500 let b = _mm_set_epi32(8, 7, 6, 5);
55501 let r = _mm_maskz_alignr_epi32::<1>(0, a, b);
55502 assert_eq_m128i(r, _mm_setzero_si128());
55503 let r = _mm_maskz_alignr_epi32::<1>(0b00001111, a, b);
55504 let e = _mm_set_epi32(1, 8, 7, 6);
55505 assert_eq_m128i(r, e);
55506 }
55507
55508 #[simd_test(enable = "avx512f")]
55509 unsafe fn test_mm512_and_epi32() {
55510 #[rustfmt::skip]
55511 let a = _mm512_set_epi32(
55512 1 << 1 | 1 << 2, 0, 0, 0,
55513 0, 0, 0, 0,
55514 0, 0, 0, 0,
55515 0, 0, 0, 1 << 1 | 1 << 3,
55516 );
55517 #[rustfmt::skip]
55518 let b = _mm512_set_epi32(
55519 1 << 1, 0, 0, 0,
55520 0, 0, 0, 0,
55521 0, 0, 0, 0,
55522 0, 0, 0, 1 << 3 | 1 << 4,
55523 );
55524 let r = _mm512_and_epi32(a, b);
55525 let e = _mm512_set_epi32(1 << 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
55526 assert_eq_m512i(r, e);
55527 }
55528
55529 #[simd_test(enable = "avx512f")]
55530 unsafe fn test_mm512_mask_and_epi32() {
55531 #[rustfmt::skip]
55532 let a = _mm512_set_epi32(
55533 1 << 1 | 1 << 2, 0, 0, 0,
55534 0, 0, 0, 0,
55535 0, 0, 0, 0,
55536 0, 0, 0, 1 << 1 | 1 << 3,
55537 );
55538 #[rustfmt::skip]
55539 let b = _mm512_set_epi32(
55540 1 << 1, 0, 0, 0,
55541 0, 0, 0, 0,
55542 0, 0, 0, 0,
55543 0, 0, 0, 1 << 3 | 1 << 4,
55544 );
55545 let r = _mm512_mask_and_epi32(a, 0, a, b);
55546 assert_eq_m512i(r, a);
55547 let r = _mm512_mask_and_epi32(a, 0b01111111_11111111, a, b);
55548 #[rustfmt::skip]
55549 let e = _mm512_set_epi32(
55550 1 << 1 | 1 << 2, 0, 0, 0,
55551 0, 0, 0, 0,
55552 0, 0, 0, 0,
55553 0, 0, 0, 1 << 3,
55554 );
55555 assert_eq_m512i(r, e);
55556 }
55557
55558 #[simd_test(enable = "avx512f")]
55559 unsafe fn test_mm512_maskz_and_epi32() {
55560 #[rustfmt::skip]
55561 let a = _mm512_set_epi32(
55562 1 << 1 | 1 << 2, 0, 0, 0,
55563 0, 0, 0, 0,
55564 0, 0, 0, 0,
55565 0, 0, 0, 1 << 1 | 1 << 3,
55566 );
55567 #[rustfmt::skip]
55568 let b = _mm512_set_epi32(
55569 1 << 1, 0, 0, 0,
55570 0, 0, 0, 0,
55571 0, 0, 0, 0,
55572 0, 0, 0, 1 << 3 | 1 << 4,
55573 );
55574 let r = _mm512_maskz_and_epi32(0, a, b);
55575 assert_eq_m512i(r, _mm512_setzero_si512());
55576 let r = _mm512_maskz_and_epi32(0b00000000_11111111, a, b);
55577 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
55578 assert_eq_m512i(r, e);
55579 }
55580
55581 #[simd_test(enable = "avx512f,avx512vl")]
55582 unsafe fn test_mm256_mask_and_epi32() {
55583 let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55584 let b = _mm256_set1_epi32(1 << 1);
55585 let r = _mm256_mask_and_epi32(a, 0, a, b);
55586 assert_eq_m256i(r, a);
55587 let r = _mm256_mask_and_epi32(a, 0b11111111, a, b);
55588 let e = _mm256_set1_epi32(1 << 1);
55589 assert_eq_m256i(r, e);
55590 }
55591
55592 #[simd_test(enable = "avx512f,avx512vl")]
55593 unsafe fn test_mm256_maskz_and_epi32() {
55594 let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55595 let b = _mm256_set1_epi32(1 << 1);
55596 let r = _mm256_maskz_and_epi32(0, a, b);
55597 assert_eq_m256i(r, _mm256_setzero_si256());
55598 let r = _mm256_maskz_and_epi32(0b11111111, a, b);
55599 let e = _mm256_set1_epi32(1 << 1);
55600 assert_eq_m256i(r, e);
55601 }
55602
55603 #[simd_test(enable = "avx512f,avx512vl")]
55604 unsafe fn test_mm_mask_and_epi32() {
55605 let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55606 let b = _mm_set1_epi32(1 << 1);
55607 let r = _mm_mask_and_epi32(a, 0, a, b);
55608 assert_eq_m128i(r, a);
55609 let r = _mm_mask_and_epi32(a, 0b00001111, a, b);
55610 let e = _mm_set1_epi32(1 << 1);
55611 assert_eq_m128i(r, e);
55612 }
55613
55614 #[simd_test(enable = "avx512f,avx512vl")]
55615 unsafe fn test_mm_maskz_and_epi32() {
55616 let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55617 let b = _mm_set1_epi32(1 << 1);
55618 let r = _mm_maskz_and_epi32(0, a, b);
55619 assert_eq_m128i(r, _mm_setzero_si128());
55620 let r = _mm_maskz_and_epi32(0b00001111, a, b);
55621 let e = _mm_set1_epi32(1 << 1);
55622 assert_eq_m128i(r, e);
55623 }
55624
55625 #[simd_test(enable = "avx512f")]
55626 unsafe fn test_mm512_and_si512() {
55627 #[rustfmt::skip]
55628 let a = _mm512_set_epi32(
55629 1 << 1 | 1 << 2, 0, 0, 0,
55630 0, 0, 0, 0,
55631 0, 0, 0, 0,
55632 0, 0, 0, 1 << 1 | 1 << 3,
55633 );
55634 #[rustfmt::skip]
55635 let b = _mm512_set_epi32(
55636 1 << 1, 0, 0, 0,
55637 0, 0, 0, 0,
55638 0, 0, 0, 0,
55639 0, 0, 0, 1 << 3 | 1 << 4,
55640 );
55641 let r = _mm512_and_si512(a, b);
55642 let e = _mm512_set_epi32(1 << 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
55643 assert_eq_m512i(r, e);
55644 }
55645
55646 #[simd_test(enable = "avx512f")]
55647 unsafe fn test_mm512_or_epi32() {
55648 #[rustfmt::skip]
55649 let a = _mm512_set_epi32(
55650 1 << 1 | 1 << 2, 0, 0, 0,
55651 0, 0, 0, 0,
55652 0, 0, 0, 0,
55653 0, 0, 0, 1 << 1 | 1 << 3,
55654 );
55655 #[rustfmt::skip]
55656 let b = _mm512_set_epi32(
55657 1 << 1, 0, 0, 0,
55658 0, 0, 0, 0,
55659 0, 0, 0, 0,
55660 0, 0, 0, 1 << 3 | 1 << 4,
55661 );
55662 let r = _mm512_or_epi32(a, b);
55663 #[rustfmt::skip]
55664 let e = _mm512_set_epi32(
55665 1 << 1 | 1 << 2, 0, 0, 0,
55666 0, 0, 0, 0,
55667 0, 0, 0, 0,
55668 0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
55669 );
55670 assert_eq_m512i(r, e);
55671 }
55672
55673 #[simd_test(enable = "avx512f")]
55674 unsafe fn test_mm512_mask_or_epi32() {
55675 #[rustfmt::skip]
55676 let a = _mm512_set_epi32(
55677 1 << 1 | 1 << 2, 0, 0, 0,
55678 0, 0, 0, 0,
55679 0, 0, 0, 0,
55680 0, 0, 0, 1 << 1 | 1 << 3,
55681 );
55682 #[rustfmt::skip]
55683 let b = _mm512_set_epi32(
55684 1 << 1, 0, 0, 0,
55685 0, 0, 0, 0,
55686 0, 0, 0, 0,
55687 0, 0, 0, 1 << 3 | 1 << 4,
55688 );
55689 let r = _mm512_mask_or_epi32(a, 0, a, b);
55690 assert_eq_m512i(r, a);
55691 let r = _mm512_mask_or_epi32(a, 0b11111111_11111111, a, b);
55692 #[rustfmt::skip]
55693 let e = _mm512_set_epi32(
55694 1 << 1 | 1 << 2, 0, 0, 0,
55695 0, 0, 0, 0,
55696 0, 0, 0, 0,
55697 0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
55698 );
55699 assert_eq_m512i(r, e);
55700 }
55701
55702 #[simd_test(enable = "avx512f")]
55703 unsafe fn test_mm512_maskz_or_epi32() {
55704 #[rustfmt::skip]
55705 let a = _mm512_set_epi32(
55706 1 << 1 | 1 << 2, 0, 0, 0,
55707 0, 0, 0, 0,
55708 0, 0, 0, 0,
55709 0, 0, 0, 1 << 1 | 1 << 3,
55710 );
55711 #[rustfmt::skip]
55712 let b = _mm512_set_epi32(
55713 1 << 1, 0, 0, 0,
55714 0, 0, 0, 0,
55715 0, 0, 0, 0,
55716 0, 0, 0, 1 << 3 | 1 << 4,
55717 );
55718 let r = _mm512_maskz_or_epi32(0, a, b);
55719 assert_eq_m512i(r, _mm512_setzero_si512());
55720 let r = _mm512_maskz_or_epi32(0b00000000_11111111, a, b);
55721 #[rustfmt::skip]
55722 let e = _mm512_set_epi32(
55723 0, 0, 0, 0,
55724 0, 0, 0, 0,
55725 0, 0, 0, 0,
55726 0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
55727 );
55728 assert_eq_m512i(r, e);
55729 }
55730
55731 #[simd_test(enable = "avx512f,avx512vl")]
55732 unsafe fn test_mm256_or_epi32() {
55733 let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55734 let b = _mm256_set1_epi32(1 << 1);
55735 let r = _mm256_or_epi32(a, b);
55736 let e = _mm256_set1_epi32(1 << 1 | 1 << 2);
55737 assert_eq_m256i(r, e);
55738 }
55739
55740 #[simd_test(enable = "avx512f,avx512vl")]
55741 unsafe fn test_mm256_mask_or_epi32() {
55742 let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55743 let b = _mm256_set1_epi32(1 << 1);
55744 let r = _mm256_mask_or_epi32(a, 0, a, b);
55745 assert_eq_m256i(r, a);
55746 let r = _mm256_mask_or_epi32(a, 0b11111111, a, b);
55747 let e = _mm256_set1_epi32(1 << 1 | 1 << 2);
55748 assert_eq_m256i(r, e);
55749 }
55750
55751 #[simd_test(enable = "avx512f,avx512vl")]
55752 unsafe fn test_mm256_maskz_or_epi32() {
55753 let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55754 let b = _mm256_set1_epi32(1 << 1);
55755 let r = _mm256_maskz_or_epi32(0, a, b);
55756 assert_eq_m256i(r, _mm256_setzero_si256());
55757 let r = _mm256_maskz_or_epi32(0b11111111, a, b);
55758 let e = _mm256_set1_epi32(1 << 1 | 1 << 2);
55759 assert_eq_m256i(r, e);
55760 }
55761
55762 #[simd_test(enable = "avx512f,avx512vl")]
55763 unsafe fn test_mm_or_epi32() {
55764 let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55765 let b = _mm_set1_epi32(1 << 1);
55766 let r = _mm_or_epi32(a, b);
55767 let e = _mm_set1_epi32(1 << 1 | 1 << 2);
55768 assert_eq_m128i(r, e);
55769 }
55770
55771 #[simd_test(enable = "avx512f,avx512vl")]
55772 unsafe fn test_mm_mask_or_epi32() {
55773 let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55774 let b = _mm_set1_epi32(1 << 1);
55775 let r = _mm_mask_or_epi32(a, 0, a, b);
55776 assert_eq_m128i(r, a);
55777 let r = _mm_mask_or_epi32(a, 0b00001111, a, b);
55778 let e = _mm_set1_epi32(1 << 1 | 1 << 2);
55779 assert_eq_m128i(r, e);
55780 }
55781
55782 #[simd_test(enable = "avx512f,avx512vl")]
55783 unsafe fn test_mm_maskz_or_epi32() {
55784 let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55785 let b = _mm_set1_epi32(1 << 1);
55786 let r = _mm_maskz_or_epi32(0, a, b);
55787 assert_eq_m128i(r, _mm_setzero_si128());
55788 let r = _mm_maskz_or_epi32(0b00001111, a, b);
55789 let e = _mm_set1_epi32(1 << 1 | 1 << 2);
55790 assert_eq_m128i(r, e);
55791 }
55792
55793 #[simd_test(enable = "avx512f")]
55794 unsafe fn test_mm512_or_si512() {
55795 #[rustfmt::skip]
55796 let a = _mm512_set_epi32(
55797 1 << 1 | 1 << 2, 0, 0, 0,
55798 0, 0, 0, 0,
55799 0, 0, 0, 0,
55800 0, 0, 0, 1 << 1 | 1 << 3,
55801 );
55802 #[rustfmt::skip]
55803 let b = _mm512_set_epi32(
55804 1 << 1, 0, 0, 0,
55805 0, 0, 0, 0,
55806 0, 0, 0, 0,
55807 0, 0, 0, 1 << 3 | 1 << 4,
55808 );
55809 let r = _mm512_or_si512(a, b);
55810 #[rustfmt::skip]
55811 let e = _mm512_set_epi32(
55812 1 << 1 | 1 << 2, 0, 0, 0,
55813 0, 0, 0, 0,
55814 0, 0, 0, 0,
55815 0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
55816 );
55817 assert_eq_m512i(r, e);
55818 }
55819
55820 #[simd_test(enable = "avx512f")]
55821 unsafe fn test_mm512_xor_epi32() {
55822 #[rustfmt::skip]
55823 let a = _mm512_set_epi32(
55824 1 << 1 | 1 << 2, 0, 0, 0,
55825 0, 0, 0, 0,
55826 0, 0, 0, 0,
55827 0, 0, 0, 1 << 1 | 1 << 3,
55828 );
55829 #[rustfmt::skip]
55830 let b = _mm512_set_epi32(
55831 1 << 1, 0, 0, 0,
55832 0, 0, 0, 0,
55833 0, 0, 0, 0,
55834 0, 0, 0, 1 << 3 | 1 << 4,
55835 );
55836 let r = _mm512_xor_epi32(a, b);
55837 #[rustfmt::skip]
55838 let e = _mm512_set_epi32(
55839 1 << 2, 0, 0, 0,
55840 0, 0, 0, 0,
55841 0, 0, 0, 0,
55842 0, 0, 0, 1 << 1 | 1 << 4,
55843 );
55844 assert_eq_m512i(r, e);
55845 }
55846
55847 #[simd_test(enable = "avx512f")]
55848 unsafe fn test_mm512_mask_xor_epi32() {
55849 #[rustfmt::skip]
55850 let a = _mm512_set_epi32(
55851 1 << 1 | 1 << 2, 0, 0, 0,
55852 0, 0, 0, 0,
55853 0, 0, 0, 0,
55854 0, 0, 0, 1 << 1 | 1 << 3,
55855 );
55856 #[rustfmt::skip]
55857 let b = _mm512_set_epi32(
55858 1 << 1, 0, 0, 0,
55859 0, 0, 0, 0,
55860 0, 0, 0, 0,
55861 0, 0, 0, 1 << 3 | 1 << 4,
55862 );
55863 let r = _mm512_mask_xor_epi32(a, 0, a, b);
55864 assert_eq_m512i(r, a);
55865 let r = _mm512_mask_xor_epi32(a, 0b01111111_11111111, a, b);
55866 #[rustfmt::skip]
55867 let e = _mm512_set_epi32(
55868 1 << 1 | 1 << 2, 0, 0, 0,
55869 0, 0, 0, 0,
55870 0, 0, 0, 0,
55871 0, 0, 0, 1 << 1 | 1 << 4,
55872 );
55873 assert_eq_m512i(r, e);
55874 }
55875
55876 #[simd_test(enable = "avx512f")]
55877 unsafe fn test_mm512_maskz_xor_epi32() {
55878 #[rustfmt::skip]
55879 let a = _mm512_set_epi32(
55880 1 << 1 | 1 << 2, 0, 0, 0,
55881 0, 0, 0, 0,
55882 0, 0, 0, 0,
55883 0, 0, 0, 1 << 1 | 1 << 3,
55884 );
55885 #[rustfmt::skip]
55886 let b = _mm512_set_epi32(
55887 1 << 1, 0, 0, 0,
55888 0, 0, 0, 0,
55889 0, 0, 0, 0,
55890 0, 0, 0, 1 << 3 | 1 << 4,
55891 );
55892 let r = _mm512_maskz_xor_epi32(0, a, b);
55893 assert_eq_m512i(r, _mm512_setzero_si512());
55894 let r = _mm512_maskz_xor_epi32(0b00000000_11111111, a, b);
55895 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 4);
55896 assert_eq_m512i(r, e);
55897 }
55898
55899 #[simd_test(enable = "avx512f,avx512vl")]
55900 unsafe fn test_mm256_xor_epi32() {
55901 let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55902 let b = _mm256_set1_epi32(1 << 1);
55903 let r = _mm256_xor_epi32(a, b);
55904 let e = _mm256_set1_epi32(1 << 2);
55905 assert_eq_m256i(r, e);
55906 }
55907
55908 #[simd_test(enable = "avx512f,avx512vl")]
55909 unsafe fn test_mm256_mask_xor_epi32() {
55910 let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55911 let b = _mm256_set1_epi32(1 << 1);
55912 let r = _mm256_mask_xor_epi32(a, 0, a, b);
55913 assert_eq_m256i(r, a);
55914 let r = _mm256_mask_xor_epi32(a, 0b11111111, a, b);
55915 let e = _mm256_set1_epi32(1 << 2);
55916 assert_eq_m256i(r, e);
55917 }
55918
55919 #[simd_test(enable = "avx512f,avx512vl")]
55920 unsafe fn test_mm256_maskz_xor_epi32() {
55921 let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55922 let b = _mm256_set1_epi32(1 << 1);
55923 let r = _mm256_maskz_xor_epi32(0, a, b);
55924 assert_eq_m256i(r, _mm256_setzero_si256());
55925 let r = _mm256_maskz_xor_epi32(0b11111111, a, b);
55926 let e = _mm256_set1_epi32(1 << 2);
55927 assert_eq_m256i(r, e);
55928 }
55929
55930 #[simd_test(enable = "avx512f,avx512vl")]
55931 unsafe fn test_mm_xor_epi32() {
55932 let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55933 let b = _mm_set1_epi32(1 << 1);
55934 let r = _mm_xor_epi32(a, b);
55935 let e = _mm_set1_epi32(1 << 2);
55936 assert_eq_m128i(r, e);
55937 }
55938
55939 #[simd_test(enable = "avx512f,avx512vl")]
55940 unsafe fn test_mm_mask_xor_epi32() {
55941 let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55942 let b = _mm_set1_epi32(1 << 1);
55943 let r = _mm_mask_xor_epi32(a, 0, a, b);
55944 assert_eq_m128i(r, a);
55945 let r = _mm_mask_xor_epi32(a, 0b00001111, a, b);
55946 let e = _mm_set1_epi32(1 << 2);
55947 assert_eq_m128i(r, e);
55948 }
55949
55950 #[simd_test(enable = "avx512f,avx512vl")]
55951 unsafe fn test_mm_maskz_xor_epi32() {
55952 let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55953 let b = _mm_set1_epi32(1 << 1);
55954 let r = _mm_maskz_xor_epi32(0, a, b);
55955 assert_eq_m128i(r, _mm_setzero_si128());
55956 let r = _mm_maskz_xor_epi32(0b00001111, a, b);
55957 let e = _mm_set1_epi32(1 << 2);
55958 assert_eq_m128i(r, e);
55959 }
55960
55961 #[simd_test(enable = "avx512f")]
55962 unsafe fn test_mm512_xor_si512() {
55963 #[rustfmt::skip]
55964 let a = _mm512_set_epi32(
55965 1 << 1 | 1 << 2, 0, 0, 0,
55966 0, 0, 0, 0,
55967 0, 0, 0, 0,
55968 0, 0, 0, 1 << 1 | 1 << 3,
55969 );
55970 #[rustfmt::skip]
55971 let b = _mm512_set_epi32(
55972 1 << 1, 0, 0, 0,
55973 0, 0, 0, 0,
55974 0, 0, 0, 0,
55975 0, 0, 0, 1 << 3 | 1 << 4,
55976 );
55977 let r = _mm512_xor_si512(a, b);
55978 #[rustfmt::skip]
55979 let e = _mm512_set_epi32(
55980 1 << 2, 0, 0, 0,
55981 0, 0, 0, 0,
55982 0, 0, 0, 0,
55983 0, 0, 0, 1 << 1 | 1 << 4,
55984 );
55985 assert_eq_m512i(r, e);
55986 }
55987
55988 #[simd_test(enable = "avx512f")]
55989 unsafe fn test_mm512_andnot_epi32() {
55990 let a = _mm512_set1_epi32(0);
55991 let b = _mm512_set1_epi32(1 << 3 | 1 << 4);
55992 let r = _mm512_andnot_epi32(a, b);
55993 let e = _mm512_set1_epi32(1 << 3 | 1 << 4);
55994 assert_eq_m512i(r, e);
55995 }
55996
55997 #[simd_test(enable = "avx512f")]
55998 unsafe fn test_mm512_mask_andnot_epi32() {
55999 let a = _mm512_set1_epi32(1 << 1 | 1 << 2);
56000 let b = _mm512_set1_epi32(1 << 3 | 1 << 4);
56001 let r = _mm512_mask_andnot_epi32(a, 0, a, b);
56002 assert_eq_m512i(r, a);
56003 let r = _mm512_mask_andnot_epi32(a, 0b11111111_11111111, a, b);
56004 let e = _mm512_set1_epi32(1 << 3 | 1 << 4);
56005 assert_eq_m512i(r, e);
56006 }
56007
56008 #[simd_test(enable = "avx512f")]
56009 unsafe fn test_mm512_maskz_andnot_epi32() {
56010 let a = _mm512_set1_epi32(1 << 1 | 1 << 2);
56011 let b = _mm512_set1_epi32(1 << 3 | 1 << 4);
56012 let r = _mm512_maskz_andnot_epi32(0, a, b);
56013 assert_eq_m512i(r, _mm512_setzero_si512());
56014 let r = _mm512_maskz_andnot_epi32(0b00000000_11111111, a, b);
56015 #[rustfmt::skip]
56016 let e = _mm512_set_epi32(
56017 0, 0, 0, 0,
56018 0, 0, 0, 0,
56019 1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4,
56020 1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4,
56021 );
56022 assert_eq_m512i(r, e);
56023 }
56024
56025 #[simd_test(enable = "avx512f,avx512vl")]
56026 unsafe fn test_mm256_mask_andnot_epi32() {
56027 let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
56028 let b = _mm256_set1_epi32(1 << 3 | 1 << 4);
56029 let r = _mm256_mask_andnot_epi32(a, 0, a, b);
56030 assert_eq_m256i(r, a);
56031 let r = _mm256_mask_andnot_epi32(a, 0b11111111, a, b);
56032 let e = _mm256_set1_epi32(1 << 3 | 1 << 4);
56033 assert_eq_m256i(r, e);
56034 }
56035
56036 #[simd_test(enable = "avx512f,avx512vl")]
56037 unsafe fn test_mm256_maskz_andnot_epi32() {
56038 let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
56039 let b = _mm256_set1_epi32(1 << 3 | 1 << 4);
56040 let r = _mm256_maskz_andnot_epi32(0, a, b);
56041 assert_eq_m256i(r, _mm256_setzero_si256());
56042 let r = _mm256_maskz_andnot_epi32(0b11111111, a, b);
56043 let e = _mm256_set1_epi32(1 << 3 | 1 << 4);
56044 assert_eq_m256i(r, e);
56045 }
56046
56047 #[simd_test(enable = "avx512f,avx512vl")]
56048 unsafe fn test_mm_mask_andnot_epi32() {
56049 let a = _mm_set1_epi32(1 << 1 | 1 << 2);
56050 let b = _mm_set1_epi32(1 << 3 | 1 << 4);
56051 let r = _mm_mask_andnot_epi32(a, 0, a, b);
56052 assert_eq_m128i(r, a);
56053 let r = _mm_mask_andnot_epi32(a, 0b00001111, a, b);
56054 let e = _mm_set1_epi32(1 << 3 | 1 << 4);
56055 assert_eq_m128i(r, e);
56056 }
56057
56058 #[simd_test(enable = "avx512f,avx512vl")]
56059 unsafe fn test_mm_maskz_andnot_epi32() {
56060 let a = _mm_set1_epi32(1 << 1 | 1 << 2);
56061 let b = _mm_set1_epi32(1 << 3 | 1 << 4);
56062 let r = _mm_maskz_andnot_epi32(0, a, b);
56063 assert_eq_m128i(r, _mm_setzero_si128());
56064 let r = _mm_maskz_andnot_epi32(0b00001111, a, b);
56065 let e = _mm_set1_epi32(1 << 3 | 1 << 4);
56066 assert_eq_m128i(r, e);
56067 }
56068
56069 #[simd_test(enable = "avx512f")]
56070 unsafe fn test_cvtmask16_u32() {
56071 let a: __mmask16 = 0b11001100_00110011;
56072 let r = _cvtmask16_u32(a);
56073 let e: u32 = 0b11001100_00110011;
56074 assert_eq!(r, e);
56075 }
56076
56077 #[simd_test(enable = "avx512f")]
56078 unsafe fn test_cvtu32_mask16() {
56079 let a: u32 = 0b11001100_00110011;
56080 let r = _cvtu32_mask16(a);
56081 let e: __mmask16 = 0b11001100_00110011;
56082 assert_eq!(r, e);
56083 }
56084
56085 #[simd_test(enable = "avx512f")]
56086 unsafe fn test_mm512_kand() {
56087 let a: u16 = 0b11001100_00110011;
56088 let b: u16 = 0b11001100_00110011;
56089 let r = _mm512_kand(a, b);
56090 let e: u16 = 0b11001100_00110011;
56091 assert_eq!(r, e);
56092 }
56093
56094 #[simd_test(enable = "avx512f")]
56095 unsafe fn test_kand_mask16() {
56096 let a: u16 = 0b11001100_00110011;
56097 let b: u16 = 0b11001100_00110011;
56098 let r = _kand_mask16(a, b);
56099 let e: u16 = 0b11001100_00110011;
56100 assert_eq!(r, e);
56101 }
56102
56103 #[simd_test(enable = "avx512f")]
56104 unsafe fn test_mm512_kor() {
56105 let a: u16 = 0b11001100_00110011;
56106 let b: u16 = 0b00101110_00001011;
56107 let r = _mm512_kor(a, b);
56108 let e: u16 = 0b11101110_00111011;
56109 assert_eq!(r, e);
56110 }
56111
56112 #[simd_test(enable = "avx512f")]
56113 unsafe fn test_kor_mask16() {
56114 let a: u16 = 0b11001100_00110011;
56115 let b: u16 = 0b00101110_00001011;
56116 let r = _kor_mask16(a, b);
56117 let e: u16 = 0b11101110_00111011;
56118 assert_eq!(r, e);
56119 }
56120
56121 #[simd_test(enable = "avx512f")]
56122 unsafe fn test_mm512_kxor() {
56123 let a: u16 = 0b11001100_00110011;
56124 let b: u16 = 0b00101110_00001011;
56125 let r = _mm512_kxor(a, b);
56126 let e: u16 = 0b11100010_00111000;
56127 assert_eq!(r, e);
56128 }
56129
56130 #[simd_test(enable = "avx512f")]
56131 unsafe fn test_kxor_mask16() {
56132 let a: u16 = 0b11001100_00110011;
56133 let b: u16 = 0b00101110_00001011;
56134 let r = _kxor_mask16(a, b);
56135 let e: u16 = 0b11100010_00111000;
56136 assert_eq!(r, e);
56137 }
56138
56139 #[simd_test(enable = "avx512f")]
56140 unsafe fn test_mm512_knot() {
56141 let a: u16 = 0b11001100_00110011;
56142 let r = _mm512_knot(a);
56143 let e: u16 = 0b00110011_11001100;
56144 assert_eq!(r, e);
56145 }
56146
56147 #[simd_test(enable = "avx512f")]
56148 unsafe fn test_knot_mask16() {
56149 let a: u16 = 0b11001100_00110011;
56150 let r = _knot_mask16(a);
56151 let e: u16 = 0b00110011_11001100;
56152 assert_eq!(r, e);
56153 }
56154
56155 #[simd_test(enable = "avx512f")]
56156 unsafe fn test_mm512_kandn() {
56157 let a: u16 = 0b11001100_00110011;
56158 let b: u16 = 0b00101110_00001011;
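// kandn computes `!a & b` on the 16-bit masks.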
56159 let r = _mm512_kandn(a, b);
56160 let e: u16 = 0b00100010_00001000;
56161 assert_eq!(r, e);
56162 }
56163
56164 #[simd_test(enable = "avx512f")]
56165 unsafe fn test_kandn_mask16() {
56166 let a: u16 = 0b11001100_00110011;
56167 let b: u16 = 0b00101110_00001011;
56168 let r = _kandn_mask16(a, b);
56169 let e: u16 = 0b00100010_00001000;
56170 assert_eq!(r, e);
56171 }
56172
56173 #[simd_test(enable = "avx512f")]
56174 unsafe fn test_mm512_kxnor() {
56175 let a: u16 = 0b11001100_00110011;
56176 let b: u16 = 0b00101110_00001011;
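// kxnor computes `!(a ^ b)` on the 16-bit masks.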
56177 let r = _mm512_kxnor(a, b);
56178 let e: u16 = 0b00011101_11000111;
56179 assert_eq!(r, e);
56180 }
56181
56182 #[simd_test(enable = "avx512f")]
56183 unsafe fn test_kxnor_mask16() {
56184 let a: u16 = 0b11001100_00110011;
56185 let b: u16 = 0b00101110_00001011;
56186 let r = _kxnor_mask16(a, b);
56187 let e: u16 = 0b00011101_11000111;
56188 assert_eq!(r, e);
56189 }
56190
56191 #[simd_test(enable = "avx512dq")]
56192 unsafe fn test_kortest_mask16_u8() {
56193 let a: __mmask16 = 0b0110100101101001;
56194 let b: __mmask16 = 0b1011011010110110;
56195 let mut all_ones: u8 = 0;
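// `a | b` sets every bit, so the all-ones flag is written as 1 while the all-zeros result is 0.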
56196 let r = _kortest_mask16_u8(a, b, &mut all_ones);
56197 assert_eq!(r, 0);
56198 assert_eq!(all_ones, 1);
56199 }
56200
56201 #[simd_test(enable = "avx512dq")]
56202 unsafe fn test_kortestc_mask16_u8() {
56203 let a: __mmask16 = 0b0110100101101001;
56204 let b: __mmask16 = 0b1011011010110110;
56205 let r = _kortestc_mask16_u8(a, b);
56206 assert_eq!(r, 1);
56207 }
56208
56209 #[simd_test(enable = "avx512dq")]
56210 unsafe fn test_kortestz_mask16_u8() {
56211 let a: __mmask16 = 0b0110100101101001;
56212 let b: __mmask16 = 0b1011011010110110;
56213 let r = _kortestz_mask16_u8(a, b);
56214 assert_eq!(r, 0);
56215 }
56216
56217 #[simd_test(enable = "avx512dq")]
56218 unsafe fn test_kshiftli_mask16() {
56219 let a: __mmask16 = 0b1001011011000011;
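// The shift count is a const generic; bits shifted out of the 16-bit mask are discarded.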
56220 let r = _kshiftli_mask16::<3>(a);
56221 let e: __mmask16 = 0b1011011000011000;
56222 assert_eq!(r, e);
56223 }
56224
56225 #[simd_test(enable = "avx512dq")]
56226 unsafe fn test_kshiftri_mask16() {
56227 let a: __mmask16 = 0b0110100100111100;
56228 let r = _kshiftri_mask16::<3>(a);
56229 let e: __mmask16 = 0b0000110100100111;
56230 assert_eq!(r, e);
56231 }
56232
56233 #[simd_test(enable = "avx512f")]
56234 unsafe fn test_load_mask16() {
56235 let a: __mmask16 = 0b1001011011000011;
56236 let r = _load_mask16(&a);
56237 let e: __mmask16 = 0b1001011011000011;
56238 assert_eq!(r, e);
56239 }
56240
56241 #[simd_test(enable = "avx512f")]
56242 unsafe fn test_store_mask16() {
56243 let a: __mmask16 = 0b0110100100111100;
56244 let mut r = 0;
56245 _store_mask16(&mut r, a);
56246 let e: __mmask16 = 0b0110100100111100;
56247 assert_eq!(r, e);
56248 }
56249
56250 #[simd_test(enable = "avx512f")]
56251 unsafe fn test_mm512_kmov() {
56252 let a: u16 = 0b11001100_00110011;
56253 let r = _mm512_kmov(a);
56254 let e: u16 = 0b11001100_00110011;
56255 assert_eq!(r, e);
56256 }
56257
56258 #[simd_test(enable = "avx512f")]
56259 unsafe fn test_mm512_int2mask() {
56260 let a: i32 = 0b11001100_00110011;
56261 let r = _mm512_int2mask(a);
56262 let e: u16 = 0b11001100_00110011;
56263 assert_eq!(r, e);
56264 }
56265
56266 #[simd_test(enable = "avx512f")]
56267 unsafe fn test_mm512_mask2int() {
56268 let a: __mmask16 = 0b11001100_00110011;
56269 let r = _mm512_mask2int(a);
56270 let e: i32 = 0b11001100_00110011;
56271 assert_eq!(r, e);
56272 }
56273
56274 #[simd_test(enable = "avx512f")]
56275 unsafe fn test_mm512_kunpackb() {
56276 let a: u16 = 0b11001100_00110011;
56277 let b: u16 = 0b00101110_00001011;
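// kunpackb concatenates the low byte of `a` (upper half) with the low byte of `b` (lower half).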
56278 let r = _mm512_kunpackb(a, b);
56279 let e: u16 = 0b00110011_00001011;
56280 assert_eq!(r, e);
56281 }
56282
56283 #[simd_test(enable = "avx512f")]
56284 unsafe fn test_mm512_kortestc() {
56285 let a: u16 = 0b11001100_00110011;
56286 let b: u16 = 0b00101110_00001011;
56287 let r = _mm512_kortestc(a, b);
56288 assert_eq!(r, 0);
56289 let b: u16 = 0b11111111_11111111;
56290 let r = _mm512_kortestc(a, b);
56291 assert_eq!(r, 1);
56292 }
56293
56294 #[simd_test(enable = "avx512f")]
56295 unsafe fn test_mm512_kortestz() {
56296 let a: u16 = 0b11001100_00110011;
56297 let b: u16 = 0b00101110_00001011;
56298 let r = _mm512_kortestz(a, b);
56299 assert_eq!(r, 0);
56300 let r = _mm512_kortestz(0, 0);
56301 assert_eq!(r, 1);
56302 }
56303
56304 #[simd_test(enable = "avx512f")]
56305 unsafe fn test_mm512_test_epi32_mask() {
56306 let a = _mm512_set1_epi32(1 << 0);
56307 let b = _mm512_set1_epi32(1 << 0 | 1 << 1);
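// Each mask bit is set when `a & b` is non-zero in the corresponding lane.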
56308 let r = _mm512_test_epi32_mask(a, b);
56309 let e: __mmask16 = 0b11111111_11111111;
56310 assert_eq!(r, e);
56311 }
56312
56313 #[simd_test(enable = "avx512f")]
56314 unsafe fn test_mm512_mask_test_epi32_mask() {
56315 let a = _mm512_set1_epi32(1 << 0);
56316 let b = _mm512_set1_epi32(1 << 0 | 1 << 1);
56317 let r = _mm512_mask_test_epi32_mask(0, a, b);
56318 assert_eq!(r, 0);
56319 let r = _mm512_mask_test_epi32_mask(0b11111111_11111111, a, b);
56320 let e: __mmask16 = 0b11111111_11111111;
56321 assert_eq!(r, e);
56322 }
56323
56324 #[simd_test(enable = "avx512f,avx512vl")]
56325 unsafe fn test_mm256_test_epi32_mask() {
56326 let a = _mm256_set1_epi32(1 << 0);
56327 let b = _mm256_set1_epi32(1 << 0 | 1 << 1);
56328 let r = _mm256_test_epi32_mask(a, b);
56329 let e: __mmask8 = 0b11111111;
56330 assert_eq!(r, e);
56331 }
56332
56333 #[simd_test(enable = "avx512f,avx512vl")]
56334 unsafe fn test_mm256_mask_test_epi32_mask() {
56335 let a = _mm256_set1_epi32(1 << 0);
56336 let b = _mm256_set1_epi32(1 << 0 | 1 << 1);
56337 let r = _mm256_mask_test_epi32_mask(0, a, b);
56338 assert_eq!(r, 0);
56339 let r = _mm256_mask_test_epi32_mask(0b11111111, a, b);
56340 let e: __mmask8 = 0b11111111;
56341 assert_eq!(r, e);
56342 }
56343
56344 #[simd_test(enable = "avx512f,avx512vl")]
56345 unsafe fn test_mm_test_epi32_mask() {
56346 let a = _mm_set1_epi32(1 << 0);
56347 let b = _mm_set1_epi32(1 << 0 | 1 << 1);
56348 let r = _mm_test_epi32_mask(a, b);
56349 let e: __mmask8 = 0b00001111;
56350 assert_eq!(r, e);
56351 }
56352
56353 #[simd_test(enable = "avx512f,avx512vl")]
56354 unsafe fn test_mm_mask_test_epi32_mask() {
56355 let a = _mm_set1_epi32(1 << 0);
56356 let b = _mm_set1_epi32(1 << 0 | 1 << 1);
56357 let r = _mm_mask_test_epi32_mask(0, a, b);
56358 assert_eq!(r, 0);
56359 let r = _mm_mask_test_epi32_mask(0b11111111, a, b);
56360 let e: __mmask8 = 0b00001111;
56361 assert_eq!(r, e);
56362 }
56363
56364 #[simd_test(enable = "avx512f")]
56365 unsafe fn test_mm512_testn_epi32_mask() {
56366 let a = _mm512_set1_epi32(1 << 0);
56367 let b = _mm512_set1_epi32(1 << 0 | 1 << 1);
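// testn sets a mask bit only when `a & b` is zero in that lane, so the result here is all zeros.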
56368 let r = _mm512_testn_epi32_mask(a, b);
56369 let e: __mmask16 = 0b00000000_00000000;
56370 assert_eq!(r, e);
56371 }
56372
56373 #[simd_test(enable = "avx512f")]
56374 unsafe fn test_mm512_mask_testn_epi32_mask() {
56375 let a = _mm512_set1_epi32(1 << 0);
56376 let b = _mm512_set1_epi32(1 << 1);
56377 let r = _mm512_mask_testn_epi32_mask(0, a, b);
56378 assert_eq!(r, 0);
56379 let r = _mm512_mask_testn_epi32_mask(0b11111111_11111111, a, b);
56380 let e: __mmask16 = 0b11111111_11111111;
56381 assert_eq!(r, e);
56382 }
56383
56384 #[simd_test(enable = "avx512f,avx512vl")]
56385 unsafe fn test_mm256_testn_epi32_mask() {
56386 let a = _mm256_set1_epi32(1 << 0);
56387 let b = _mm256_set1_epi32(1 << 1);
56388 let r = _mm256_testn_epi32_mask(a, b);
56389 let e: __mmask8 = 0b11111111;
56390 assert_eq!(r, e);
56391 }
56392
56393 #[simd_test(enable = "avx512f,avx512vl")]
56394 unsafe fn test_mm256_mask_testn_epi32_mask() {
56395 let a = _mm256_set1_epi32(1 << 0);
56396 let b = _mm256_set1_epi32(1 << 1);
56397 let r = _mm256_mask_testn_epi32_mask(0, a, b);
56398 assert_eq!(r, 0);
56399 let r = _mm256_mask_testn_epi32_mask(0b11111111, a, b);
56400 let e: __mmask8 = 0b11111111;
56401 assert_eq!(r, e);
56402 }
56403
56404 #[simd_test(enable = "avx512f,avx512vl")]
56405 unsafe fn test_mm_testn_epi32_mask() {
56406 let a = _mm_set1_epi32(1 << 0);
56407 let b = _mm_set1_epi32(1 << 1);
56408 let r = _mm_testn_epi32_mask(a, b);
56409 let e: __mmask8 = 0b00001111;
56410 assert_eq!(r, e);
56411 }
56412
56413 #[simd_test(enable = "avx512f,avx512vl")]
56414 unsafe fn test_mm_mask_testn_epi32_mask() {
56415 let a = _mm_set1_epi32(1 << 0);
56416 let b = _mm_set1_epi32(1 << 1);
56417 let r = _mm_mask_testn_epi32_mask(0, a, b);
56418 assert_eq!(r, 0);
56419 let r = _mm_mask_testn_epi32_mask(0b11111111, a, b);
56420 let e: __mmask8 = 0b00001111;
56421 assert_eq!(r, e);
56422 }
56423
56424 #[simd_test(enable = "avx512f")]
56425 #[cfg_attr(miri, ignore)]
56426 unsafe fn test_mm512_stream_ps() {
56427 #[repr(align(64))]
56428 struct Memory {
56429 pub data: [f32; 16], // 64 bytes
56430 }
56431 let a = _mm512_set1_ps(7.0);
56432 let mut mem = Memory { data: [-1.0; 16] };
56433
56434 _mm512_stream_ps(&mut mem.data[0] as *mut f32, a);
56435 for i in 0..16 {
56436 assert_eq!(mem.data[i], get_m512(a, i));
56437 }
56438 }
56439
56440 #[simd_test(enable = "avx512f")]
56441 #[cfg_attr(miri, ignore)]
56442 unsafe fn test_mm512_stream_pd() {
56443 #[repr(align(64))]
56444 struct Memory {
56445 pub data: [f64; 8],
56446 }
56447 let a = _mm512_set1_pd(7.0);
56448 let mut mem = Memory { data: [-1.0; 8] };
56449
56450 _mm512_stream_pd(&mut mem.data[0] as *mut f64, a);
56451 for i in 0..8 {
56452 assert_eq!(mem.data[i], get_m512d(a, i));
56453 }
56454 }
56455
56456 #[simd_test(enable = "avx512f")]
56457 #[cfg_attr(miri, ignore)]
56458 unsafe fn test_mm512_stream_si512() {
56459 #[repr(align(64))]
56460 struct Memory {
56461 pub data: [i64; 8],
56462 }
56463 let a = _mm512_set1_epi32(7);
56464 let mut mem = Memory { data: [-1; 8] };
56465
56466 _mm512_stream_si512(mem.data.as_mut_ptr().cast(), a);
56467 for i in 0..8 {
56468 assert_eq!(mem.data[i], get_m512i(a, i));
56469 }
56470 }
56471
56472 #[simd_test(enable = "avx512f")]
56473 unsafe fn test_mm512_stream_load_si512() {
56474 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
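// `__m512i` is 64-byte aligned, which satisfies the alignment requirement of the non-temporal load.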
56475 let r = _mm512_stream_load_si512(core::ptr::addr_of!(a) as *const _);
56476 assert_eq_m512i(a, r);
56477 }
56478
56479 #[simd_test(enable = "avx512f")]
56480 unsafe fn test_mm512_reduce_add_epi32() {
56481 let a = _mm512_set1_epi32(1);
56482 let e: i32 = _mm512_reduce_add_epi32(a);
56483 assert_eq!(16, e);
56484 }
56485
56486 #[simd_test(enable = "avx512f")]
56487 unsafe fn test_mm512_mask_reduce_add_epi32() {
56488 let a = _mm512_set1_epi32(1);
56489 let e: i32 = _mm512_mask_reduce_add_epi32(0b11111111_00000000, a);
56490 assert_eq!(8, e);
56491 }
56492
56493 #[simd_test(enable = "avx512f")]
56494 unsafe fn test_mm512_reduce_add_ps() {
56495 let a = _mm512_set1_ps(1.);
56496 let e: f32 = _mm512_reduce_add_ps(a);
56497 assert_eq!(16., e);
56498 }
56499
56500 #[simd_test(enable = "avx512f")]
56501 unsafe fn test_mm512_mask_reduce_add_ps() {
56502 let a = _mm512_set1_ps(1.);
56503 let e: f32 = _mm512_mask_reduce_add_ps(0b11111111_00000000, a);
56504 assert_eq!(8., e);
56505 }
56506
56507 #[simd_test(enable = "avx512f")]
56508 unsafe fn test_mm512_reduce_mul_epi32() {
56509 let a = _mm512_set1_epi32(2);
56510 let e: i32 = _mm512_reduce_mul_epi32(a);
56511 assert_eq!(65536, e);
56512 }
56513
56514 #[simd_test(enable = "avx512f")]
56515 unsafe fn test_mm512_mask_reduce_mul_epi32() {
56516 let a = _mm512_set1_epi32(2);
56517 let e: i32 = _mm512_mask_reduce_mul_epi32(0b11111111_00000000, a);
56518 assert_eq!(256, e);
56519 }
56520
56521 #[simd_test(enable = "avx512f")]
56522 unsafe fn test_mm512_reduce_mul_ps() {
56523 let a = _mm512_set1_ps(2.);
56524 let e: f32 = _mm512_reduce_mul_ps(a);
56525 assert_eq!(65536., e);
56526 }
56527
56528 #[simd_test(enable = "avx512f")]
56529 unsafe fn test_mm512_mask_reduce_mul_ps() {
56530 let a = _mm512_set1_ps(2.);
56531 let e: f32 = _mm512_mask_reduce_mul_ps(0b11111111_00000000, a);
56532 assert_eq!(256., e);
56533 }
56534
56535 #[simd_test(enable = "avx512f")]
56536 unsafe fn test_mm512_reduce_max_epi32() {
56537 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56538 let e: i32 = _mm512_reduce_max_epi32(a);
56539 assert_eq!(15, e);
56540 }
56541
56542 #[simd_test(enable = "avx512f")]
56543 unsafe fn test_mm512_mask_reduce_max_epi32() {
56544 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56545 let e: i32 = _mm512_mask_reduce_max_epi32(0b11111111_00000000, a);
56546 assert_eq!(7, e);
56547 }
56548
56549 #[simd_test(enable = "avx512f")]
56550 unsafe fn test_mm512_reduce_max_epu32() {
56551 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56552 let e: u32 = _mm512_reduce_max_epu32(a);
56553 assert_eq!(15, e);
56554 }
56555
56556 #[simd_test(enable = "avx512f")]
56557 unsafe fn test_mm512_mask_reduce_max_epu32() {
56558 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56559 let e: u32 = _mm512_mask_reduce_max_epu32(0b11111111_00000000, a);
56560 assert_eq!(7, e);
56561 }
56562
56563 #[simd_test(enable = "avx512f")]
56564 unsafe fn test_mm512_reduce_max_ps() {
56565 let a = _mm512_set_ps(
56566 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
56567 );
56568 let e: f32 = _mm512_reduce_max_ps(a);
56569 assert_eq!(15., e);
56570 }
56571
56572 #[simd_test(enable = "avx512f")]
56573 unsafe fn test_mm512_mask_reduce_max_ps() {
56574 let a = _mm512_set_ps(
56575 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
56576 );
56577 let e: f32 = _mm512_mask_reduce_max_ps(0b11111111_00000000, a);
56578 assert_eq!(7., e);
56579 }
56580
56581 #[simd_test(enable = "avx512f")]
56582 unsafe fn test_mm512_reduce_min_epi32() {
56583 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56584 let e: i32 = _mm512_reduce_min_epi32(a);
56585 assert_eq!(0, e);
56586 }
56587
56588 #[simd_test(enable = "avx512f")]
56589 unsafe fn test_mm512_mask_reduce_min_epi32() {
56590 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56591 let e: i32 = _mm512_mask_reduce_min_epi32(0b11111111_00000000, a);
56592 assert_eq!(0, e);
56593 }
56594
56595 #[simd_test(enable = "avx512f")]
56596 unsafe fn test_mm512_reduce_min_epu32() {
56597 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56598 let e: u32 = _mm512_reduce_min_epu32(a);
56599 assert_eq!(0, e);
56600 }
56601
56602 #[simd_test(enable = "avx512f")]
56603 unsafe fn test_mm512_mask_reduce_min_epu32() {
56604 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56605 let e: u32 = _mm512_mask_reduce_min_epu32(0b11111111_00000000, a);
56606 assert_eq!(0, e);
56607 }
56608
56609 #[simd_test(enable = "avx512f")]
56610 unsafe fn test_mm512_reduce_min_ps() {
56611 let a = _mm512_set_ps(
56612 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
56613 );
56614 let e: f32 = _mm512_reduce_min_ps(a);
56615 assert_eq!(0., e);
56616 }
56617
56618 #[simd_test(enable = "avx512f")]
56619 unsafe fn test_mm512_mask_reduce_min_ps() {
56620 let a = _mm512_set_ps(
56621 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
56622 );
56623 let e: f32 = _mm512_mask_reduce_min_ps(0b11111111_00000000, a);
56624 assert_eq!(0., e);
56625 }
56626
56627 #[simd_test(enable = "avx512f")]
56628 unsafe fn test_mm512_reduce_and_epi32() {
56629 let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
56630 let e: i32 = _mm512_reduce_and_epi32(a);
56631 assert_eq!(0, e);
56632 }
56633
56634 #[simd_test(enable = "avx512f")]
56635 unsafe fn test_mm512_mask_reduce_and_epi32() {
56636 let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
56637 let e: i32 = _mm512_mask_reduce_and_epi32(0b11111111_00000000, a);
56638 assert_eq!(1, e);
56639 }
56640
56641 #[simd_test(enable = "avx512f")]
56642 unsafe fn test_mm512_reduce_or_epi32() {
56643 let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
56644 let e: i32 = _mm512_reduce_or_epi32(a);
56645 assert_eq!(3, e);
56646 }
56647
56648 #[simd_test(enable = "avx512f")]
56649 unsafe fn test_mm512_mask_reduce_or_epi32() {
56650 let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
56651 let e: i32 = _mm512_mask_reduce_or_epi32(0b11111111_00000000, a);
56652 assert_eq!(1, e);
56653 }
56654
56655 #[simd_test(enable = "avx512f")]
56656 unsafe fn test_mm512_mask_compress_epi32() {
56657 let src = _mm512_set1_epi32(200);
56658 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56659 let r = _mm512_mask_compress_epi32(src, 0, a);
56660 assert_eq_m512i(r, src);
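// The mask selects the even-numbered elements; they are packed into the low lanes and the remaining lanes are copied from `src`.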
56661 let r = _mm512_mask_compress_epi32(src, 0b01010101_01010101, a);
56662 let e = _mm512_set_epi32(
56663 200, 200, 200, 200, 200, 200, 200, 200, 1, 3, 5, 7, 9, 11, 13, 15,
56664 );
56665 assert_eq_m512i(r, e);
56666 }
56667
56668 #[simd_test(enable = "avx512f")]
56669 unsafe fn test_mm512_maskz_compress_epi32() {
56670 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56671 let r = _mm512_maskz_compress_epi32(0, a);
56672 assert_eq_m512i(r, _mm512_setzero_si512());
56673 let r = _mm512_maskz_compress_epi32(0b01010101_01010101, a);
56674 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 5, 7, 9, 11, 13, 15);
56675 assert_eq_m512i(r, e);
56676 }
56677
56678 #[simd_test(enable = "avx512f,avx512vl")]
56679 unsafe fn test_mm256_mask_compress_epi32() {
56680 let src = _mm256_set1_epi32(200);
56681 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
56682 let r = _mm256_mask_compress_epi32(src, 0, a);
56683 assert_eq_m256i(r, src);
56684 let r = _mm256_mask_compress_epi32(src, 0b01010101, a);
56685 let e = _mm256_set_epi32(200, 200, 200, 200, 1, 3, 5, 7);
56686 assert_eq_m256i(r, e);
56687 }
56688
56689 #[simd_test(enable = "avx512f,avx512vl")]
56690 unsafe fn test_mm256_maskz_compress_epi32() {
56691 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
56692 let r = _mm256_maskz_compress_epi32(0, a);
56693 assert_eq_m256i(r, _mm256_setzero_si256());
56694 let r = _mm256_maskz_compress_epi32(0b01010101, a);
56695 let e = _mm256_set_epi32(0, 0, 0, 0, 1, 3, 5, 7);
56696 assert_eq_m256i(r, e);
56697 }
56698
56699 #[simd_test(enable = "avx512f,avx512vl")]
56700 unsafe fn test_mm_mask_compress_epi32() {
56701 let src = _mm_set1_epi32(200);
56702 let a = _mm_set_epi32(0, 1, 2, 3);
56703 let r = _mm_mask_compress_epi32(src, 0, a);
56704 assert_eq_m128i(r, src);
56705 let r = _mm_mask_compress_epi32(src, 0b00000101, a);
56706 let e = _mm_set_epi32(200, 200, 1, 3);
56707 assert_eq_m128i(r, e);
56708 }
56709
56710 #[simd_test(enable = "avx512f,avx512vl")]
56711 unsafe fn test_mm_maskz_compress_epi32() {
56712 let a = _mm_set_epi32(0, 1, 2, 3);
56713 let r = _mm_maskz_compress_epi32(0, a);
56714 assert_eq_m128i(r, _mm_setzero_si128());
56715 let r = _mm_maskz_compress_epi32(0b00000101, a);
56716 let e = _mm_set_epi32(0, 0, 1, 3);
56717 assert_eq_m128i(r, e);
56718 }
56719
56720 #[simd_test(enable = "avx512f")]
56721 unsafe fn test_mm512_mask_compress_ps() {
56722 let src = _mm512_set1_ps(200.);
56723 let a = _mm512_set_ps(
56724 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
56725 );
56726 let r = _mm512_mask_compress_ps(src, 0, a);
56727 assert_eq_m512(r, src);
56728 let r = _mm512_mask_compress_ps(src, 0b01010101_01010101, a);
56729 let e = _mm512_set_ps(
56730 200., 200., 200., 200., 200., 200., 200., 200., 1., 3., 5., 7., 9., 11., 13., 15.,
56731 );
56732 assert_eq_m512(r, e);
56733 }
56734
56735 #[simd_test(enable = "avx512f")]
56736 unsafe fn test_mm512_maskz_compress_ps() {
56737 let a = _mm512_set_ps(
56738 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
56739 );
56740 let r = _mm512_maskz_compress_ps(0, a);
56741 assert_eq_m512(r, _mm512_setzero_ps());
56742 let r = _mm512_maskz_compress_ps(0b01010101_01010101, a);
56743 let e = _mm512_set_ps(
56744 0., 0., 0., 0., 0., 0., 0., 0., 1., 3., 5., 7., 9., 11., 13., 15.,
56745 );
56746 assert_eq_m512(r, e);
56747 }
56748
56749 #[simd_test(enable = "avx512f,avx512vl")]
56750 unsafe fn test_mm256_mask_compress_ps() {
56751 let src = _mm256_set1_ps(200.);
56752 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
56753 let r = _mm256_mask_compress_ps(src, 0, a);
56754 assert_eq_m256(r, src);
56755 let r = _mm256_mask_compress_ps(src, 0b01010101, a);
56756 let e = _mm256_set_ps(200., 200., 200., 200., 1., 3., 5., 7.);
56757 assert_eq_m256(r, e);
56758 }
56759
56760 #[simd_test(enable = "avx512f,avx512vl")]
56761 unsafe fn test_mm256_maskz_compress_ps() {
56762 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
56763 let r = _mm256_maskz_compress_ps(0, a);
56764 assert_eq_m256(r, _mm256_setzero_ps());
56765 let r = _mm256_maskz_compress_ps(0b01010101, a);
56766 let e = _mm256_set_ps(0., 0., 0., 0., 1., 3., 5., 7.);
56767 assert_eq_m256(r, e);
56768 }
56769
56770 #[simd_test(enable = "avx512f,avx512vl")]
56771 unsafe fn test_mm_mask_compress_ps() {
56772 let src = _mm_set1_ps(200.);
56773 let a = _mm_set_ps(0., 1., 2., 3.);
56774 let r = _mm_mask_compress_ps(src, 0, a);
56775 assert_eq_m128(r, src);
56776 let r = _mm_mask_compress_ps(src, 0b00000101, a);
56777 let e = _mm_set_ps(200., 200., 1., 3.);
56778 assert_eq_m128(r, e);
56779 }
56780
56781 #[simd_test(enable = "avx512f,avx512vl")]
56782 unsafe fn test_mm_maskz_compress_ps() {
56783 let a = _mm_set_ps(0., 1., 2., 3.);
56784 let r = _mm_maskz_compress_ps(0, a);
56785 assert_eq_m128(r, _mm_setzero_ps());
56786 let r = _mm_maskz_compress_ps(0b00000101, a);
56787 let e = _mm_set_ps(0., 0., 1., 3.);
56788 assert_eq_m128(r, e);
56789 }
56790
56791 #[simd_test(enable = "avx512f")]
56792 unsafe fn test_mm512_mask_compressstoreu_epi32() {
56793 let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
56794 let mut r = [0_i32; 16];
56795 _mm512_mask_compressstoreu_epi32(r.as_mut_ptr(), 0, a);
56796 assert_eq!(&r, &[0_i32; 16]);
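// Only the elements whose mask bit is set are stored, packed contiguously from the start of `r`; the tail is left untouched.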
56797 _mm512_mask_compressstoreu_epi32(r.as_mut_ptr(), 0b1111000011001010, a);
56798 assert_eq!(&r, &[2, 4, 7, 8, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0]);
56799 }
56800
56801 #[simd_test(enable = "avx512f,avx512vl")]
56802 unsafe fn test_mm256_mask_compressstoreu_epi32() {
56803 let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
56804 let mut r = [0_i32; 8];
56805 _mm256_mask_compressstoreu_epi32(r.as_mut_ptr(), 0, a);
56806 assert_eq!(&r, &[0_i32; 8]);
56807 _mm256_mask_compressstoreu_epi32(r.as_mut_ptr(), 0b11001010, a);
56808 assert_eq!(&r, &[2, 4, 7, 8, 0, 0, 0, 0]);
56809 }
56810
56811 #[simd_test(enable = "avx512f,avx512vl")]
56812 unsafe fn test_mm_mask_compressstoreu_epi32() {
56813 let a = _mm_setr_epi32(1, 2, 3, 4);
56814 let mut r = [0_i32; 4];
56815 _mm_mask_compressstoreu_epi32(r.as_mut_ptr(), 0, a);
56816 assert_eq!(&r, &[0_i32; 4]);
56817 _mm_mask_compressstoreu_epi32(r.as_mut_ptr(), 0b1011, a);
56818 assert_eq!(&r, &[1, 2, 4, 0]);
56819 }
56820
56821 #[simd_test(enable = "avx512f")]
56822 unsafe fn test_mm512_mask_compressstoreu_epi64() {
56823 let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
56824 let mut r = [0_i64; 8];
56825 _mm512_mask_compressstoreu_epi64(r.as_mut_ptr(), 0, a);
56826 assert_eq!(&r, &[0_i64; 8]);
56827 _mm512_mask_compressstoreu_epi64(r.as_mut_ptr(), 0b11001010, a);
56828 assert_eq!(&r, &[2, 4, 7, 8, 0, 0, 0, 0]);
56829 }
56830
56831 #[simd_test(enable = "avx512f,avx512vl")]
56832 unsafe fn test_mm256_mask_compressstoreu_epi64() {
56833 let a = _mm256_setr_epi64x(1, 2, 3, 4);
56834 let mut r = [0_i64; 4];
56835 _mm256_mask_compressstoreu_epi64(r.as_mut_ptr(), 0, a);
56836 assert_eq!(&r, &[0_i64; 4]);
56837 _mm256_mask_compressstoreu_epi64(r.as_mut_ptr(), 0b1011, a);
56838 assert_eq!(&r, &[1, 2, 4, 0]);
56839 }
56840
56841 #[simd_test(enable = "avx512f,avx512vl")]
56842 unsafe fn test_mm_mask_compressstoreu_epi64() {
56843 let a = _mm_setr_epi64x(1, 2);
56844 let mut r = [0_i64; 2];
56845 _mm_mask_compressstoreu_epi64(r.as_mut_ptr(), 0, a);
56846 assert_eq!(&r, &[0_i64; 2]);
56847 _mm_mask_compressstoreu_epi64(r.as_mut_ptr(), 0b10, a);
56848 assert_eq!(&r, &[2, 0]);
56849 }
56850
56851 #[simd_test(enable = "avx512f")]
56852 unsafe fn test_mm512_mask_compressstoreu_ps() {
56853 let a = _mm512_setr_ps(
56854 1_f32, 2_f32, 3_f32, 4_f32, 5_f32, 6_f32, 7_f32, 8_f32, 9_f32, 10_f32, 11_f32, 12_f32,
56855 13_f32, 14_f32, 15_f32, 16_f32,
56856 );
56857 let mut r = [0_f32; 16];
56858 _mm512_mask_compressstoreu_ps(r.as_mut_ptr(), 0, a);
56859 assert_eq!(&r, &[0_f32; 16]);
56860 _mm512_mask_compressstoreu_ps(r.as_mut_ptr(), 0b1111000011001010, a);
56861 assert_eq!(
56862 &r,
56863 &[
56864 2_f32, 4_f32, 7_f32, 8_f32, 13_f32, 14_f32, 15_f32, 16_f32, 0_f32, 0_f32, 0_f32,
56865 0_f32, 0_f32, 0_f32, 0_f32, 0_f32
56866 ]
56867 );
56868 }
56869
56870 #[simd_test(enable = "avx512f,avx512vl")]
56871 unsafe fn test_mm256_mask_compressstoreu_ps() {
56872 let a = _mm256_setr_ps(1_f32, 2_f32, 3_f32, 4_f32, 5_f32, 6_f32, 7_f32, 8_f32);
56873 let mut r = [0_f32; 8];
56874 _mm256_mask_compressstoreu_ps(r.as_mut_ptr(), 0, a);
56875 assert_eq!(&r, &[0_f32; 8]);
56876 _mm256_mask_compressstoreu_ps(r.as_mut_ptr(), 0b11001010, a);
56877 assert_eq!(
56878 &r,
56879 &[2_f32, 4_f32, 7_f32, 8_f32, 0_f32, 0_f32, 0_f32, 0_f32]
56880 );
56881 }
56882
56883 #[simd_test(enable = "avx512f,avx512vl")]
56884 unsafe fn test_mm_mask_compressstoreu_ps() {
56885 let a = _mm_setr_ps(1_f32, 2_f32, 3_f32, 4_f32);
56886 let mut r = [0.; 4];
56887 _mm_mask_compressstoreu_ps(r.as_mut_ptr(), 0, a);
56888 assert_eq!(&r, &[0.; 4]);
56889 _mm_mask_compressstoreu_ps(r.as_mut_ptr(), 0b1011, a);
56890 assert_eq!(&r, &[1_f32, 2_f32, 4_f32, 0_f32]);
56891 }
56892
56893 #[simd_test(enable = "avx512f")]
56894 unsafe fn test_mm512_mask_compressstoreu_pd() {
56895 let a = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.);
56896 let mut r = [0.; 8];
56897 _mm512_mask_compressstoreu_pd(r.as_mut_ptr(), 0, a);
56898 assert_eq!(&r, &[0.; 8]);
56899 _mm512_mask_compressstoreu_pd(r.as_mut_ptr(), 0b11001010, a);
56900 assert_eq!(&r, &[2., 4., 7., 8., 0., 0., 0., 0.]);
56901 }
56902
56903 #[simd_test(enable = "avx512f,avx512vl")]
56904 unsafe fn test_mm256_mask_compressstoreu_pd() {
56905 let a = _mm256_setr_pd(1., 2., 3., 4.);
56906 let mut r = [0.; 4];
56907 _mm256_mask_compressstoreu_pd(r.as_mut_ptr(), 0, a);
56908 assert_eq!(&r, &[0.; 4]);
56909 _mm256_mask_compressstoreu_pd(r.as_mut_ptr(), 0b1011, a);
56910 assert_eq!(&r, &[1., 2., 4., 0.]);
56911 }
56912
56913 #[simd_test(enable = "avx512f,avx512vl")]
56914 unsafe fn test_mm_mask_compressstoreu_pd() {
56915 let a = _mm_setr_pd(1., 2.);
56916 let mut r = [0.; 2];
56917 _mm_mask_compressstoreu_pd(r.as_mut_ptr(), 0, a);
56918 assert_eq!(&r, &[0.; 2]);
56919 _mm_mask_compressstoreu_pd(r.as_mut_ptr(), 0b10, a);
56920 assert_eq!(&r, &[2., 0.]);
56921 }
56922
56923 #[simd_test(enable = "avx512f")]
56924 unsafe fn test_mm512_mask_expand_epi32() {
56925 let src = _mm512_set1_epi32(200);
56926 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56927 let r = _mm512_mask_expand_epi32(src, 0, a);
56928 assert_eq_m512i(r, src);
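// Expand reads consecutive elements from the low end of `a` and scatters them to the lanes whose mask bit is set; other lanes are copied from `src`.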
56929 let r = _mm512_mask_expand_epi32(src, 0b01010101_01010101, a);
56930 let e = _mm512_set_epi32(
56931 200, 8, 200, 9, 200, 10, 200, 11, 200, 12, 200, 13, 200, 14, 200, 15,
56932 );
56933 assert_eq_m512i(r, e);
56934 }
56935
56936 #[simd_test(enable = "avx512f")]
56937 unsafe fn test_mm512_maskz_expand_epi32() {
56938 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56939 let r = _mm512_maskz_expand_epi32(0, a);
56940 assert_eq_m512i(r, _mm512_setzero_si512());
56941 let r = _mm512_maskz_expand_epi32(0b01010101_01010101, a);
56942 let e = _mm512_set_epi32(0, 8, 0, 9, 0, 10, 0, 11, 0, 12, 0, 13, 0, 14, 0, 15);
56943 assert_eq_m512i(r, e);
56944 }
56945
56946 #[simd_test(enable = "avx512f,avx512vl")]
56947 unsafe fn test_mm256_mask_expand_epi32() {
56948 let src = _mm256_set1_epi32(200);
56949 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
56950 let r = _mm256_mask_expand_epi32(src, 0, a);
56951 assert_eq_m256i(r, src);
56952 let r = _mm256_mask_expand_epi32(src, 0b01010101, a);
56953 let e = _mm256_set_epi32(200, 4, 200, 5, 200, 6, 200, 7);
56954 assert_eq_m256i(r, e);
56955 }
56956
56957 #[simd_test(enable = "avx512f,avx512vl")]
56958 unsafe fn test_mm256_maskz_expand_epi32() {
56959 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
56960 let r = _mm256_maskz_expand_epi32(0, a);
56961 assert_eq_m256i(r, _mm256_setzero_si256());
56962 let r = _mm256_maskz_expand_epi32(0b01010101, a);
56963 let e = _mm256_set_epi32(0, 4, 0, 5, 0, 6, 0, 7);
56964 assert_eq_m256i(r, e);
56965 }
56966
56967 #[simd_test(enable = "avx512f,avx512vl")]
56968 unsafe fn test_mm_mask_expand_epi32() {
56969 let src = _mm_set1_epi32(200);
56970 let a = _mm_set_epi32(0, 1, 2, 3);
56971 let r = _mm_mask_expand_epi32(src, 0, a);
56972 assert_eq_m128i(r, src);
56973 let r = _mm_mask_expand_epi32(src, 0b00000101, a);
56974 let e = _mm_set_epi32(200, 2, 200, 3);
56975 assert_eq_m128i(r, e);
56976 }
56977
56978 #[simd_test(enable = "avx512f,avx512vl")]
56979 unsafe fn test_mm_maskz_expand_epi32() {
56980 let a = _mm_set_epi32(0, 1, 2, 3);
56981 let r = _mm_maskz_expand_epi32(0, a);
56982 assert_eq_m128i(r, _mm_setzero_si128());
56983 let r = _mm_maskz_expand_epi32(0b00000101, a);
56984 let e = _mm_set_epi32(0, 2, 0, 3);
56985 assert_eq_m128i(r, e);
56986 }
56987
56988 #[simd_test(enable = "avx512f")]
56989 unsafe fn test_mm512_mask_expand_ps() {
56990 let src = _mm512_set1_ps(200.);
56991 let a = _mm512_set_ps(
56992 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
56993 );
56994 let r = _mm512_mask_expand_ps(src, 0, a);
56995 assert_eq_m512(r, src);
56996 let r = _mm512_mask_expand_ps(src, 0b01010101_01010101, a);
56997 let e = _mm512_set_ps(
56998 200., 8., 200., 9., 200., 10., 200., 11., 200., 12., 200., 13., 200., 14., 200., 15.,
56999 );
57000 assert_eq_m512(r, e);
57001 }
57002
57003 #[simd_test(enable = "avx512f")]
57004 unsafe fn test_mm512_maskz_expand_ps() {
57005 let a = _mm512_set_ps(
57006 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
57007 );
57008 let r = _mm512_maskz_expand_ps(0, a);
57009 assert_eq_m512(r, _mm512_setzero_ps());
57010 let r = _mm512_maskz_expand_ps(0b01010101_01010101, a);
57011 let e = _mm512_set_ps(
57012 0., 8., 0., 9., 0., 10., 0., 11., 0., 12., 0., 13., 0., 14., 0., 15.,
57013 );
57014 assert_eq_m512(r, e);
57015 }
57016
57017 #[simd_test(enable = "avx512f,avx512vl")]
57018 unsafe fn test_mm256_mask_expand_ps() {
57019 let src = _mm256_set1_ps(200.);
57020 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
57021 let r = _mm256_mask_expand_ps(src, 0, a);
57022 assert_eq_m256(r, src);
57023 let r = _mm256_mask_expand_ps(src, 0b01010101, a);
57024 let e = _mm256_set_ps(200., 4., 200., 5., 200., 6., 200., 7.);
57025 assert_eq_m256(r, e);
57026 }
57027
57028 #[simd_test(enable = "avx512f,avx512vl")]
57029 unsafe fn test_mm256_maskz_expand_ps() {
57030 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
57031 let r = _mm256_maskz_expand_ps(0, a);
57032 assert_eq_m256(r, _mm256_setzero_ps());
57033 let r = _mm256_maskz_expand_ps(0b01010101, a);
57034 let e = _mm256_set_ps(0., 4., 0., 5., 0., 6., 0., 7.);
57035 assert_eq_m256(r, e);
57036 }
57037
57038 #[simd_test(enable = "avx512f,avx512vl")]
57039 unsafe fn test_mm_mask_expand_ps() {
57040 let src = _mm_set1_ps(200.);
57041 let a = _mm_set_ps(0., 1., 2., 3.);
57042 let r = _mm_mask_expand_ps(src, 0, a);
57043 assert_eq_m128(r, src);
57044 let r = _mm_mask_expand_ps(src, 0b00000101, a);
57045 let e = _mm_set_ps(200., 2., 200., 3.);
57046 assert_eq_m128(r, e);
57047 }
57048
57049 #[simd_test(enable = "avx512f,avx512vl")]
57050 unsafe fn test_mm_maskz_expand_ps() {
57051 let a = _mm_set_ps(0., 1., 2., 3.);
57052 let r = _mm_maskz_expand_ps(0, a);
57053 assert_eq_m128(r, _mm_setzero_ps());
57054 let r = _mm_maskz_expand_ps(0b00000101, a);
57055 let e = _mm_set_ps(0., 2., 0., 3.);
57056 assert_eq_m128(r, e);
57057 }
57058
57059 #[simd_test(enable = "avx512f")]
57060 unsafe fn test_mm512_loadu_epi32() {
57061 let a = &[4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50];
57062 let p = a.as_ptr();
57063 let r = _mm512_loadu_epi32(black_box(p));
57064 let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
57065 assert_eq_m512i(r, e);
57066 }
57067
57068 #[simd_test(enable = "avx512f,avx512vl")]
57069 unsafe fn test_mm256_loadu_epi32() {
57070 let a = &[4, 3, 2, 5, 8, 9, 64, 50];
57071 let p = a.as_ptr();
57072 let r = _mm256_loadu_epi32(black_box(p));
57073 let e = _mm256_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50);
57074 assert_eq_m256i(r, e);
57075 }
57076
57077 #[simd_test(enable = "avx512f,avx512vl")]
57078 unsafe fn test_mm_loadu_epi32() {
57079 let a = &[4, 3, 2, 5];
57080 let p = a.as_ptr();
57081 let r = _mm_loadu_epi32(black_box(p));
57082 let e = _mm_setr_epi32(4, 3, 2, 5);
57083 assert_eq_m128i(r, e);
57084 }
57085
57086 #[simd_test(enable = "avx512f")]
57087 unsafe fn test_mm512_mask_cvtepi32_storeu_epi16() {
57088 let a = _mm512_set1_epi32(9);
57089 let mut r = _mm256_undefined_si256();
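// Each selected 32-bit lane is truncated to 16 bits before being stored.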
57090 _mm512_mask_cvtepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111_11111111, a);
57091 let e = _mm256_set1_epi16(9);
57092 assert_eq_m256i(r, e);
57093 }
57094
57095 #[simd_test(enable = "avx512f,avx512vl")]
57096 unsafe fn test_mm256_mask_cvtepi32_storeu_epi16() {
57097 let a = _mm256_set1_epi32(9);
57098 let mut r = _mm_undefined_si128();
57099 _mm256_mask_cvtepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
57100 let e = _mm_set1_epi16(9);
57101 assert_eq_m128i(r, e);
57102 }
57103
57104 #[simd_test(enable = "avx512f,avx512vl")]
57105 unsafe fn test_mm_mask_cvtepi32_storeu_epi16() {
57106 let a = _mm_set1_epi32(9);
57107 let mut r = _mm_set1_epi8(0);
57108 _mm_mask_cvtepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
57109 let e = _mm_set_epi16(0, 0, 0, 0, 9, 9, 9, 9);
57110 assert_eq_m128i(r, e);
57111 }
57112
57113 #[simd_test(enable = "avx512f")]
57114 unsafe fn test_mm512_mask_cvtsepi32_storeu_epi16() {
57115 let a = _mm512_set1_epi32(i32::MAX);
57116 let mut r = _mm256_undefined_si256();
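// Signed saturation: each i32::MAX lane is stored as i16::MAX.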
57117 _mm512_mask_cvtsepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111_11111111, a);
57118 let e = _mm256_set1_epi16(i16::MAX);
57119 assert_eq_m256i(r, e);
57120 }
57121
57122 #[simd_test(enable = "avx512f,avx512vl")]
57123 unsafe fn test_mm256_mask_cvtsepi32_storeu_epi16() {
57124 let a = _mm256_set1_epi32(i32::MAX);
57125 let mut r = _mm_undefined_si128();
57126 _mm256_mask_cvtsepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
57127 let e = _mm_set1_epi16(i16::MAX);
57128 assert_eq_m128i(r, e);
57129 }
57130
57131 #[simd_test(enable = "avx512f,avx512vl")]
57132 unsafe fn test_mm_mask_cvtsepi32_storeu_epi16() {
57133 let a = _mm_set1_epi32(i32::MAX);
57134 let mut r = _mm_set1_epi8(0);
57135 _mm_mask_cvtsepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
57136 let e = _mm_set_epi16(0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
57137 assert_eq_m128i(r, e);
57138 }
57139
57140 #[simd_test(enable = "avx512f")]
57141 unsafe fn test_mm512_mask_cvtusepi32_storeu_epi16() {
57142 let a = _mm512_set1_epi32(i32::MAX);
57143 let mut r = _mm256_undefined_si256();
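// Unsigned saturation: each i32::MAX lane is stored as u16::MAX.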
57144 _mm512_mask_cvtusepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111_11111111, a);
57145 let e = _mm256_set1_epi16(u16::MAX as i16);
57146 assert_eq_m256i(r, e);
57147 }
57148
57149 #[simd_test(enable = "avx512f,avx512vl")]
57150 unsafe fn test_mm256_mask_cvtusepi32_storeu_epi16() {
57151 let a = _mm256_set1_epi32(i32::MAX);
57152 let mut r = _mm_undefined_si128();
57153 _mm256_mask_cvtusepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
57154 let e = _mm_set1_epi16(u16::MAX as i16);
57155 assert_eq_m128i(r, e);
57156 }
57157
57158 #[simd_test(enable = "avx512f,avx512vl")]
57159 unsafe fn test_mm_mask_cvtusepi32_storeu_epi16() {
57160 let a = _mm_set1_epi32(i32::MAX);
57161 let mut r = _mm_set1_epi8(0);
57162 _mm_mask_cvtusepi32_storeu_epi16(&mut r as *mut _ as *mut i16, 0b11111111, a);
57163 let e = _mm_set_epi16(
57164 0,
57165 0,
57166 0,
57167 0,
57168 u16::MAX as i16,
57169 u16::MAX as i16,
57170 u16::MAX as i16,
57171 u16::MAX as i16,
57172 );
57173 assert_eq_m128i(r, e);
57174 }
57175
57176 #[simd_test(enable = "avx512f")]
57177 unsafe fn test_mm512_mask_cvtepi32_storeu_epi8() {
57178 let a = _mm512_set1_epi32(9);
57179 let mut r = _mm_undefined_si128();
57180 _mm512_mask_cvtepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
57181 let e = _mm_set1_epi8(9);
57182 assert_eq_m128i(r, e);
57183 }
57184
57185 #[simd_test(enable = "avx512f,avx512vl")]
57186 unsafe fn test_mm256_mask_cvtepi32_storeu_epi8() {
57187 let a = _mm256_set1_epi32(9);
57188 let mut r = _mm_set1_epi8(0);
57189 _mm256_mask_cvtepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
57190 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9, 9, 9);
57191 assert_eq_m128i(r, e);
57192 }
57193
57194 #[simd_test(enable = "avx512f,avx512vl")]
57195 unsafe fn test_mm_mask_cvtepi32_storeu_epi8() {
57196 let a = _mm_set1_epi32(9);
57197 let mut r = _mm_set1_epi8(0);
57198 _mm_mask_cvtepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
57199 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9);
57200 assert_eq_m128i(r, e);
57201 }
57202
57203 #[simd_test(enable = "avx512f")]
57204 unsafe fn test_mm512_mask_cvtsepi32_storeu_epi8() {
57205 let a = _mm512_set1_epi32(i32::MAX);
57206 let mut r = _mm_undefined_si128();
57207 _mm512_mask_cvtsepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
57208 let e = _mm_set1_epi8(i8::MAX);
57209 assert_eq_m128i(r, e);
57210 }
57211
57212 #[simd_test(enable = "avx512f,avx512vl")]
57213 unsafe fn test_mm256_mask_cvtsepi32_storeu_epi8() {
57214 let a = _mm256_set1_epi32(i32::MAX);
57215 let mut r = _mm_set1_epi8(0);
57216 _mm256_mask_cvtsepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
57217 #[rustfmt::skip]
57218 let e = _mm_set_epi8(
57219 0, 0, 0, 0,
57220 0, 0, 0, 0,
57221 i8::MAX, i8::MAX, i8::MAX, i8::MAX,
57222 i8::MAX, i8::MAX, i8::MAX, i8::MAX,
57223 );
57224 assert_eq_m128i(r, e);
57225 }
57226
57227 #[simd_test(enable = "avx512f,avx512vl")]
57228 unsafe fn test_mm_mask_cvtsepi32_storeu_epi8() {
57229 let a = _mm_set1_epi32(i32::MAX);
57230 let mut r = _mm_set1_epi8(0);
57231 _mm_mask_cvtsepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
57232 #[rustfmt::skip]
57233 let e = _mm_set_epi8(
57234 0, 0, 0, 0,
57235 0, 0, 0, 0,
57236 0, 0, 0, 0,
57237 i8::MAX, i8::MAX, i8::MAX, i8::MAX,
57238 );
57239 assert_eq_m128i(r, e);
57240 }
57241
57242 #[simd_test(enable = "avx512f")]
57243 unsafe fn test_mm512_mask_cvtusepi32_storeu_epi8() {
57244 let a = _mm512_set1_epi32(i32::MAX);
57245 let mut r = _mm_undefined_si128();
57246 _mm512_mask_cvtusepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
57247 let e = _mm_set1_epi8(u8::MAX as i8);
57248 assert_eq_m128i(r, e);
57249 }
57250
57251 #[simd_test(enable = "avx512f,avx512vl")]
57252 unsafe fn test_mm256_mask_cvtusepi32_storeu_epi8() {
57253 let a = _mm256_set1_epi32(i32::MAX);
57254 let mut r = _mm_set1_epi8(0);
57255 _mm256_mask_cvtusepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
57256 #[rustfmt::skip]
57257 let e = _mm_set_epi8(
57258 0, 0, 0, 0,
57259 0, 0, 0, 0,
57260 u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
57261 u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
57262 );
57263 assert_eq_m128i(r, e);
57264 }
57265
57266 #[simd_test(enable = "avx512f,avx512vl")]
57267 unsafe fn test_mm_mask_cvtusepi32_storeu_epi8() {
57268 let a = _mm_set1_epi32(i32::MAX);
57269 let mut r = _mm_set1_epi8(0);
57270 _mm_mask_cvtusepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
57271 #[rustfmt::skip]
57272 let e = _mm_set_epi8(
57273 0, 0, 0, 0,
57274 0, 0, 0, 0,
57275 0, 0, 0, 0,
57276 u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
57277 );
57278 assert_eq_m128i(r, e);
57279 }
57280
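// The storeu tests write every lane, so starting from an undefined buffer is fine: the store
// fully overwrites it before the comparison.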
57281 #[simd_test(enable = "avx512f")]
57282 unsafe fn test_mm512_storeu_epi32() {
57283 let a = _mm512_set1_epi32(9);
57284 let mut r = _mm512_undefined_epi32();
57285 _mm512_storeu_epi32(&mut r as *mut _ as *mut i32, a);
57286 assert_eq_m512i(r, a);
57287 }
57288
57289 #[simd_test(enable = "avx512f,avx512vl")]
57290 unsafe fn test_mm256_storeu_epi32() {
57291 let a = _mm256_set1_epi32(9);
57292 let mut r = _mm256_undefined_si256();
57293 _mm256_storeu_epi32(&mut r as *mut _ as *mut i32, a);
57294 assert_eq_m256i(r, a);
57295 }
57296
57297 #[simd_test(enable = "avx512f,avx512vl")]
57298 unsafe fn test_mm_storeu_epi32() {
57299 let a = _mm_set1_epi32(9);
57300 let mut r = _mm_undefined_si128();
57301 _mm_storeu_epi32(&mut r as *mut _ as *mut i32, a);
57302 assert_eq_m128i(r, a);
57303 }
57304
57305 #[simd_test(enable = "avx512f")]
57306 unsafe fn test_mm512_loadu_si512() {
57307 let a = &[4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50];
57308 let p = a.as_ptr().cast();
57309 let r = _mm512_loadu_si512(black_box(p));
57310 let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
57311 assert_eq_m512i(r, e);
57312 }
57313
57314 #[simd_test(enable = "avx512f")]
57315 unsafe fn test_mm512_storeu_si512() {
57316 let a = _mm512_set1_epi32(9);
57317 let mut r = _mm512_undefined_epi32();
57318 _mm512_storeu_si512(&mut r as *mut _, a);
57319 assert_eq_m512i(r, a);
57320 }
57321
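// The aligned load/store intrinsics below require the data to be aligned to the vector width;
// the load tests use #[repr(align(64))] wrappers, while the store tests rely on the alignment
// of the vector types themselves.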
57322 #[simd_test(enable = "avx512f")]
57323 unsafe fn test_mm512_load_si512() {
57324 #[repr(align(64))]
57325 struct Align {
57326 data: [i32; 16], // 64 bytes
57327 }
57328 let a = Align {
57329 data: [4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50],
57330 };
57331 let p = (a.data).as_ptr().cast();
57332 let r = _mm512_load_si512(black_box(p));
57333 let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
57334 assert_eq_m512i(r, e);
57335 }
57336
57337 #[simd_test(enable = "avx512f")]
57338 unsafe fn test_mm512_store_si512() {
57339 let a = _mm512_set1_epi32(9);
57340 let mut r = _mm512_undefined_epi32();
57341 _mm512_store_si512(&mut r as *mut _, a);
57342 assert_eq_m512i(r, a);
57343 }
57344
57345 #[simd_test(enable = "avx512f")]
57346 unsafe fn test_mm512_load_epi32() {
57347 #[repr(align(64))]
57348 struct Align {
57349 data: [i32; 16], // 64 bytes
57350 }
57351 let a = Align {
57352 data: [4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50],
57353 };
57354 let p = (a.data).as_ptr();
57355 let r = _mm512_load_epi32(black_box(p));
57356 let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
57357 assert_eq_m512i(r, e);
57358 }
57359
57360 #[simd_test(enable = "avx512f,avx512vl")]
57361 unsafe fn test_mm256_load_epi32() {
57362 #[repr(align(64))]
57363 struct Align {
57364 data: [i32; 8],
57365 }
57366 let a = Align {
57367 data: [4, 3, 2, 5, 8, 9, 64, 50],
57368 };
57369 let p = (a.data).as_ptr();
57370 let r = _mm256_load_epi32(black_box(p));
57371 let e = _mm256_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50);
57372 assert_eq_m256i(r, e);
57373 }
57374
57375 #[simd_test(enable = "avx512f,avx512vl")]
57376 unsafe fn test_mm_load_epi32() {
57377 #[repr(align(64))]
57378 struct Align {
57379 data: [i32; 4],
57380 }
57381 let a = Align { data: [4, 3, 2, 5] };
57382 let p = (a.data).as_ptr();
57383 let r = _mm_load_epi32(black_box(p));
57384 let e = _mm_setr_epi32(4, 3, 2, 5);
57385 assert_eq_m128i(r, e);
57386 }
57387
57388 #[simd_test(enable = "avx512f")]
57389 unsafe fn test_mm512_store_epi32() {
57390 let a = _mm512_set1_epi32(9);
57391 let mut r = _mm512_undefined_epi32();
57392 _mm512_store_epi32(&mut r as *mut _ as *mut i32, a);
57393 assert_eq_m512i(r, a);
57394 }
57395
57396 #[simd_test(enable = "avx512f,avx512vl")]
57397 unsafe fn test_mm256_store_epi32() {
57398 let a = _mm256_set1_epi32(9);
57399 let mut r = _mm256_undefined_si256();
57400 _mm256_store_epi32(&mut r as *mut _ as *mut i32, a);
57401 assert_eq_m256i(r, a);
57402 }
57403
57404 #[simd_test(enable = "avx512f,avx512vl")]
57405 unsafe fn test_mm_store_epi32() {
57406 let a = _mm_set1_epi32(9);
57407 let mut r = _mm_undefined_si128();
57408 _mm_store_epi32(&mut r as *mut _ as *mut i32, a);
57409 assert_eq_m128i(r, a);
57410 }
57411
57412 #[simd_test(enable = "avx512f")]
57413 unsafe fn test_mm512_load_ps() {
57414 #[repr(align(64))]
57415 struct Align {
57416 data: [f32; 16], // 64 bytes
57417 }
57418 let a = Align {
57419 data: [
57420 4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
57421 ],
57422 };
57423 let p = (a.data).as_ptr();
57424 let r = _mm512_load_ps(black_box(p));
57425 let e = _mm512_setr_ps(
57426 4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
57427 );
57428 assert_eq_m512(r, e);
57429 }
57430
57431 #[simd_test(enable = "avx512f")]
57432 unsafe fn test_mm512_store_ps() {
57433 let a = _mm512_set1_ps(9.);
57434 let mut r = _mm512_undefined_ps();
57435 _mm512_store_ps(&mut r as *mut _ as *mut f32, a);
57436 assert_eq_m512(r, a);
57437 }
57438
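// mask_set1 broadcasts `a` only into lanes whose mask bit is set; the remaining lanes keep
// `src` for the writemask form and are zeroed for the maskz form.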
57439 #[simd_test(enable = "avx512f")]
57440 unsafe fn test_mm512_mask_set1_epi32() {
57441 let src = _mm512_set1_epi32(2);
57442 let a: i32 = 11;
57443 let r = _mm512_mask_set1_epi32(src, 0, a);
57444 assert_eq_m512i(r, src);
57445 let r = _mm512_mask_set1_epi32(src, 0b11111111_11111111, a);
57446 let e = _mm512_set1_epi32(11);
57447 assert_eq_m512i(r, e);
57448 }
57449
57450 #[simd_test(enable = "avx512f")]
57451 unsafe fn test_mm512_maskz_set1_epi32() {
57452 let a: i32 = 11;
57453 let r = _mm512_maskz_set1_epi32(0, a);
57454 assert_eq_m512i(r, _mm512_setzero_si512());
57455 let r = _mm512_maskz_set1_epi32(0b11111111_11111111, a);
57456 let e = _mm512_set1_epi32(11);
57457 assert_eq_m512i(r, e);
57458 }
57459
57460 #[simd_test(enable = "avx512f,avx512vl")]
57461 unsafe fn test_mm256_mask_set1_epi32() {
57462 let src = _mm256_set1_epi32(2);
57463 let a: i32 = 11;
57464 let r = _mm256_mask_set1_epi32(src, 0, a);
57465 assert_eq_m256i(r, src);
57466 let r = _mm256_mask_set1_epi32(src, 0b11111111, a);
57467 let e = _mm256_set1_epi32(11);
57468 assert_eq_m256i(r, e);
57469 }
57470
#[simd_test(enable = "avx512f,avx512vl")]
57472 unsafe fn test_mm256_maskz_set1_epi32() {
57473 let a: i32 = 11;
57474 let r = _mm256_maskz_set1_epi32(0, a);
57475 assert_eq_m256i(r, _mm256_setzero_si256());
57476 let r = _mm256_maskz_set1_epi32(0b11111111, a);
57477 let e = _mm256_set1_epi32(11);
57478 assert_eq_m256i(r, e);
57479 }
57480
57481 #[simd_test(enable = "avx512f,avx512vl")]
57482 unsafe fn test_mm_mask_set1_epi32() {
57483 let src = _mm_set1_epi32(2);
57484 let a: i32 = 11;
57485 let r = _mm_mask_set1_epi32(src, 0, a);
57486 assert_eq_m128i(r, src);
57487 let r = _mm_mask_set1_epi32(src, 0b00001111, a);
57488 let e = _mm_set1_epi32(11);
57489 assert_eq_m128i(r, e);
57490 }
57491
#[simd_test(enable = "avx512f,avx512vl")]
57493 unsafe fn test_mm_maskz_set1_epi32() {
57494 let a: i32 = 11;
57495 let r = _mm_maskz_set1_epi32(0, a);
57496 assert_eq_m128i(r, _mm_setzero_si128());
57497 let r = _mm_maskz_set1_epi32(0b00001111, a);
57498 let e = _mm_set1_epi32(11);
57499 assert_eq_m128i(r, e);
57500 }
57501
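// For the masked scalar move below, only lane 0 is selected: it becomes b[0] when mask bit 0
// is set and src[0] (or 0.0 for maskz) otherwise, while lanes 1..=3 always come from `a`.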
57502 #[simd_test(enable = "avx512f")]
57503 unsafe fn test_mm_mask_move_ss() {
57504 let src = _mm_set_ps(10., 11., 100., 110.);
57505 let a = _mm_set_ps(1., 2., 10., 20.);
57506 let b = _mm_set_ps(3., 4., 30., 40.);
57507 let r = _mm_mask_move_ss(src, 0, a, b);
57508 let e = _mm_set_ps(1., 2., 10., 110.);
57509 assert_eq_m128(r, e);
57510 let r = _mm_mask_move_ss(src, 0b11111111, a, b);
57511 let e = _mm_set_ps(1., 2., 10., 40.);
57512 assert_eq_m128(r, e);
57513 }
57514
57515 #[simd_test(enable = "avx512f")]
57516 unsafe fn test_mm_maskz_move_ss() {
57517 let a = _mm_set_ps(1., 2., 10., 20.);
57518 let b = _mm_set_ps(3., 4., 30., 40.);
57519 let r = _mm_maskz_move_ss(0, a, b);
57520 let e = _mm_set_ps(1., 2., 10., 0.);
57521 assert_eq_m128(r, e);
57522 let r = _mm_maskz_move_ss(0b11111111, a, b);
57523 let e = _mm_set_ps(1., 2., 10., 40.);
57524 assert_eq_m128(r, e);
57525 }
57526
57527 #[simd_test(enable = "avx512f")]
57528 unsafe fn test_mm_mask_move_sd() {
57529 let src = _mm_set_pd(10., 11.);
57530 let a = _mm_set_pd(1., 2.);
57531 let b = _mm_set_pd(3., 4.);
57532 let r = _mm_mask_move_sd(src, 0, a, b);
57533 let e = _mm_set_pd(1., 11.);
57534 assert_eq_m128d(r, e);
57535 let r = _mm_mask_move_sd(src, 0b11111111, a, b);
57536 let e = _mm_set_pd(1., 4.);
57537 assert_eq_m128d(r, e);
57538 }
57539
57540 #[simd_test(enable = "avx512f")]
57541 unsafe fn test_mm_maskz_move_sd() {
57542 let a = _mm_set_pd(1., 2.);
57543 let b = _mm_set_pd(3., 4.);
57544 let r = _mm_maskz_move_sd(0, a, b);
57545 let e = _mm_set_pd(1., 0.);
57546 assert_eq_m128d(r, e);
57547 let r = _mm_maskz_move_sd(0b11111111, a, b);
57548 let e = _mm_set_pd(1., 4.);
57549 assert_eq_m128d(r, e);
57550 }
57551
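// The masked scalar add/sub/mul/div tests all follow the same pattern: lane 0 is
// a[0] <op> b[0] when mask bit 0 is set, otherwise src[0] (writemask) or 0.0 (maskz);
// lanes 1..=3 are copied from `a`.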
57552 #[simd_test(enable = "avx512f")]
57553 unsafe fn test_mm_mask_add_ss() {
57554 let src = _mm_set_ps(10., 11., 100., 110.);
57555 let a = _mm_set_ps(1., 2., 10., 20.);
57556 let b = _mm_set_ps(3., 4., 30., 40.);
57557 let r = _mm_mask_add_ss(src, 0, a, b);
57558 let e = _mm_set_ps(1., 2., 10., 110.);
57559 assert_eq_m128(r, e);
57560 let r = _mm_mask_add_ss(src, 0b11111111, a, b);
57561 let e = _mm_set_ps(1., 2., 10., 60.);
57562 assert_eq_m128(r, e);
57563 }
57564
57565 #[simd_test(enable = "avx512f")]
57566 unsafe fn test_mm_maskz_add_ss() {
57567 let a = _mm_set_ps(1., 2., 10., 20.);
57568 let b = _mm_set_ps(3., 4., 30., 40.);
57569 let r = _mm_maskz_add_ss(0, a, b);
57570 let e = _mm_set_ps(1., 2., 10., 0.);
57571 assert_eq_m128(r, e);
57572 let r = _mm_maskz_add_ss(0b11111111, a, b);
57573 let e = _mm_set_ps(1., 2., 10., 60.);
57574 assert_eq_m128(r, e);
57575 }
57576
57577 #[simd_test(enable = "avx512f")]
57578 unsafe fn test_mm_mask_add_sd() {
57579 let src = _mm_set_pd(10., 11.);
57580 let a = _mm_set_pd(1., 2.);
57581 let b = _mm_set_pd(3., 4.);
57582 let r = _mm_mask_add_sd(src, 0, a, b);
57583 let e = _mm_set_pd(1., 11.);
57584 assert_eq_m128d(r, e);
57585 let r = _mm_mask_add_sd(src, 0b11111111, a, b);
57586 let e = _mm_set_pd(1., 6.);
57587 assert_eq_m128d(r, e);
57588 }
57589
57590 #[simd_test(enable = "avx512f")]
57591 unsafe fn test_mm_maskz_add_sd() {
57592 let a = _mm_set_pd(1., 2.);
57593 let b = _mm_set_pd(3., 4.);
57594 let r = _mm_maskz_add_sd(0, a, b);
57595 let e = _mm_set_pd(1., 0.);
57596 assert_eq_m128d(r, e);
57597 let r = _mm_maskz_add_sd(0b11111111, a, b);
57598 let e = _mm_set_pd(1., 6.);
57599 assert_eq_m128d(r, e);
57600 }
57601
57602 #[simd_test(enable = "avx512f")]
57603 unsafe fn test_mm_mask_sub_ss() {
57604 let src = _mm_set_ps(10., 11., 100., 110.);
57605 let a = _mm_set_ps(1., 2., 10., 20.);
57606 let b = _mm_set_ps(3., 4., 30., 40.);
57607 let r = _mm_mask_sub_ss(src, 0, a, b);
57608 let e = _mm_set_ps(1., 2., 10., 110.);
57609 assert_eq_m128(r, e);
57610 let r = _mm_mask_sub_ss(src, 0b11111111, a, b);
57611 let e = _mm_set_ps(1., 2., 10., -20.);
57612 assert_eq_m128(r, e);
57613 }
57614
57615 #[simd_test(enable = "avx512f")]
57616 unsafe fn test_mm_maskz_sub_ss() {
57617 let a = _mm_set_ps(1., 2., 10., 20.);
57618 let b = _mm_set_ps(3., 4., 30., 40.);
57619 let r = _mm_maskz_sub_ss(0, a, b);
57620 let e = _mm_set_ps(1., 2., 10., 0.);
57621 assert_eq_m128(r, e);
57622 let r = _mm_maskz_sub_ss(0b11111111, a, b);
57623 let e = _mm_set_ps(1., 2., 10., -20.);
57624 assert_eq_m128(r, e);
57625 }
57626
57627 #[simd_test(enable = "avx512f")]
57628 unsafe fn test_mm_mask_sub_sd() {
57629 let src = _mm_set_pd(10., 11.);
57630 let a = _mm_set_pd(1., 2.);
57631 let b = _mm_set_pd(3., 4.);
57632 let r = _mm_mask_sub_sd(src, 0, a, b);
57633 let e = _mm_set_pd(1., 11.);
57634 assert_eq_m128d(r, e);
57635 let r = _mm_mask_sub_sd(src, 0b11111111, a, b);
57636 let e = _mm_set_pd(1., -2.);
57637 assert_eq_m128d(r, e);
57638 }
57639
57640 #[simd_test(enable = "avx512f")]
57641 unsafe fn test_mm_maskz_sub_sd() {
57642 let a = _mm_set_pd(1., 2.);
57643 let b = _mm_set_pd(3., 4.);
57644 let r = _mm_maskz_sub_sd(0, a, b);
57645 let e = _mm_set_pd(1., 0.);
57646 assert_eq_m128d(r, e);
57647 let r = _mm_maskz_sub_sd(0b11111111, a, b);
57648 let e = _mm_set_pd(1., -2.);
57649 assert_eq_m128d(r, e);
57650 }
57651
57652 #[simd_test(enable = "avx512f")]
57653 unsafe fn test_mm_mask_mul_ss() {
57654 let src = _mm_set_ps(10., 11., 100., 110.);
57655 let a = _mm_set_ps(1., 2., 10., 20.);
57656 let b = _mm_set_ps(3., 4., 30., 40.);
57657 let r = _mm_mask_mul_ss(src, 0, a, b);
57658 let e = _mm_set_ps(1., 2., 10., 110.);
57659 assert_eq_m128(r, e);
57660 let r = _mm_mask_mul_ss(src, 0b11111111, a, b);
57661 let e = _mm_set_ps(1., 2., 10., 800.);
57662 assert_eq_m128(r, e);
57663 }
57664
57665 #[simd_test(enable = "avx512f")]
57666 unsafe fn test_mm_maskz_mul_ss() {
57667 let a = _mm_set_ps(1., 2., 10., 20.);
57668 let b = _mm_set_ps(3., 4., 30., 40.);
57669 let r = _mm_maskz_mul_ss(0, a, b);
57670 let e = _mm_set_ps(1., 2., 10., 0.);
57671 assert_eq_m128(r, e);
57672 let r = _mm_maskz_mul_ss(0b11111111, a, b);
57673 let e = _mm_set_ps(1., 2., 10., 800.);
57674 assert_eq_m128(r, e);
57675 }
57676
57677 #[simd_test(enable = "avx512f")]
57678 unsafe fn test_mm_mask_mul_sd() {
57679 let src = _mm_set_pd(10., 11.);
57680 let a = _mm_set_pd(1., 2.);
57681 let b = _mm_set_pd(3., 4.);
57682 let r = _mm_mask_mul_sd(src, 0, a, b);
57683 let e = _mm_set_pd(1., 11.);
57684 assert_eq_m128d(r, e);
57685 let r = _mm_mask_mul_sd(src, 0b11111111, a, b);
57686 let e = _mm_set_pd(1., 8.);
57687 assert_eq_m128d(r, e);
57688 }
57689
57690 #[simd_test(enable = "avx512f")]
57691 unsafe fn test_mm_maskz_mul_sd() {
57692 let a = _mm_set_pd(1., 2.);
57693 let b = _mm_set_pd(3., 4.);
57694 let r = _mm_maskz_mul_sd(0, a, b);
57695 let e = _mm_set_pd(1., 0.);
57696 assert_eq_m128d(r, e);
57697 let r = _mm_maskz_mul_sd(0b11111111, a, b);
57698 let e = _mm_set_pd(1., 8.);
57699 assert_eq_m128d(r, e);
57700 }
57701
57702 #[simd_test(enable = "avx512f")]
57703 unsafe fn test_mm_mask_div_ss() {
57704 let src = _mm_set_ps(10., 11., 100., 110.);
57705 let a = _mm_set_ps(1., 2., 10., 20.);
57706 let b = _mm_set_ps(3., 4., 30., 40.);
57707 let r = _mm_mask_div_ss(src, 0, a, b);
57708 let e = _mm_set_ps(1., 2., 10., 110.);
57709 assert_eq_m128(r, e);
57710 let r = _mm_mask_div_ss(src, 0b11111111, a, b);
57711 let e = _mm_set_ps(1., 2., 10., 0.5);
57712 assert_eq_m128(r, e);
57713 }
57714
57715 #[simd_test(enable = "avx512f")]
57716 unsafe fn test_mm_maskz_div_ss() {
57717 let a = _mm_set_ps(1., 2., 10., 20.);
57718 let b = _mm_set_ps(3., 4., 30., 40.);
57719 let r = _mm_maskz_div_ss(0, a, b);
57720 let e = _mm_set_ps(1., 2., 10., 0.);
57721 assert_eq_m128(r, e);
57722 let r = _mm_maskz_div_ss(0b11111111, a, b);
57723 let e = _mm_set_ps(1., 2., 10., 0.5);
57724 assert_eq_m128(r, e);
57725 }
57726
57727 #[simd_test(enable = "avx512f")]
57728 unsafe fn test_mm_mask_div_sd() {
57729 let src = _mm_set_pd(10., 11.);
57730 let a = _mm_set_pd(1., 2.);
57731 let b = _mm_set_pd(3., 4.);
57732 let r = _mm_mask_div_sd(src, 0, a, b);
57733 let e = _mm_set_pd(1., 11.);
57734 assert_eq_m128d(r, e);
57735 let r = _mm_mask_div_sd(src, 0b11111111, a, b);
57736 let e = _mm_set_pd(1., 0.5);
57737 assert_eq_m128d(r, e);
57738 }
57739
57740 #[simd_test(enable = "avx512f")]
57741 unsafe fn test_mm_maskz_div_sd() {
57742 let a = _mm_set_pd(1., 2.);
57743 let b = _mm_set_pd(3., 4.);
57744 let r = _mm_maskz_div_sd(0, a, b);
57745 let e = _mm_set_pd(1., 0.);
57746 assert_eq_m128d(r, e);
57747 let r = _mm_maskz_div_sd(0b11111111, a, b);
57748 let e = _mm_set_pd(1., 0.5);
57749 assert_eq_m128d(r, e);
57750 }
57751
57752 #[simd_test(enable = "avx512f")]
57753 unsafe fn test_mm_mask_max_ss() {
57754 let a = _mm_set_ps(0., 1., 2., 3.);
57755 let b = _mm_set_ps(4., 5., 6., 7.);
57756 let r = _mm_mask_max_ss(a, 0, a, b);
57757 let e = _mm_set_ps(0., 1., 2., 3.);
57758 assert_eq_m128(r, e);
57759 let r = _mm_mask_max_ss(a, 0b11111111, a, b);
57760 let e = _mm_set_ps(0., 1., 2., 7.);
57761 assert_eq_m128(r, e);
57762 }
57763
57764 #[simd_test(enable = "avx512f")]
57765 unsafe fn test_mm_maskz_max_ss() {
57766 let a = _mm_set_ps(0., 1., 2., 3.);
57767 let b = _mm_set_ps(4., 5., 6., 7.);
57768 let r = _mm_maskz_max_ss(0, a, b);
57769 let e = _mm_set_ps(0., 1., 2., 0.);
57770 assert_eq_m128(r, e);
57771 let r = _mm_maskz_max_ss(0b11111111, a, b);
57772 let e = _mm_set_ps(0., 1., 2., 7.);
57773 assert_eq_m128(r, e);
57774 }
57775
57776 #[simd_test(enable = "avx512f")]
57777 unsafe fn test_mm_mask_max_sd() {
57778 let a = _mm_set_pd(0., 1.);
57779 let b = _mm_set_pd(2., 3.);
57780 let r = _mm_mask_max_sd(a, 0, a, b);
57781 let e = _mm_set_pd(0., 1.);
57782 assert_eq_m128d(r, e);
57783 let r = _mm_mask_max_sd(a, 0b11111111, a, b);
57784 let e = _mm_set_pd(0., 3.);
57785 assert_eq_m128d(r, e);
57786 }
57787
57788 #[simd_test(enable = "avx512f")]
57789 unsafe fn test_mm_maskz_max_sd() {
57790 let a = _mm_set_pd(0., 1.);
57791 let b = _mm_set_pd(2., 3.);
57792 let r = _mm_maskz_max_sd(0, a, b);
57793 let e = _mm_set_pd(0., 0.);
57794 assert_eq_m128d(r, e);
57795 let r = _mm_maskz_max_sd(0b11111111, a, b);
57796 let e = _mm_set_pd(0., 3.);
57797 assert_eq_m128d(r, e);
57798 }
57799
57800 #[simd_test(enable = "avx512f")]
57801 unsafe fn test_mm_mask_min_ss() {
57802 let a = _mm_set_ps(0., 1., 2., 3.);
57803 let b = _mm_set_ps(4., 5., 6., 7.);
57804 let r = _mm_mask_min_ss(a, 0, a, b);
57805 let e = _mm_set_ps(0., 1., 2., 3.);
57806 assert_eq_m128(r, e);
57807 let r = _mm_mask_min_ss(a, 0b11111111, a, b);
57808 let e = _mm_set_ps(0., 1., 2., 3.);
57809 assert_eq_m128(r, e);
57810 }
57811
57812 #[simd_test(enable = "avx512f")]
57813 unsafe fn test_mm_maskz_min_ss() {
57814 let a = _mm_set_ps(0., 1., 2., 3.);
57815 let b = _mm_set_ps(4., 5., 6., 7.);
57816 let r = _mm_maskz_min_ss(0, a, b);
57817 let e = _mm_set_ps(0., 1., 2., 0.);
57818 assert_eq_m128(r, e);
57819 let r = _mm_maskz_min_ss(0b11111111, a, b);
57820 let e = _mm_set_ps(0., 1., 2., 3.);
57821 assert_eq_m128(r, e);
57822 }
57823
57824 #[simd_test(enable = "avx512f")]
57825 unsafe fn test_mm_mask_min_sd() {
57826 let a = _mm_set_pd(0., 1.);
57827 let b = _mm_set_pd(2., 3.);
57828 let r = _mm_mask_min_sd(a, 0, a, b);
57829 let e = _mm_set_pd(0., 1.);
57830 assert_eq_m128d(r, e);
57831 let r = _mm_mask_min_sd(a, 0b11111111, a, b);
57832 let e = _mm_set_pd(0., 1.);
57833 assert_eq_m128d(r, e);
57834 }
57835
57836 #[simd_test(enable = "avx512f")]
57837 unsafe fn test_mm_maskz_min_sd() {
57838 let a = _mm_set_pd(0., 1.);
57839 let b = _mm_set_pd(2., 3.);
57840 let r = _mm_maskz_min_sd(0, a, b);
57841 let e = _mm_set_pd(0., 0.);
57842 assert_eq_m128d(r, e);
57843 let r = _mm_maskz_min_sd(0b11111111, a, b);
57844 let e = _mm_set_pd(0., 1.);
57845 assert_eq_m128d(r, e);
57846 }
57847
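// For the scalar sqrt the operand is b[0]: sqrt(4.0) == 2.0, with the upper lanes taken from `a`.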
57848 #[simd_test(enable = "avx512f")]
57849 unsafe fn test_mm_mask_sqrt_ss() {
57850 let src = _mm_set_ps(10., 11., 100., 110.);
57851 let a = _mm_set_ps(1., 2., 10., 20.);
57852 let b = _mm_set_ps(3., 4., 30., 4.);
57853 let r = _mm_mask_sqrt_ss(src, 0, a, b);
57854 let e = _mm_set_ps(1., 2., 10., 110.);
57855 assert_eq_m128(r, e);
57856 let r = _mm_mask_sqrt_ss(src, 0b11111111, a, b);
57857 let e = _mm_set_ps(1., 2., 10., 2.);
57858 assert_eq_m128(r, e);
57859 }
57860
57861 #[simd_test(enable = "avx512f")]
57862 unsafe fn test_mm_maskz_sqrt_ss() {
57863 let a = _mm_set_ps(1., 2., 10., 20.);
57864 let b = _mm_set_ps(3., 4., 30., 4.);
57865 let r = _mm_maskz_sqrt_ss(0, a, b);
57866 let e = _mm_set_ps(1., 2., 10., 0.);
57867 assert_eq_m128(r, e);
57868 let r = _mm_maskz_sqrt_ss(0b11111111, a, b);
57869 let e = _mm_set_ps(1., 2., 10., 2.);
57870 assert_eq_m128(r, e);
57871 }
57872
57873 #[simd_test(enable = "avx512f")]
57874 unsafe fn test_mm_mask_sqrt_sd() {
57875 let src = _mm_set_pd(10., 11.);
57876 let a = _mm_set_pd(1., 2.);
57877 let b = _mm_set_pd(3., 4.);
57878 let r = _mm_mask_sqrt_sd(src, 0, a, b);
57879 let e = _mm_set_pd(1., 11.);
57880 assert_eq_m128d(r, e);
57881 let r = _mm_mask_sqrt_sd(src, 0b11111111, a, b);
57882 let e = _mm_set_pd(1., 2.);
57883 assert_eq_m128d(r, e);
57884 }
57885
57886 #[simd_test(enable = "avx512f")]
57887 unsafe fn test_mm_maskz_sqrt_sd() {
57888 let a = _mm_set_pd(1., 2.);
57889 let b = _mm_set_pd(3., 4.);
57890 let r = _mm_maskz_sqrt_sd(0, a, b);
57891 let e = _mm_set_pd(1., 0.);
57892 assert_eq_m128d(r, e);
57893 let r = _mm_maskz_sqrt_sd(0b11111111, a, b);
57894 let e = _mm_set_pd(1., 2.);
57895 assert_eq_m128d(r, e);
57896 }
57897
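// rsqrt14 approximates 1.0 / sqrt(b[0]) with a relative error of at most 2^-14; the input 4.0
// is chosen so the expected result 0.5 is exact and can be compared directly.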
57898 #[simd_test(enable = "avx512f")]
57899 unsafe fn test_mm_rsqrt14_ss() {
57900 let a = _mm_set_ps(1., 2., 10., 20.);
57901 let b = _mm_set_ps(3., 4., 30., 4.);
57902 let r = _mm_rsqrt14_ss(a, b);
57903 let e = _mm_set_ps(1., 2., 10., 0.5);
57904 assert_eq_m128(r, e);
57905 }
57906
57907 #[simd_test(enable = "avx512f")]
57908 unsafe fn test_mm_mask_rsqrt14_ss() {
57909 let src = _mm_set_ps(10., 11., 100., 110.);
57910 let a = _mm_set_ps(1., 2., 10., 20.);
57911 let b = _mm_set_ps(3., 4., 30., 4.);
57912 let r = _mm_mask_rsqrt14_ss(src, 0, a, b);
57913 let e = _mm_set_ps(1., 2., 10., 110.);
57914 assert_eq_m128(r, e);
57915 let r = _mm_mask_rsqrt14_ss(src, 0b11111111, a, b);
57916 let e = _mm_set_ps(1., 2., 10., 0.5);
57917 assert_eq_m128(r, e);
57918 }
57919
57920 #[simd_test(enable = "avx512f")]
57921 unsafe fn test_mm_maskz_rsqrt14_ss() {
57922 let a = _mm_set_ps(1., 2., 10., 20.);
57923 let b = _mm_set_ps(3., 4., 30., 4.);
57924 let r = _mm_maskz_rsqrt14_ss(0, a, b);
57925 let e = _mm_set_ps(1., 2., 10., 0.);
57926 assert_eq_m128(r, e);
57927 let r = _mm_maskz_rsqrt14_ss(0b11111111, a, b);
57928 let e = _mm_set_ps(1., 2., 10., 0.5);
57929 assert_eq_m128(r, e);
57930 }
57931
57932 #[simd_test(enable = "avx512f")]
57933 unsafe fn test_mm_rsqrt14_sd() {
57934 let a = _mm_set_pd(1., 2.);
57935 let b = _mm_set_pd(3., 4.);
57936 let r = _mm_rsqrt14_sd(a, b);
57937 let e = _mm_set_pd(1., 0.5);
57938 assert_eq_m128d(r, e);
57939 }
57940
57941 #[simd_test(enable = "avx512f")]
57942 unsafe fn test_mm_mask_rsqrt14_sd() {
57943 let src = _mm_set_pd(10., 11.);
57944 let a = _mm_set_pd(1., 2.);
57945 let b = _mm_set_pd(3., 4.);
57946 let r = _mm_mask_rsqrt14_sd(src, 0, a, b);
57947 let e = _mm_set_pd(1., 11.);
57948 assert_eq_m128d(r, e);
57949 let r = _mm_mask_rsqrt14_sd(src, 0b11111111, a, b);
57950 let e = _mm_set_pd(1., 0.5);
57951 assert_eq_m128d(r, e);
57952 }
57953
57954 #[simd_test(enable = "avx512f")]
57955 unsafe fn test_mm_maskz_rsqrt14_sd() {
57956 let a = _mm_set_pd(1., 2.);
57957 let b = _mm_set_pd(3., 4.);
57958 let r = _mm_maskz_rsqrt14_sd(0, a, b);
57959 let e = _mm_set_pd(1., 0.);
57960 assert_eq_m128d(r, e);
57961 let r = _mm_maskz_rsqrt14_sd(0b11111111, a, b);
57962 let e = _mm_set_pd(1., 0.5);
57963 assert_eq_m128d(r, e);
57964 }
57965
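// rcp14 approximates 1.0 / b[0] with a relative error of at most 2^-14; with b[0] == 4.0 the
// expected result 0.25 is exact.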
57966 #[simd_test(enable = "avx512f")]
57967 unsafe fn test_mm_rcp14_ss() {
57968 let a = _mm_set_ps(1., 2., 10., 20.);
57969 let b = _mm_set_ps(3., 4., 30., 4.);
57970 let r = _mm_rcp14_ss(a, b);
57971 let e = _mm_set_ps(1., 2., 10., 0.25);
57972 assert_eq_m128(r, e);
57973 }
57974
57975 #[simd_test(enable = "avx512f")]
57976 unsafe fn test_mm_mask_rcp14_ss() {
57977 let src = _mm_set_ps(10., 11., 100., 110.);
57978 let a = _mm_set_ps(1., 2., 10., 20.);
57979 let b = _mm_set_ps(3., 4., 30., 4.);
57980 let r = _mm_mask_rcp14_ss(src, 0, a, b);
57981 let e = _mm_set_ps(1., 2., 10., 110.);
57982 assert_eq_m128(r, e);
57983 let r = _mm_mask_rcp14_ss(src, 0b11111111, a, b);
57984 let e = _mm_set_ps(1., 2., 10., 0.25);
57985 assert_eq_m128(r, e);
57986 }
57987
57988 #[simd_test(enable = "avx512f")]
57989 unsafe fn test_mm_maskz_rcp14_ss() {
57990 let a = _mm_set_ps(1., 2., 10., 20.);
57991 let b = _mm_set_ps(3., 4., 30., 4.);
57992 let r = _mm_maskz_rcp14_ss(0, a, b);
57993 let e = _mm_set_ps(1., 2., 10., 0.);
57994 assert_eq_m128(r, e);
57995 let r = _mm_maskz_rcp14_ss(0b11111111, a, b);
57996 let e = _mm_set_ps(1., 2., 10., 0.25);
57997 assert_eq_m128(r, e);
57998 }
57999
58000 #[simd_test(enable = "avx512f")]
58001 unsafe fn test_mm_rcp14_sd() {
58002 let a = _mm_set_pd(1., 2.);
58003 let b = _mm_set_pd(3., 4.);
58004 let r = _mm_rcp14_sd(a, b);
58005 let e = _mm_set_pd(1., 0.25);
58006 assert_eq_m128d(r, e);
58007 }
58008
58009 #[simd_test(enable = "avx512f")]
58010 unsafe fn test_mm_mask_rcp14_sd() {
58011 let src = _mm_set_pd(10., 11.);
58012 let a = _mm_set_pd(1., 2.);
58013 let b = _mm_set_pd(3., 4.);
58014 let r = _mm_mask_rcp14_sd(src, 0, a, b);
58015 let e = _mm_set_pd(1., 11.);
58016 assert_eq_m128d(r, e);
58017 let r = _mm_mask_rcp14_sd(src, 0b11111111, a, b);
58018 let e = _mm_set_pd(1., 0.25);
58019 assert_eq_m128d(r, e);
58020 }
58021
58022 #[simd_test(enable = "avx512f")]
58023 unsafe fn test_mm_maskz_rcp14_sd() {
58024 let a = _mm_set_pd(1., 2.);
58025 let b = _mm_set_pd(3., 4.);
58026 let r = _mm_maskz_rcp14_sd(0, a, b);
58027 let e = _mm_set_pd(1., 0.);
58028 assert_eq_m128d(r, e);
58029 let r = _mm_maskz_rcp14_sd(0b11111111, a, b);
58030 let e = _mm_set_pd(1., 0.25);
58031 assert_eq_m128d(r, e);
58032 }
58033
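// getexp returns floor(log2(|b[0]|)) as a float in lane 0: floor(log2(3.0)) == 1.0.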
58034 #[simd_test(enable = "avx512f")]
58035 unsafe fn test_mm_getexp_ss() {
58036 let a = _mm_set1_ps(2.);
58037 let b = _mm_set1_ps(3.);
58038 let r = _mm_getexp_ss(a, b);
58039 let e = _mm_set_ps(2., 2., 2., 1.);
58040 assert_eq_m128(r, e);
58041 }
58042
58043 #[simd_test(enable = "avx512f")]
58044 unsafe fn test_mm_mask_getexp_ss() {
58045 let a = _mm_set1_ps(2.);
58046 let b = _mm_set1_ps(3.);
58047 let r = _mm_mask_getexp_ss(a, 0, a, b);
58048 let e = _mm_set_ps(2., 2., 2., 2.);
58049 assert_eq_m128(r, e);
58050 let r = _mm_mask_getexp_ss(a, 0b11111111, a, b);
58051 let e = _mm_set_ps(2., 2., 2., 1.);
58052 assert_eq_m128(r, e);
58053 }
58054
58055 #[simd_test(enable = "avx512f")]
58056 unsafe fn test_mm_maskz_getexp_ss() {
58057 let a = _mm_set1_ps(2.);
58058 let b = _mm_set1_ps(3.);
58059 let r = _mm_maskz_getexp_ss(0, a, b);
58060 let e = _mm_set_ps(2., 2., 2., 0.);
58061 assert_eq_m128(r, e);
58062 let r = _mm_maskz_getexp_ss(0b11111111, a, b);
58063 let e = _mm_set_ps(2., 2., 2., 1.);
58064 assert_eq_m128(r, e);
58065 }
58066
58067 #[simd_test(enable = "avx512f")]
58068 unsafe fn test_mm_getexp_sd() {
58069 let a = _mm_set1_pd(2.);
58070 let b = _mm_set1_pd(3.);
58071 let r = _mm_getexp_sd(a, b);
58072 let e = _mm_set_pd(2., 1.);
58073 assert_eq_m128d(r, e);
58074 }
58075
58076 #[simd_test(enable = "avx512f")]
58077 unsafe fn test_mm_mask_getexp_sd() {
58078 let a = _mm_set1_pd(2.);
58079 let b = _mm_set1_pd(3.);
58080 let r = _mm_mask_getexp_sd(a, 0, a, b);
58081 let e = _mm_set_pd(2., 2.);
58082 assert_eq_m128d(r, e);
58083 let r = _mm_mask_getexp_sd(a, 0b11111111, a, b);
58084 let e = _mm_set_pd(2., 1.);
58085 assert_eq_m128d(r, e);
58086 }
58087
58088 #[simd_test(enable = "avx512f")]
58089 unsafe fn test_mm_maskz_getexp_sd() {
58090 let a = _mm_set1_pd(2.);
58091 let b = _mm_set1_pd(3.);
58092 let r = _mm_maskz_getexp_sd(0, a, b);
58093 let e = _mm_set_pd(2., 0.);
58094 assert_eq_m128d(r, e);
58095 let r = _mm_maskz_getexp_sd(0b11111111, a, b);
58096 let e = _mm_set_pd(2., 1.);
58097 assert_eq_m128d(r, e);
58098 }
58099
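// getmant with _MM_MANT_NORM_1_2 normalizes b[0]'s mantissa into [1.0, 2.0):
// 10.0 == 1.25 * 2^3, so lane 0 becomes 1.25; _MM_MANT_SIGN_SRC keeps the source sign.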
58100 #[simd_test(enable = "avx512f")]
58101 unsafe fn test_mm_getmant_ss() {
58102 let a = _mm_set1_ps(20.);
58103 let b = _mm_set1_ps(10.);
58104 let r = _mm_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, b);
58105 let e = _mm_set_ps(20., 20., 20., 1.25);
58106 assert_eq_m128(r, e);
58107 }
58108
58109 #[simd_test(enable = "avx512f")]
58110 unsafe fn test_mm_mask_getmant_ss() {
58111 let a = _mm_set1_ps(20.);
58112 let b = _mm_set1_ps(10.);
58113 let r = _mm_mask_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a, b);
58114 let e = _mm_set_ps(20., 20., 20., 20.);
58115 assert_eq_m128(r, e);
58116 let r = _mm_mask_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b11111111, a, b);
58117 let e = _mm_set_ps(20., 20., 20., 1.25);
58118 assert_eq_m128(r, e);
58119 }
58120
58121 #[simd_test(enable = "avx512f")]
58122 unsafe fn test_mm_maskz_getmant_ss() {
58123 let a = _mm_set1_ps(20.);
58124 let b = _mm_set1_ps(10.);
58125 let r = _mm_maskz_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a, b);
58126 let e = _mm_set_ps(20., 20., 20., 0.);
58127 assert_eq_m128(r, e);
58128 let r = _mm_maskz_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b11111111, a, b);
58129 let e = _mm_set_ps(20., 20., 20., 1.25);
58130 assert_eq_m128(r, e);
58131 }
58132
58133 #[simd_test(enable = "avx512f")]
58134 unsafe fn test_mm_getmant_sd() {
58135 let a = _mm_set1_pd(20.);
58136 let b = _mm_set1_pd(10.);
58137 let r = _mm_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, b);
58138 let e = _mm_set_pd(20., 1.25);
58139 assert_eq_m128d(r, e);
58140 }
58141
58142 #[simd_test(enable = "avx512f")]
58143 unsafe fn test_mm_mask_getmant_sd() {
58144 let a = _mm_set1_pd(20.);
58145 let b = _mm_set1_pd(10.);
58146 let r = _mm_mask_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a, b);
58147 let e = _mm_set_pd(20., 20.);
58148 assert_eq_m128d(r, e);
58149 let r = _mm_mask_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b11111111, a, b);
58150 let e = _mm_set_pd(20., 1.25);
58151 assert_eq_m128d(r, e);
58152 }
58153
58154 #[simd_test(enable = "avx512f")]
58155 unsafe fn test_mm_maskz_getmant_sd() {
58156 let a = _mm_set1_pd(20.);
58157 let b = _mm_set1_pd(10.);
58158 let r = _mm_maskz_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a, b);
58159 let e = _mm_set_pd(20., 0.);
58160 assert_eq_m128d(r, e);
58161 let r = _mm_maskz_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b11111111, a, b);
58162 let e = _mm_set_pd(20., 1.25);
58163 assert_eq_m128d(r, e);
58164 }
58165
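// roundscale with IMM8 == 0 rounds b[0] to an integer (zero fraction bits, round-to-nearest):
// 1.1 becomes 1.0.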
58166 #[simd_test(enable = "avx512f")]
58167 unsafe fn test_mm_roundscale_ss() {
58168 let a = _mm_set1_ps(2.2);
58169 let b = _mm_set1_ps(1.1);
58170 let r = _mm_roundscale_ss::<0>(a, b);
58171 let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
58172 assert_eq_m128(r, e);
58173 }
58174
58175 #[simd_test(enable = "avx512f")]
58176 unsafe fn test_mm_mask_roundscale_ss() {
58177 let a = _mm_set1_ps(2.2);
58178 let b = _mm_set1_ps(1.1);
58179 let r = _mm_mask_roundscale_ss::<0>(a, 0, a, b);
58180 let e = _mm_set_ps(2.2, 2.2, 2.2, 2.2);
58181 assert_eq_m128(r, e);
58182 let r = _mm_mask_roundscale_ss::<0>(a, 0b11111111, a, b);
58183 let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
58184 assert_eq_m128(r, e);
58185 }
58186
58187 #[simd_test(enable = "avx512f")]
58188 unsafe fn test_mm_maskz_roundscale_ss() {
58189 let a = _mm_set1_ps(2.2);
58190 let b = _mm_set1_ps(1.1);
58191 let r = _mm_maskz_roundscale_ss::<0>(0, a, b);
58192 let e = _mm_set_ps(2.2, 2.2, 2.2, 0.0);
58193 assert_eq_m128(r, e);
58194 let r = _mm_maskz_roundscale_ss::<0>(0b11111111, a, b);
58195 let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
58196 assert_eq_m128(r, e);
58197 }
58198
58199 #[simd_test(enable = "avx512f")]
58200 unsafe fn test_mm_roundscale_sd() {
58201 let a = _mm_set1_pd(2.2);
58202 let b = _mm_set1_pd(1.1);
58203 let r = _mm_roundscale_sd::<0>(a, b);
58204 let e = _mm_set_pd(2.2, 1.0);
58205 assert_eq_m128d(r, e);
58206 }
58207
58208 #[simd_test(enable = "avx512f")]
58209 unsafe fn test_mm_mask_roundscale_sd() {
58210 let a = _mm_set1_pd(2.2);
58211 let b = _mm_set1_pd(1.1);
58212 let r = _mm_mask_roundscale_sd::<0>(a, 0, a, b);
58213 let e = _mm_set_pd(2.2, 2.2);
58214 assert_eq_m128d(r, e);
58215 let r = _mm_mask_roundscale_sd::<0>(a, 0b11111111, a, b);
58216 let e = _mm_set_pd(2.2, 1.0);
58217 assert_eq_m128d(r, e);
58218 }
58219
58220 #[simd_test(enable = "avx512f")]
58221 unsafe fn test_mm_maskz_roundscale_sd() {
58222 let a = _mm_set1_pd(2.2);
58223 let b = _mm_set1_pd(1.1);
58224 let r = _mm_maskz_roundscale_sd::<0>(0, a, b);
58225 let e = _mm_set_pd(2.2, 0.0);
58226 assert_eq_m128d(r, e);
58227 let r = _mm_maskz_roundscale_sd::<0>(0b11111111, a, b);
58228 let e = _mm_set_pd(2.2, 1.0);
58229 assert_eq_m128d(r, e);
58230 }
58231
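// scalef computes a[0] * 2^floor(b[0]): 1.0 * 2^3 == 8.0.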
58232 #[simd_test(enable = "avx512f")]
58233 unsafe fn test_mm_scalef_ss() {
58234 let a = _mm_set1_ps(1.);
58235 let b = _mm_set1_ps(3.);
58236 let r = _mm_scalef_ss(a, b);
58237 let e = _mm_set_ps(1., 1., 1., 8.);
58238 assert_eq_m128(r, e);
58239 }
58240
58241 #[simd_test(enable = "avx512f")]
58242 unsafe fn test_mm_mask_scalef_ss() {
58243 let a = _mm_set1_ps(1.);
58244 let b = _mm_set1_ps(3.);
58245 let r = _mm_mask_scalef_ss(a, 0, a, b);
58246 let e = _mm_set_ps(1., 1., 1., 1.);
58247 assert_eq_m128(r, e);
58248 let r = _mm_mask_scalef_ss(a, 0b11111111, a, b);
58249 let e = _mm_set_ps(1., 1., 1., 8.);
58250 assert_eq_m128(r, e);
58251 }
58252
58253 #[simd_test(enable = "avx512f")]
58254 unsafe fn test_mm_maskz_scalef_ss() {
58255 let a = _mm_set1_ps(1.);
58256 let b = _mm_set1_ps(3.);
58257 let r = _mm_maskz_scalef_ss(0, a, b);
58258 let e = _mm_set_ps(1., 1., 1., 0.);
58259 assert_eq_m128(r, e);
58260 let r = _mm_maskz_scalef_ss(0b11111111, a, b);
58261 let e = _mm_set_ps(1., 1., 1., 8.);
58262 assert_eq_m128(r, e);
58263 }
58264
58265 #[simd_test(enable = "avx512f")]
58266 unsafe fn test_mm_scalef_sd() {
58267 let a = _mm_set1_pd(1.);
58268 let b = _mm_set1_pd(3.);
58269 let r = _mm_scalef_sd(a, b);
58270 let e = _mm_set_pd(1., 8.);
58271 assert_eq_m128d(r, e);
58272 }
58273
58274 #[simd_test(enable = "avx512f")]
58275 unsafe fn test_mm_mask_scalef_sd() {
58276 let a = _mm_set1_pd(1.);
58277 let b = _mm_set1_pd(3.);
58278 let r = _mm_mask_scalef_sd(a, 0, a, b);
58279 let e = _mm_set_pd(1., 1.);
58280 assert_eq_m128d(r, e);
58281 let r = _mm_mask_scalef_sd(a, 0b11111111, a, b);
58282 let e = _mm_set_pd(1., 8.);
58283 assert_eq_m128d(r, e);
58284 }
58285
58286 #[simd_test(enable = "avx512f")]
58287 unsafe fn test_mm_maskz_scalef_sd() {
58288 let a = _mm_set1_pd(1.);
58289 let b = _mm_set1_pd(3.);
58290 let r = _mm_maskz_scalef_sd(0, a, b);
58291 let e = _mm_set_pd(1., 0.);
58292 assert_eq_m128d(r, e);
58293 let r = _mm_maskz_scalef_sd(0b11111111, a, b);
58294 let e = _mm_set_pd(1., 8.);
58295 assert_eq_m128d(r, e);
58296 }
58297
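// Scalar FMA family on lane 0 with a = 1, b = 2, c = 3:
// fmadd = a*b + c = 5, fmsub = a*b - c = -1, fnmadd = -(a*b) + c = 1, fnmsub = -(a*b) - c = -5.
// When the mask bit is clear, lane 0 falls back to a[0] (mask), 0.0 (maskz) or c[0] (mask3);
// the upper lanes come from `a` for mask/maskz and from `c` for mask3.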
58298 #[simd_test(enable = "avx512f")]
58299 unsafe fn test_mm_mask_fmadd_ss() {
58300 let a = _mm_set1_ps(1.);
58301 let b = _mm_set1_ps(2.);
58302 let c = _mm_set1_ps(3.);
58303 let r = _mm_mask_fmadd_ss(a, 0, b, c);
58304 assert_eq_m128(r, a);
58305 let r = _mm_mask_fmadd_ss(a, 0b11111111, b, c);
58306 let e = _mm_set_ps(1., 1., 1., 5.);
58307 assert_eq_m128(r, e);
58308 }
58309
58310 #[simd_test(enable = "avx512f")]
58311 unsafe fn test_mm_maskz_fmadd_ss() {
58312 let a = _mm_set1_ps(1.);
58313 let b = _mm_set1_ps(2.);
58314 let c = _mm_set1_ps(3.);
58315 let r = _mm_maskz_fmadd_ss(0, a, b, c);
58316 let e = _mm_set_ps(1., 1., 1., 0.);
58317 assert_eq_m128(r, e);
58318 let r = _mm_maskz_fmadd_ss(0b11111111, a, b, c);
58319 let e = _mm_set_ps(1., 1., 1., 5.);
58320 assert_eq_m128(r, e);
58321 }
58322
58323 #[simd_test(enable = "avx512f")]
58324 unsafe fn test_mm_mask3_fmadd_ss() {
58325 let a = _mm_set1_ps(1.);
58326 let b = _mm_set1_ps(2.);
58327 let c = _mm_set1_ps(3.);
58328 let r = _mm_mask3_fmadd_ss(a, b, c, 0);
58329 assert_eq_m128(r, c);
58330 let r = _mm_mask3_fmadd_ss(a, b, c, 0b11111111);
58331 let e = _mm_set_ps(3., 3., 3., 5.);
58332 assert_eq_m128(r, e);
58333 }
58334
58335 #[simd_test(enable = "avx512f")]
58336 unsafe fn test_mm_mask_fmadd_sd() {
58337 let a = _mm_set1_pd(1.);
58338 let b = _mm_set1_pd(2.);
58339 let c = _mm_set1_pd(3.);
58340 let r = _mm_mask_fmadd_sd(a, 0, b, c);
58341 assert_eq_m128d(r, a);
58342 let r = _mm_mask_fmadd_sd(a, 0b11111111, b, c);
58343 let e = _mm_set_pd(1., 5.);
58344 assert_eq_m128d(r, e);
58345 }
58346
58347 #[simd_test(enable = "avx512f")]
58348 unsafe fn test_mm_maskz_fmadd_sd() {
58349 let a = _mm_set1_pd(1.);
58350 let b = _mm_set1_pd(2.);
58351 let c = _mm_set1_pd(3.);
58352 let r = _mm_maskz_fmadd_sd(0, a, b, c);
58353 let e = _mm_set_pd(1., 0.);
58354 assert_eq_m128d(r, e);
58355 let r = _mm_maskz_fmadd_sd(0b11111111, a, b, c);
58356 let e = _mm_set_pd(1., 5.);
58357 assert_eq_m128d(r, e);
58358 }
58359
58360 #[simd_test(enable = "avx512f")]
58361 unsafe fn test_mm_mask3_fmadd_sd() {
58362 let a = _mm_set1_pd(1.);
58363 let b = _mm_set1_pd(2.);
58364 let c = _mm_set1_pd(3.);
58365 let r = _mm_mask3_fmadd_sd(a, b, c, 0);
58366 assert_eq_m128d(r, c);
58367 let r = _mm_mask3_fmadd_sd(a, b, c, 0b11111111);
58368 let e = _mm_set_pd(3., 5.);
58369 assert_eq_m128d(r, e);
58370 }
58371
58372 #[simd_test(enable = "avx512f")]
58373 unsafe fn test_mm_mask_fmsub_ss() {
58374 let a = _mm_set1_ps(1.);
58375 let b = _mm_set1_ps(2.);
58376 let c = _mm_set1_ps(3.);
58377 let r = _mm_mask_fmsub_ss(a, 0, b, c);
58378 assert_eq_m128(r, a);
58379 let r = _mm_mask_fmsub_ss(a, 0b11111111, b, c);
58380 let e = _mm_set_ps(1., 1., 1., -1.);
58381 assert_eq_m128(r, e);
58382 }
58383
58384 #[simd_test(enable = "avx512f")]
58385 unsafe fn test_mm_maskz_fmsub_ss() {
58386 let a = _mm_set1_ps(1.);
58387 let b = _mm_set1_ps(2.);
58388 let c = _mm_set1_ps(3.);
58389 let r = _mm_maskz_fmsub_ss(0, a, b, c);
58390 let e = _mm_set_ps(1., 1., 1., 0.);
58391 assert_eq_m128(r, e);
58392 let r = _mm_maskz_fmsub_ss(0b11111111, a, b, c);
58393 let e = _mm_set_ps(1., 1., 1., -1.);
58394 assert_eq_m128(r, e);
58395 }
58396
58397 #[simd_test(enable = "avx512f")]
58398 unsafe fn test_mm_mask3_fmsub_ss() {
58399 let a = _mm_set1_ps(1.);
58400 let b = _mm_set1_ps(2.);
58401 let c = _mm_set1_ps(3.);
58402 let r = _mm_mask3_fmsub_ss(a, b, c, 0);
58403 assert_eq_m128(r, c);
58404 let r = _mm_mask3_fmsub_ss(a, b, c, 0b11111111);
58405 let e = _mm_set_ps(3., 3., 3., -1.);
58406 assert_eq_m128(r, e);
58407 }
58408
58409 #[simd_test(enable = "avx512f")]
58410 unsafe fn test_mm_mask_fmsub_sd() {
58411 let a = _mm_set1_pd(1.);
58412 let b = _mm_set1_pd(2.);
58413 let c = _mm_set1_pd(3.);
58414 let r = _mm_mask_fmsub_sd(a, 0, b, c);
58415 assert_eq_m128d(r, a);
58416 let r = _mm_mask_fmsub_sd(a, 0b11111111, b, c);
58417 let e = _mm_set_pd(1., -1.);
58418 assert_eq_m128d(r, e);
58419 }
58420
58421 #[simd_test(enable = "avx512f")]
58422 unsafe fn test_mm_maskz_fmsub_sd() {
58423 let a = _mm_set1_pd(1.);
58424 let b = _mm_set1_pd(2.);
58425 let c = _mm_set1_pd(3.);
58426 let r = _mm_maskz_fmsub_sd(0, a, b, c);
58427 let e = _mm_set_pd(1., 0.);
58428 assert_eq_m128d(r, e);
58429 let r = _mm_maskz_fmsub_sd(0b11111111, a, b, c);
58430 let e = _mm_set_pd(1., -1.);
58431 assert_eq_m128d(r, e);
58432 }
58433
58434 #[simd_test(enable = "avx512f")]
58435 unsafe fn test_mm_mask3_fmsub_sd() {
58436 let a = _mm_set1_pd(1.);
58437 let b = _mm_set1_pd(2.);
58438 let c = _mm_set1_pd(3.);
58439 let r = _mm_mask3_fmsub_sd(a, b, c, 0);
58440 assert_eq_m128d(r, c);
58441 let r = _mm_mask3_fmsub_sd(a, b, c, 0b11111111);
58442 let e = _mm_set_pd(3., -1.);
58443 assert_eq_m128d(r, e);
58444 }
58445
58446 #[simd_test(enable = "avx512f")]
58447 unsafe fn test_mm_mask_fnmadd_ss() {
58448 let a = _mm_set1_ps(1.);
58449 let b = _mm_set1_ps(2.);
58450 let c = _mm_set1_ps(3.);
58451 let r = _mm_mask_fnmadd_ss(a, 0, b, c);
58452 assert_eq_m128(r, a);
58453 let r = _mm_mask_fnmadd_ss(a, 0b11111111, b, c);
58454 let e = _mm_set_ps(1., 1., 1., 1.);
58455 assert_eq_m128(r, e);
58456 }
58457
58458 #[simd_test(enable = "avx512f")]
58459 unsafe fn test_mm_maskz_fnmadd_ss() {
58460 let a = _mm_set1_ps(1.);
58461 let b = _mm_set1_ps(2.);
58462 let c = _mm_set1_ps(3.);
58463 let r = _mm_maskz_fnmadd_ss(0, a, b, c);
58464 let e = _mm_set_ps(1., 1., 1., 0.);
58465 assert_eq_m128(r, e);
58466 let r = _mm_maskz_fnmadd_ss(0b11111111, a, b, c);
58467 let e = _mm_set_ps(1., 1., 1., 1.);
58468 assert_eq_m128(r, e);
58469 }
58470
58471 #[simd_test(enable = "avx512f")]
58472 unsafe fn test_mm_mask3_fnmadd_ss() {
58473 let a = _mm_set1_ps(1.);
58474 let b = _mm_set1_ps(2.);
58475 let c = _mm_set1_ps(3.);
58476 let r = _mm_mask3_fnmadd_ss(a, b, c, 0);
58477 assert_eq_m128(r, c);
58478 let r = _mm_mask3_fnmadd_ss(a, b, c, 0b11111111);
58479 let e = _mm_set_ps(3., 3., 3., 1.);
58480 assert_eq_m128(r, e);
58481 }
58482
58483 #[simd_test(enable = "avx512f")]
58484 unsafe fn test_mm_mask_fnmadd_sd() {
58485 let a = _mm_set1_pd(1.);
58486 let b = _mm_set1_pd(2.);
58487 let c = _mm_set1_pd(3.);
58488 let r = _mm_mask_fnmadd_sd(a, 0, b, c);
58489 assert_eq_m128d(r, a);
58490 let r = _mm_mask_fnmadd_sd(a, 0b11111111, b, c);
58491 let e = _mm_set_pd(1., 1.);
58492 assert_eq_m128d(r, e);
58493 }
58494
58495 #[simd_test(enable = "avx512f")]
58496 unsafe fn test_mm_maskz_fnmadd_sd() {
58497 let a = _mm_set1_pd(1.);
58498 let b = _mm_set1_pd(2.);
58499 let c = _mm_set1_pd(3.);
58500 let r = _mm_maskz_fnmadd_sd(0, a, b, c);
58501 let e = _mm_set_pd(1., 0.);
58502 assert_eq_m128d(r, e);
58503 let r = _mm_maskz_fnmadd_sd(0b11111111, a, b, c);
58504 let e = _mm_set_pd(1., 1.);
58505 assert_eq_m128d(r, e);
58506 }
58507
58508 #[simd_test(enable = "avx512f")]
58509 unsafe fn test_mm_mask3_fnmadd_sd() {
58510 let a = _mm_set1_pd(1.);
58511 let b = _mm_set1_pd(2.);
58512 let c = _mm_set1_pd(3.);
58513 let r = _mm_mask3_fnmadd_sd(a, b, c, 0);
58514 assert_eq_m128d(r, c);
58515 let r = _mm_mask3_fnmadd_sd(a, b, c, 0b11111111);
58516 let e = _mm_set_pd(3., 1.);
58517 assert_eq_m128d(r, e);
58518 }
58519
58520 #[simd_test(enable = "avx512f")]
58521 unsafe fn test_mm_mask_fnmsub_ss() {
58522 let a = _mm_set1_ps(1.);
58523 let b = _mm_set1_ps(2.);
58524 let c = _mm_set1_ps(3.);
58525 let r = _mm_mask_fnmsub_ss(a, 0, b, c);
58526 assert_eq_m128(r, a);
58527 let r = _mm_mask_fnmsub_ss(a, 0b11111111, b, c);
58528 let e = _mm_set_ps(1., 1., 1., -5.);
58529 assert_eq_m128(r, e);
58530 }
58531
58532 #[simd_test(enable = "avx512f")]
58533 unsafe fn test_mm_maskz_fnmsub_ss() {
58534 let a = _mm_set1_ps(1.);
58535 let b = _mm_set1_ps(2.);
58536 let c = _mm_set1_ps(3.);
58537 let r = _mm_maskz_fnmsub_ss(0, a, b, c);
58538 let e = _mm_set_ps(1., 1., 1., 0.);
58539 assert_eq_m128(r, e);
58540 let r = _mm_maskz_fnmsub_ss(0b11111111, a, b, c);
58541 let e = _mm_set_ps(1., 1., 1., -5.);
58542 assert_eq_m128(r, e);
58543 }
58544
58545 #[simd_test(enable = "avx512f")]
58546 unsafe fn test_mm_mask3_fnmsub_ss() {
58547 let a = _mm_set1_ps(1.);
58548 let b = _mm_set1_ps(2.);
58549 let c = _mm_set1_ps(3.);
58550 let r = _mm_mask3_fnmsub_ss(a, b, c, 0);
58551 assert_eq_m128(r, c);
58552 let r = _mm_mask3_fnmsub_ss(a, b, c, 0b11111111);
58553 let e = _mm_set_ps(3., 3., 3., -5.);
58554 assert_eq_m128(r, e);
58555 }
58556
58557 #[simd_test(enable = "avx512f")]
58558 unsafe fn test_mm_mask_fnmsub_sd() {
58559 let a = _mm_set1_pd(1.);
58560 let b = _mm_set1_pd(2.);
58561 let c = _mm_set1_pd(3.);
58562 let r = _mm_mask_fnmsub_sd(a, 0, b, c);
58563 assert_eq_m128d(r, a);
58564 let r = _mm_mask_fnmsub_sd(a, 0b11111111, b, c);
58565 let e = _mm_set_pd(1., -5.);
58566 assert_eq_m128d(r, e);
58567 }
58568
58569 #[simd_test(enable = "avx512f")]
58570 unsafe fn test_mm_maskz_fnmsub_sd() {
58571 let a = _mm_set1_pd(1.);
58572 let b = _mm_set1_pd(2.);
58573 let c = _mm_set1_pd(3.);
58574 let r = _mm_maskz_fnmsub_sd(0, a, b, c);
58575 let e = _mm_set_pd(1., 0.);
58576 assert_eq_m128d(r, e);
58577 let r = _mm_maskz_fnmsub_sd(0b11111111, a, b, c);
58578 let e = _mm_set_pd(1., -5.);
58579 assert_eq_m128d(r, e);
58580 }
58581
58582 #[simd_test(enable = "avx512f")]
58583 unsafe fn test_mm_mask3_fnmsub_sd() {
58584 let a = _mm_set1_pd(1.);
58585 let b = _mm_set1_pd(2.);
58586 let c = _mm_set1_pd(3.);
58587 let r = _mm_mask3_fnmsub_sd(a, b, c, 0);
58588 assert_eq_m128d(r, c);
58589 let r = _mm_mask3_fnmsub_sd(a, b, c, 0b11111111);
58590 let e = _mm_set_pd(3., -5.);
58591 assert_eq_m128d(r, e);
58592 }
58593
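// The *_round_* variants take the rounding mode as a const generic.
// _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC truncates with exceptions suppressed; every result in
// these tests is exactly representable, so the chosen mode does not change the expected values.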
58594 #[simd_test(enable = "avx512f")]
58595 unsafe fn test_mm_add_round_ss() {
58596 let a = _mm_set_ps(1., 2., 10., 20.);
58597 let b = _mm_set_ps(3., 4., 30., 40.);
58598 let r = _mm_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58599 let e = _mm_set_ps(1., 2., 10., 60.);
58600 assert_eq_m128(r, e);
58601 }
58602
58603 #[simd_test(enable = "avx512f")]
58604 unsafe fn test_mm_mask_add_round_ss() {
58605 let src = _mm_set_ps(10., 11., 100., 110.);
58606 let a = _mm_set_ps(1., 2., 10., 20.);
58607 let b = _mm_set_ps(3., 4., 30., 40.);
58608 let r = _mm_mask_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58609 let e = _mm_set_ps(1., 2., 10., 110.);
58610 assert_eq_m128(r, e);
58611 let r = _mm_mask_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58612 src, 0b11111111, a, b,
58613 );
58614 let e = _mm_set_ps(1., 2., 10., 60.);
58615 assert_eq_m128(r, e);
58616 }
58617
58618 #[simd_test(enable = "avx512f")]
58619 unsafe fn test_mm_maskz_add_round_ss() {
58620 let a = _mm_set_ps(1., 2., 10., 20.);
58621 let b = _mm_set_ps(3., 4., 30., 40.);
58622 let r = _mm_maskz_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58623 let e = _mm_set_ps(1., 2., 10., 0.);
58624 assert_eq_m128(r, e);
58625 let r =
58626 _mm_maskz_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
58627 let e = _mm_set_ps(1., 2., 10., 60.);
58628 assert_eq_m128(r, e);
58629 }
58630
58631 #[simd_test(enable = "avx512f")]
58632 unsafe fn test_mm_add_round_sd() {
58633 let a = _mm_set_pd(1., 2.);
58634 let b = _mm_set_pd(3., 4.);
58635 let r = _mm_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58636 let e = _mm_set_pd(1., 6.);
58637 assert_eq_m128d(r, e);
58638 }
58639
58640 #[simd_test(enable = "avx512f")]
58641 unsafe fn test_mm_mask_add_round_sd() {
58642 let src = _mm_set_pd(10., 11.);
58643 let a = _mm_set_pd(1., 2.);
58644 let b = _mm_set_pd(3., 4.);
58645 let r = _mm_mask_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58646 let e = _mm_set_pd(1., 11.);
58647 assert_eq_m128d(r, e);
58648 let r = _mm_mask_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58649 src, 0b11111111, a, b,
58650 );
58651 let e = _mm_set_pd(1., 6.);
58652 assert_eq_m128d(r, e);
58653 }
58654
58655 #[simd_test(enable = "avx512f")]
58656 unsafe fn test_mm_maskz_add_round_sd() {
58657 let a = _mm_set_pd(1., 2.);
58658 let b = _mm_set_pd(3., 4.);
58659 let r = _mm_maskz_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58660 let e = _mm_set_pd(1., 0.);
58661 assert_eq_m128d(r, e);
58662 let r =
58663 _mm_maskz_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
58664 let e = _mm_set_pd(1., 6.);
58665 assert_eq_m128d(r, e);
58666 }
58667
58668 #[simd_test(enable = "avx512f")]
58669 unsafe fn test_mm_sub_round_ss() {
58670 let a = _mm_set_ps(1., 2., 10., 20.);
58671 let b = _mm_set_ps(3., 4., 30., 40.);
58672 let r = _mm_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58673 let e = _mm_set_ps(1., 2., 10., -20.);
58674 assert_eq_m128(r, e);
58675 }
58676
58677 #[simd_test(enable = "avx512f")]
58678 unsafe fn test_mm_mask_sub_round_ss() {
58679 let src = _mm_set_ps(10., 11., 100., 110.);
58680 let a = _mm_set_ps(1., 2., 10., 20.);
58681 let b = _mm_set_ps(3., 4., 30., 40.);
58682 let r = _mm_mask_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58683 let e = _mm_set_ps(1., 2., 10., 110.);
58684 assert_eq_m128(r, e);
58685 let r = _mm_mask_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58686 src, 0b11111111, a, b,
58687 );
58688 let e = _mm_set_ps(1., 2., 10., -20.);
58689 assert_eq_m128(r, e);
58690 }
58691
58692 #[simd_test(enable = "avx512f")]
58693 unsafe fn test_mm_maskz_sub_round_ss() {
58694 let a = _mm_set_ps(1., 2., 10., 20.);
58695 let b = _mm_set_ps(3., 4., 30., 40.);
58696 let r = _mm_maskz_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58697 let e = _mm_set_ps(1., 2., 10., 0.);
58698 assert_eq_m128(r, e);
58699 let r =
58700 _mm_maskz_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
58701 let e = _mm_set_ps(1., 2., 10., -20.);
58702 assert_eq_m128(r, e);
58703 }
58704
58705 #[simd_test(enable = "avx512f")]
58706 unsafe fn test_mm_sub_round_sd() {
58707 let a = _mm_set_pd(1., 2.);
58708 let b = _mm_set_pd(3., 4.);
58709 let r = _mm_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58710 let e = _mm_set_pd(1., -2.);
58711 assert_eq_m128d(r, e);
58712 }
58713
58714 #[simd_test(enable = "avx512f")]
58715 unsafe fn test_mm_mask_sub_round_sd() {
58716 let src = _mm_set_pd(10., 11.);
58717 let a = _mm_set_pd(1., 2.);
58718 let b = _mm_set_pd(3., 4.);
58719 let r = _mm_mask_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58720 let e = _mm_set_pd(1., 11.);
58721 assert_eq_m128d(r, e);
58722 let r = _mm_mask_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58723 src, 0b11111111, a, b,
58724 );
58725 let e = _mm_set_pd(1., -2.);
58726 assert_eq_m128d(r, e);
58727 }
58728
58729 #[simd_test(enable = "avx512f")]
58730 unsafe fn test_mm_maskz_sub_round_sd() {
58731 let a = _mm_set_pd(1., 2.);
58732 let b = _mm_set_pd(3., 4.);
58733 let r = _mm_maskz_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58734 let e = _mm_set_pd(1., 0.);
58735 assert_eq_m128d(r, e);
58736 let r =
58737 _mm_maskz_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
58738 let e = _mm_set_pd(1., -2.);
58739 assert_eq_m128d(r, e);
58740 }
58741
58742 #[simd_test(enable = "avx512f")]
58743 unsafe fn test_mm_mul_round_ss() {
58744 let a = _mm_set_ps(1., 2., 10., 20.);
58745 let b = _mm_set_ps(3., 4., 30., 40.);
58746 let r = _mm_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58747 let e = _mm_set_ps(1., 2., 10., 800.);
58748 assert_eq_m128(r, e);
58749 }
58750
58751 #[simd_test(enable = "avx512f")]
58752 unsafe fn test_mm_mask_mul_round_ss() {
58753 let src = _mm_set_ps(10., 11., 100., 110.);
58754 let a = _mm_set_ps(1., 2., 10., 20.);
58755 let b = _mm_set_ps(3., 4., 30., 40.);
58756 let r = _mm_mask_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58757 let e = _mm_set_ps(1., 2., 10., 110.);
58758 assert_eq_m128(r, e);
58759 let r = _mm_mask_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58760 src, 0b11111111, a, b,
58761 );
58762 let e = _mm_set_ps(1., 2., 10., 800.);
58763 assert_eq_m128(r, e);
58764 }
58765
58766 #[simd_test(enable = "avx512f")]
58767 unsafe fn test_mm_maskz_mul_round_ss() {
58768 let a = _mm_set_ps(1., 2., 10., 20.);
58769 let b = _mm_set_ps(3., 4., 30., 40.);
58770 let r = _mm_maskz_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58771 let e = _mm_set_ps(1., 2., 10., 0.);
58772 assert_eq_m128(r, e);
58773 let r =
58774 _mm_maskz_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
58775 let e = _mm_set_ps(1., 2., 10., 800.);
58776 assert_eq_m128(r, e);
58777 }
58778
58779 #[simd_test(enable = "avx512f")]
58780 unsafe fn test_mm_mul_round_sd() {
58781 let a = _mm_set_pd(1., 2.);
58782 let b = _mm_set_pd(3., 4.);
58783 let r = _mm_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58784 let e = _mm_set_pd(1., 8.);
58785 assert_eq_m128d(r, e);
58786 }
58787
58788 #[simd_test(enable = "avx512f")]
58789 unsafe fn test_mm_mask_mul_round_sd() {
58790 let src = _mm_set_pd(10., 11.);
58791 let a = _mm_set_pd(1., 2.);
58792 let b = _mm_set_pd(3., 4.);
58793 let r = _mm_mask_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58794 let e = _mm_set_pd(1., 11.);
58795 assert_eq_m128d(r, e);
58796 let r = _mm_mask_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58797 src, 0b11111111, a, b,
58798 );
58799 let e = _mm_set_pd(1., 8.);
58800 assert_eq_m128d(r, e);
58801 }
58802
58803 #[simd_test(enable = "avx512f")]
58804 unsafe fn test_mm_maskz_mul_round_sd() {
58805 let a = _mm_set_pd(1., 2.);
58806 let b = _mm_set_pd(3., 4.);
58807 let r = _mm_maskz_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58808 let e = _mm_set_pd(1., 0.);
58809 assert_eq_m128d(r, e);
58810 let r =
58811 _mm_maskz_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
58812 let e = _mm_set_pd(1., 8.);
58813 assert_eq_m128d(r, e);
58814 }
58815
58816 #[simd_test(enable = "avx512f")]
58817 unsafe fn test_mm_div_round_ss() {
58818 let a = _mm_set_ps(1., 2., 10., 20.);
58819 let b = _mm_set_ps(3., 4., 30., 40.);
58820 let r = _mm_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58821 let e = _mm_set_ps(1., 2., 10., 0.5);
58822 assert_eq_m128(r, e);
58823 }
58824
58825 #[simd_test(enable = "avx512f")]
58826 unsafe fn test_mm_mask_div_round_ss() {
58827 let src = _mm_set_ps(10., 11., 100., 110.);
58828 let a = _mm_set_ps(1., 2., 10., 20.);
58829 let b = _mm_set_ps(3., 4., 30., 40.);
58830 let r = _mm_mask_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58831 let e = _mm_set_ps(1., 2., 10., 110.);
58832 assert_eq_m128(r, e);
58833 let r = _mm_mask_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58834 src, 0b11111111, a, b,
58835 );
58836 let e = _mm_set_ps(1., 2., 10., 0.5);
58837 assert_eq_m128(r, e);
58838 }
58839
58840 #[simd_test(enable = "avx512f")]
58841 unsafe fn test_mm_maskz_div_round_ss() {
58842 let a = _mm_set_ps(1., 2., 10., 20.);
58843 let b = _mm_set_ps(3., 4., 30., 40.);
58844 let r = _mm_maskz_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58845 let e = _mm_set_ps(1., 2., 10., 0.);
58846 assert_eq_m128(r, e);
58847 let r =
58848 _mm_maskz_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
58849 let e = _mm_set_ps(1., 2., 10., 0.5);
58850 assert_eq_m128(r, e);
58851 }
58852
58853 #[simd_test(enable = "avx512f")]
58854 unsafe fn test_mm_div_round_sd() {
58855 let a = _mm_set_pd(1., 2.);
58856 let b = _mm_set_pd(3., 4.);
58857 let r = _mm_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58858 let e = _mm_set_pd(1., 0.5);
58859 assert_eq_m128d(r, e);
58860 }
58861
58862 #[simd_test(enable = "avx512f")]
58863 unsafe fn test_mm_mask_div_round_sd() {
58864 let src = _mm_set_pd(10., 11.);
58865 let a = _mm_set_pd(1., 2.);
58866 let b = _mm_set_pd(3., 4.);
58867 let r = _mm_mask_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58868 let e = _mm_set_pd(1., 11.);
58869 assert_eq_m128d(r, e);
58870 let r = _mm_mask_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58871 src, 0b11111111, a, b,
58872 );
58873 let e = _mm_set_pd(1., 0.5);
58874 assert_eq_m128d(r, e);
58875 }
58876
58877 #[simd_test(enable = "avx512f")]
58878 unsafe fn test_mm_maskz_div_round_sd() {
58879 let a = _mm_set_pd(1., 2.);
58880 let b = _mm_set_pd(3., 4.);
58881 let r = _mm_maskz_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58882 let e = _mm_set_pd(1., 0.);
58883 assert_eq_m128d(r, e);
58884 let r =
58885 _mm_maskz_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
58886 let e = _mm_set_pd(1., 0.5);
58887 assert_eq_m128d(r, e);
58888 }
58889
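// For max/min the const parameter only controls exception suppression (SAE):
// _MM_FROUND_CUR_DIRECTION leaves MXCSR in charge, and the numeric result is unaffected.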
58890 #[simd_test(enable = "avx512f")]
58891 unsafe fn test_mm_max_round_ss() {
58892 let a = _mm_set_ps(0., 1., 2., 3.);
58893 let b = _mm_set_ps(4., 5., 6., 7.);
58894 let r = _mm_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b);
58895 let e = _mm_set_ps(0., 1., 2., 7.);
58896 assert_eq_m128(r, e);
58897 }
58898
58899 #[simd_test(enable = "avx512f")]
58900 unsafe fn test_mm_mask_max_round_ss() {
58901 let a = _mm_set_ps(0., 1., 2., 3.);
58902 let b = _mm_set_ps(4., 5., 6., 7.);
58903 let r = _mm_mask_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
58904 let e = _mm_set_ps(0., 1., 2., 3.);
58905 assert_eq_m128(r, e);
58906 let r = _mm_mask_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
58907 let e = _mm_set_ps(0., 1., 2., 7.);
58908 assert_eq_m128(r, e);
58909 }
58910
58911 #[simd_test(enable = "avx512f")]
58912 unsafe fn test_mm_maskz_max_round_ss() {
58913 let a = _mm_set_ps(0., 1., 2., 3.);
58914 let b = _mm_set_ps(4., 5., 6., 7.);
58915 let r = _mm_maskz_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
58916 let e = _mm_set_ps(0., 1., 2., 0.);
58917 assert_eq_m128(r, e);
58918 let r = _mm_maskz_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
58919 let e = _mm_set_ps(0., 1., 2., 7.);
58920 assert_eq_m128(r, e);
58921 }
58922
58923 #[simd_test(enable = "avx512f")]
58924 unsafe fn test_mm_max_round_sd() {
58925 let a = _mm_set_pd(0., 1.);
58926 let b = _mm_set_pd(2., 3.);
58927 let r = _mm_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
58928 let e = _mm_set_pd(0., 3.);
58929 assert_eq_m128d(r, e);
58930 }
58931
58932 #[simd_test(enable = "avx512f")]
58933 unsafe fn test_mm_mask_max_round_sd() {
58934 let a = _mm_set_pd(0., 1.);
58935 let b = _mm_set_pd(2., 3.);
58936 let r = _mm_mask_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
58937 let e = _mm_set_pd(0., 1.);
58938 assert_eq_m128d(r, e);
58939 let r = _mm_mask_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
58940 let e = _mm_set_pd(0., 3.);
58941 assert_eq_m128d(r, e);
58942 }
58943
58944 #[simd_test(enable = "avx512f")]
58945 unsafe fn test_mm_maskz_max_round_sd() {
58946 let a = _mm_set_pd(0., 1.);
58947 let b = _mm_set_pd(2., 3.);
58948 let r = _mm_maskz_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
58949 let e = _mm_set_pd(0., 0.);
58950 assert_eq_m128d(r, e);
58951 let r = _mm_maskz_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
58952 let e = _mm_set_pd(0., 3.);
58953 assert_eq_m128d(r, e);
58954 }
58955
58956 #[simd_test(enable = "avx512f")]
58957 unsafe fn test_mm_min_round_ss() {
58958 let a = _mm_set_ps(0., 1., 2., 3.);
58959 let b = _mm_set_ps(4., 5., 6., 7.);
58960 let r = _mm_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b);
58961 let e = _mm_set_ps(0., 1., 2., 3.);
58962 assert_eq_m128(r, e);
58963 }
58964
58965 #[simd_test(enable = "avx512f")]
58966 unsafe fn test_mm_mask_min_round_ss() {
58967 let a = _mm_set_ps(0., 1., 2., 3.);
58968 let b = _mm_set_ps(4., 5., 6., 7.);
58969 let r = _mm_mask_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
58970 let e = _mm_set_ps(0., 1., 2., 3.);
58971 assert_eq_m128(r, e);
58972 let r = _mm_mask_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
58973 let e = _mm_set_ps(0., 1., 2., 3.);
58974 assert_eq_m128(r, e);
58975 }
58976
58977 #[simd_test(enable = "avx512f")]
58978 unsafe fn test_mm_maskz_min_round_ss() {
58979 let a = _mm_set_ps(0., 1., 2., 3.);
58980 let b = _mm_set_ps(4., 5., 6., 7.);
58981 let r = _mm_maskz_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
58982 let e = _mm_set_ps(0., 1., 2., 0.);
58983 assert_eq_m128(r, e);
58984 let r = _mm_maskz_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
58985 let e = _mm_set_ps(0., 1., 2., 3.);
58986 assert_eq_m128(r, e);
58987 }
58988
58989 #[simd_test(enable = "avx512f")]
58990 unsafe fn test_mm_min_round_sd() {
58991 let a = _mm_set_pd(0., 1.);
58992 let b = _mm_set_pd(2., 3.);
58993 let r = _mm_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
58994 let e = _mm_set_pd(0., 1.);
58995 assert_eq_m128d(r, e);
58996 }
58997
58998 #[simd_test(enable = "avx512f")]
58999 unsafe fn test_mm_mask_min_round_sd() {
59000 let a = _mm_set_pd(0., 1.);
59001 let b = _mm_set_pd(2., 3.);
59002 let r = _mm_mask_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
59003 let e = _mm_set_pd(0., 1.);
59004 assert_eq_m128d(r, e);
59005 let r = _mm_mask_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
59006 let e = _mm_set_pd(0., 1.);
59007 assert_eq_m128d(r, e);
59008 }
59009
59010 #[simd_test(enable = "avx512f")]
59011 unsafe fn test_mm_maskz_min_round_sd() {
59012 let a = _mm_set_pd(0., 1.);
59013 let b = _mm_set_pd(2., 3.);
59014 let r = _mm_maskz_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
59015 let e = _mm_set_pd(0., 0.);
59016 assert_eq_m128d(r, e);
59017 let r = _mm_maskz_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
59018 let e = _mm_set_pd(0., 1.);
59019 assert_eq_m128d(r, e);
59020 }
59021
59022 #[simd_test(enable = "avx512f")]
59023 unsafe fn test_mm_sqrt_round_ss() {
59024 let a = _mm_set_ps(1., 2., 10., 20.);
59025 let b = _mm_set_ps(3., 4., 30., 4.);
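        // Only the low lane is computed: sqrt(4.) = 2.; the upper three lanes are copied from `a`.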
59026 let r = _mm_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
59027 let e = _mm_set_ps(1., 2., 10., 2.);
59028 assert_eq_m128(r, e);
59029 }
59030
59031 #[simd_test(enable = "avx512f")]
59032 unsafe fn test_mm_mask_sqrt_round_ss() {
59033 let src = _mm_set_ps(10., 11., 100., 110.);
59034 let a = _mm_set_ps(1., 2., 10., 20.);
59035 let b = _mm_set_ps(3., 4., 30., 4.);
59036 let r = _mm_mask_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
59037 let e = _mm_set_ps(1., 2., 10., 110.);
59038 assert_eq_m128(r, e);
59039 let r = _mm_mask_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
59040 src, 0b11111111, a, b,
59041 );
59042 let e = _mm_set_ps(1., 2., 10., 2.);
59043 assert_eq_m128(r, e);
59044 }
59045
59046 #[simd_test(enable = "avx512f")]
59047 unsafe fn test_mm_maskz_sqrt_round_ss() {
59048 let a = _mm_set_ps(1., 2., 10., 20.);
59049 let b = _mm_set_ps(3., 4., 30., 4.);
59050 let r = _mm_maskz_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
59051 let e = _mm_set_ps(1., 2., 10., 0.);
59052 assert_eq_m128(r, e);
59053 let r =
59054 _mm_maskz_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
59055 let e = _mm_set_ps(1., 2., 10., 2.);
59056 assert_eq_m128(r, e);
59057 }
59058
59059 #[simd_test(enable = "avx512f")]
59060 unsafe fn test_mm_sqrt_round_sd() {
59061 let a = _mm_set_pd(1., 2.);
59062 let b = _mm_set_pd(3., 4.);
59063 let r = _mm_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
59064 let e = _mm_set_pd(1., 2.);
59065 assert_eq_m128d(r, e);
59066 }
59067
59068 #[simd_test(enable = "avx512f")]
59069 unsafe fn test_mm_mask_sqrt_round_sd() {
59070 let src = _mm_set_pd(10., 11.);
59071 let a = _mm_set_pd(1., 2.);
59072 let b = _mm_set_pd(3., 4.);
59073 let r = _mm_mask_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
59074 let e = _mm_set_pd(1., 11.);
59075 assert_eq_m128d(r, e);
59076 let r = _mm_mask_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
59077 src, 0b11111111, a, b,
59078 );
59079 let e = _mm_set_pd(1., 2.);
59080 assert_eq_m128d(r, e);
59081 }
59082
59083 #[simd_test(enable = "avx512f")]
59084 unsafe fn test_mm_maskz_sqrt_round_sd() {
59085 let a = _mm_set_pd(1., 2.);
59086 let b = _mm_set_pd(3., 4.);
59087 let r = _mm_maskz_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
59088 let e = _mm_set_pd(1., 0.);
59089 assert_eq_m128d(r, e);
59090 let r =
59091 _mm_maskz_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
59092 let e = _mm_set_pd(1., 2.);
59093 assert_eq_m128d(r, e);
59094 }
59095
59096 #[simd_test(enable = "avx512f")]
59097 unsafe fn test_mm_getexp_round_ss() {
59098 let a = _mm_set1_ps(2.);
59099 let b = _mm_set1_ps(3.);
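        // getexp returns floor(log2(|b0|)) as a float: for b0 = 3. that is 1.; upper lanes are taken from `a`.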
59100 let r = _mm_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b);
59101 let e = _mm_set_ps(2., 2., 2., 1.);
59102 assert_eq_m128(r, e);
59103 }
59104
59105 #[simd_test(enable = "avx512f")]
59106 unsafe fn test_mm_mask_getexp_round_ss() {
59107 let a = _mm_set1_ps(2.);
59108 let b = _mm_set1_ps(3.);
59109 let r = _mm_mask_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
59110 let e = _mm_set_ps(2., 2., 2., 2.);
59111 assert_eq_m128(r, e);
59112 let r = _mm_mask_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
59113 let e = _mm_set_ps(2., 2., 2., 1.);
59114 assert_eq_m128(r, e);
59115 }
59116
59117 #[simd_test(enable = "avx512f")]
59118 unsafe fn test_mm_maskz_getexp_round_ss() {
59119 let a = _mm_set1_ps(2.);
59120 let b = _mm_set1_ps(3.);
59121 let r = _mm_maskz_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
59122 let e = _mm_set_ps(2., 2., 2., 0.);
59123 assert_eq_m128(r, e);
59124 let r = _mm_maskz_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
59125 let e = _mm_set_ps(2., 2., 2., 1.);
59126 assert_eq_m128(r, e);
59127 }
59128
59129 #[simd_test(enable = "avx512f")]
59130 unsafe fn test_mm_getexp_round_sd() {
59131 let a = _mm_set1_pd(2.);
59132 let b = _mm_set1_pd(3.);
59133 let r = _mm_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
59134 let e = _mm_set_pd(2., 1.);
59135 assert_eq_m128d(r, e);
59136 }
59137
59138 #[simd_test(enable = "avx512f")]
59139 unsafe fn test_mm_mask_getexp_round_sd() {
59140 let a = _mm_set1_pd(2.);
59141 let b = _mm_set1_pd(3.);
59142 let r = _mm_mask_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
59143 let e = _mm_set_pd(2., 2.);
59144 assert_eq_m128d(r, e);
59145 let r = _mm_mask_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
59146 let e = _mm_set_pd(2., 1.);
59147 assert_eq_m128d(r, e);
59148 }
59149
59150 #[simd_test(enable = "avx512f")]
59151 unsafe fn test_mm_maskz_getexp_round_sd() {
59152 let a = _mm_set1_pd(2.);
59153 let b = _mm_set1_pd(3.);
59154 let r = _mm_maskz_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
59155 let e = _mm_set_pd(2., 0.);
59156 assert_eq_m128d(r, e);
59157 let r = _mm_maskz_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
59158 let e = _mm_set_pd(2., 1.);
59159 assert_eq_m128d(r, e);
59160 }
59161
59162 #[simd_test(enable = "avx512f")]
59163 unsafe fn test_mm_getmant_round_ss() {
59164 let a = _mm_set1_ps(20.);
59165 let b = _mm_set1_ps(10.);
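        // _MM_MANT_NORM_1_2 normalizes the low lane of `b` into [1, 2): 10. = 1.25 * 2^3, so the mantissa is 1.25.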
59166 let r =
59167 _mm_getmant_round_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC, _MM_FROUND_CUR_DIRECTION>(
59168 a, b,
59169 );
59170 let e = _mm_set_ps(20., 20., 20., 1.25);
59171 assert_eq_m128(r, e);
59172 }
59173
59174 #[simd_test(enable = "avx512f")]
59175 unsafe fn test_mm_mask_getmant_round_ss() {
59176 let a = _mm_set1_ps(20.);
59177 let b = _mm_set1_ps(10.);
59178 let r = _mm_mask_getmant_round_ss::<
59179 _MM_MANT_NORM_1_2,
59180 _MM_MANT_SIGN_SRC,
59181 _MM_FROUND_CUR_DIRECTION,
59182 >(a, 0, a, b);
59183 let e = _mm_set_ps(20., 20., 20., 20.);
59184 assert_eq_m128(r, e);
59185 let r = _mm_mask_getmant_round_ss::<
59186 _MM_MANT_NORM_1_2,
59187 _MM_MANT_SIGN_SRC,
59188 _MM_FROUND_CUR_DIRECTION,
59189 >(a, 0b11111111, a, b);
59190 let e = _mm_set_ps(20., 20., 20., 1.25);
59191 assert_eq_m128(r, e);
59192 }
59193
59194 #[simd_test(enable = "avx512f")]
59195 unsafe fn test_mm_maskz_getmant_round_ss() {
59196 let a = _mm_set1_ps(20.);
59197 let b = _mm_set1_ps(10.);
59198 let r = _mm_maskz_getmant_round_ss::<
59199 _MM_MANT_NORM_1_2,
59200 _MM_MANT_SIGN_SRC,
59201 _MM_FROUND_CUR_DIRECTION,
59202 >(0, a, b);
59203 let e = _mm_set_ps(20., 20., 20., 0.);
59204 assert_eq_m128(r, e);
59205 let r = _mm_maskz_getmant_round_ss::<
59206 _MM_MANT_NORM_1_2,
59207 _MM_MANT_SIGN_SRC,
59208 _MM_FROUND_CUR_DIRECTION,
59209 >(0b11111111, a, b);
59210 let e = _mm_set_ps(20., 20., 20., 1.25);
59211 assert_eq_m128(r, e);
59212 }
59213
59214 #[simd_test(enable = "avx512f")]
59215 unsafe fn test_mm_getmant_round_sd() {
59216 let a = _mm_set1_pd(20.);
59217 let b = _mm_set1_pd(10.);
59218 let r =
59219 _mm_getmant_round_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC, _MM_FROUND_CUR_DIRECTION>(
59220 a, b,
59221 );
59222 let e = _mm_set_pd(20., 1.25);
59223 assert_eq_m128d(r, e);
59224 }
59225
59226 #[simd_test(enable = "avx512f")]
59227 unsafe fn test_mm_mask_getmant_round_sd() {
59228 let a = _mm_set1_pd(20.);
59229 let b = _mm_set1_pd(10.);
59230 let r = _mm_mask_getmant_round_sd::<
59231 _MM_MANT_NORM_1_2,
59232 _MM_MANT_SIGN_SRC,
59233 _MM_FROUND_CUR_DIRECTION,
59234 >(a, 0, a, b);
59235 let e = _mm_set_pd(20., 20.);
59236 assert_eq_m128d(r, e);
59237 let r = _mm_mask_getmant_round_sd::<
59238 _MM_MANT_NORM_1_2,
59239 _MM_MANT_SIGN_SRC,
59240 _MM_FROUND_CUR_DIRECTION,
59241 >(a, 0b11111111, a, b);
59242 let e = _mm_set_pd(20., 1.25);
59243 assert_eq_m128d(r, e);
59244 }
59245
59246 #[simd_test(enable = "avx512f")]
59247 unsafe fn test_mm_maskz_getmant_round_sd() {
59248 let a = _mm_set1_pd(20.);
59249 let b = _mm_set1_pd(10.);
59250 let r = _mm_maskz_getmant_round_sd::<
59251 _MM_MANT_NORM_1_2,
59252 _MM_MANT_SIGN_SRC,
59253 _MM_FROUND_CUR_DIRECTION,
59254 >(0, a, b);
59255 let e = _mm_set_pd(20., 0.);
59256 assert_eq_m128d(r, e);
59257 let r = _mm_maskz_getmant_round_sd::<
59258 _MM_MANT_NORM_1_2,
59259 _MM_MANT_SIGN_SRC,
59260 _MM_FROUND_CUR_DIRECTION,
59261 >(0b11111111, a, b);
59262 let e = _mm_set_pd(20., 1.25);
59263 assert_eq_m128d(r, e);
59264 }
59265
59266 #[simd_test(enable = "avx512f")]
59267 unsafe fn test_mm_roundscale_round_ss() {
59268 let a = _mm_set1_ps(2.2);
59269 let b = _mm_set1_ps(1.1);
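        // IMM8 = 0 keeps zero fraction bits, i.e. rounds the low lane of `b` to the nearest integer: 1.1 -> 1.0.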
59270 let r = _mm_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, b);
59271 let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
59272 assert_eq_m128(r, e);
59273 }
59274
59275 #[simd_test(enable = "avx512f")]
59276 unsafe fn test_mm_mask_roundscale_round_ss() {
59277 let a = _mm_set1_ps(2.2);
59278 let b = _mm_set1_ps(1.1);
59279 let r = _mm_mask_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
59280 let e = _mm_set_ps(2.2, 2.2, 2.2, 2.2);
59281 assert_eq_m128(r, e);
59282 let r = _mm_mask_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
59283 let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
59284 assert_eq_m128(r, e);
59285 }
59286
59287 #[simd_test(enable = "avx512f")]
59288 unsafe fn test_mm_maskz_roundscale_round_ss() {
59289 let a = _mm_set1_ps(2.2);
59290 let b = _mm_set1_ps(1.1);
59291 let r = _mm_maskz_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(0, a, b);
59292 let e = _mm_set_ps(2.2, 2.2, 2.2, 0.0);
59293 assert_eq_m128(r, e);
59294 let r = _mm_maskz_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
59295 let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
59296 assert_eq_m128(r, e);
59297 }
59298
59299 #[simd_test(enable = "avx512f")]
59300 unsafe fn test_mm_roundscale_round_sd() {
59301 let a = _mm_set1_pd(2.2);
59302 let b = _mm_set1_pd(1.1);
59303 let r = _mm_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(a, b);
59304 let e = _mm_set_pd(2.2, 1.0);
59305 assert_eq_m128d(r, e);
59306 }
59307
59308 #[simd_test(enable = "avx512f")]
59309 unsafe fn test_mm_mask_roundscale_round_sd() {
59310 let a = _mm_set1_pd(2.2);
59311 let b = _mm_set1_pd(1.1);
59312 let r = _mm_mask_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
59313 let e = _mm_set_pd(2.2, 2.2);
59314 assert_eq_m128d(r, e);
59315 let r = _mm_mask_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
59316 let e = _mm_set_pd(2.2, 1.0);
59317 assert_eq_m128d(r, e);
59318 }
59319
59320 #[simd_test(enable = "avx512f")]
59321 unsafe fn test_mm_maskz_roundscale_round_sd() {
59322 let a = _mm_set1_pd(2.2);
59323 let b = _mm_set1_pd(1.1);
59324 let r = _mm_maskz_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(0, a, b);
59325 let e = _mm_set_pd(2.2, 0.0);
59326 assert_eq_m128d(r, e);
59327 let r = _mm_maskz_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
59328 let e = _mm_set_pd(2.2, 1.0);
59329 assert_eq_m128d(r, e);
59330 }
59331
59332 #[simd_test(enable = "avx512f")]
59333 unsafe fn test_mm_scalef_round_ss() {
59334 let a = _mm_set1_ps(1.);
59335 let b = _mm_set1_ps(3.);
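        // scalef computes a0 * 2^floor(b0) = 1. * 2^3 = 8. in the low lane; upper lanes come from `a`.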
59336 let r = _mm_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
59337 let e = _mm_set_ps(1., 1., 1., 8.);
59338 assert_eq_m128(r, e);
59339 }
59340
59341 #[simd_test(enable = "avx512f")]
59342 unsafe fn test_mm_mask_scalef_round_ss() {
59343 let a = _mm_set1_ps(1.);
59344 let b = _mm_set1_ps(3.);
59345 let r = _mm_mask_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59346 a, 0, a, b,
59347 );
59348 let e = _mm_set_ps(1., 1., 1., 1.);
59349 assert_eq_m128(r, e);
59350 let r = _mm_mask_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59351 a, 0b11111111, a, b,
59352 );
59353 let e = _mm_set_ps(1., 1., 1., 8.);
59354 assert_eq_m128(r, e);
59355 }
59356
59357 #[simd_test(enable = "avx512f")]
59358 unsafe fn test_mm_maskz_scalef_round_ss() {
59359 let a = _mm_set1_ps(1.);
59360 let b = _mm_set1_ps(3.);
59361 let r =
59362 _mm_maskz_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
59363 let e = _mm_set_ps(1., 1., 1., 0.);
59364 assert_eq_m128(r, e);
59365 let r = _mm_maskz_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59366 0b11111111, a, b,
59367 );
59368 let e = _mm_set_ps(1., 1., 1., 8.);
59369 assert_eq_m128(r, e);
59370 }
59371
59372 #[simd_test(enable = "avx512f")]
59373 unsafe fn test_mm_scalef_round_sd() {
59374 let a = _mm_set1_pd(1.);
59375 let b = _mm_set1_pd(3.);
59376 let r = _mm_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
59377 let e = _mm_set_pd(1., 8.);
59378 assert_eq_m128d(r, e);
59379 }
59380
59381 #[simd_test(enable = "avx512f")]
59382 unsafe fn test_mm_mask_scalef_round_sd() {
59383 let a = _mm_set1_pd(1.);
59384 let b = _mm_set1_pd(3.);
59385 let r = _mm_mask_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59386 a, 0, a, b,
59387 );
59388 let e = _mm_set_pd(1., 1.);
59389 assert_eq_m128d(r, e);
59390 let r = _mm_mask_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59391 a, 0b11111111, a, b,
59392 );
59393 let e = _mm_set_pd(1., 8.);
59394 assert_eq_m128d(r, e);
59395 }
59396
59397 #[simd_test(enable = "avx512f")]
59398 unsafe fn test_mm_maskz_scalef_round_sd() {
59399 let a = _mm_set1_pd(1.);
59400 let b = _mm_set1_pd(3.);
59401 let r =
59402 _mm_maskz_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
59403 let e = _mm_set_pd(1., 0.);
59404 assert_eq_m128d(r, e);
59405 let r = _mm_maskz_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59406 0b11111111, a, b,
59407 );
59408 let e = _mm_set_pd(1., 8.);
59409 assert_eq_m128d(r, e);
59410 }
59411
59412 #[simd_test(enable = "avx512f")]
59413 unsafe fn test_mm_fmadd_round_ss() {
59414 let a = _mm_set1_ps(1.);
59415 let b = _mm_set1_ps(2.);
59416 let c = _mm_set1_ps(3.);
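        // Fused multiply-add on the low lane: 1. * 2. + 3. = 5.; upper lanes are copied from `a`.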
59417 let r = _mm_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
59418 let e = _mm_set_ps(1., 1., 1., 5.);
59419 assert_eq_m128(r, e);
59420 }
59421
59422 #[simd_test(enable = "avx512f")]
59423 unsafe fn test_mm_mask_fmadd_round_ss() {
59424 let a = _mm_set1_ps(1.);
59425 let b = _mm_set1_ps(2.);
59426 let c = _mm_set1_ps(3.);
59427 let r = _mm_mask_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59428 a, 0, b, c,
59429 );
59430 assert_eq_m128(r, a);
59431 let r = _mm_mask_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59432 a, 0b11111111, b, c,
59433 );
59434 let e = _mm_set_ps(1., 1., 1., 5.);
59435 assert_eq_m128(r, e);
59436 }
59437
59438 #[simd_test(enable = "avx512f")]
59439 unsafe fn test_mm_maskz_fmadd_round_ss() {
59440 let a = _mm_set1_ps(1.);
59441 let b = _mm_set1_ps(2.);
59442 let c = _mm_set1_ps(3.);
59443 let r = _mm_maskz_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59444 0, a, b, c,
59445 );
59446 let e = _mm_set_ps(1., 1., 1., 0.);
59447 assert_eq_m128(r, e);
59448 let r = _mm_maskz_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59449 0b11111111, a, b, c,
59450 );
59451 let e = _mm_set_ps(1., 1., 1., 5.);
59452 assert_eq_m128(r, e);
59453 }
59454
59455 #[simd_test(enable = "avx512f")]
59456 unsafe fn test_mm_mask3_fmadd_round_ss() {
59457 let a = _mm_set1_ps(1.);
59458 let b = _mm_set1_ps(2.);
59459 let c = _mm_set1_ps(3.);
59460 let r = _mm_mask3_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59461 a, b, c, 0,
59462 );
59463 assert_eq_m128(r, c);
59464 let r = _mm_mask3_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59465 a, b, c, 0b11111111,
59466 );
59467 let e = _mm_set_ps(3., 3., 3., 5.);
59468 assert_eq_m128(r, e);
59469 }
59470
59471 #[simd_test(enable = "avx512f")]
59472 unsafe fn test_mm_fmadd_round_sd() {
59473 let a = _mm_set1_pd(1.);
59474 let b = _mm_set1_pd(2.);
59475 let c = _mm_set1_pd(3.);
59476 let r = _mm_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
59477 let e = _mm_set_pd(1., 5.);
59478 assert_eq_m128d(r, e);
59479 }
59480
59481 #[simd_test(enable = "avx512f")]
59482 unsafe fn test_mm_mask_fmadd_round_sd() {
59483 let a = _mm_set1_pd(1.);
59484 let b = _mm_set1_pd(2.);
59485 let c = _mm_set1_pd(3.);
59486 let r = _mm_mask_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59487 a, 0, b, c,
59488 );
59489 assert_eq_m128d(r, a);
59490 let r = _mm_mask_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59491 a, 0b11111111, b, c,
59492 );
59493 let e = _mm_set_pd(1., 5.);
59494 assert_eq_m128d(r, e);
59495 }
59496
59497 #[simd_test(enable = "avx512f")]
59498 unsafe fn test_mm_maskz_fmadd_round_sd() {
59499 let a = _mm_set1_pd(1.);
59500 let b = _mm_set1_pd(2.);
59501 let c = _mm_set1_pd(3.);
59502 let r = _mm_maskz_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59503 0, a, b, c,
59504 );
59505 let e = _mm_set_pd(1., 0.);
59506 assert_eq_m128d(r, e);
59507 let r = _mm_maskz_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59508 0b11111111, a, b, c,
59509 );
59510 let e = _mm_set_pd(1., 5.);
59511 assert_eq_m128d(r, e);
59512 }
59513
59514 #[simd_test(enable = "avx512f")]
59515 unsafe fn test_mm_mask3_fmadd_round_sd() {
59516 let a = _mm_set1_pd(1.);
59517 let b = _mm_set1_pd(2.);
59518 let c = _mm_set1_pd(3.);
59519 let r = _mm_mask3_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59520 a, b, c, 0,
59521 );
59522 assert_eq_m128d(r, c);
59523 let r = _mm_mask3_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59524 a, b, c, 0b11111111,
59525 );
59526 let e = _mm_set_pd(3., 5.);
59527 assert_eq_m128d(r, e);
59528 }
59529
59530 #[simd_test(enable = "avx512f")]
59531 unsafe fn test_mm_fmsub_round_ss() {
59532 let a = _mm_set1_ps(1.);
59533 let b = _mm_set1_ps(2.);
59534 let c = _mm_set1_ps(3.);
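        // Low lane: 1. * 2. - 3. = -1.; upper lanes are copied from `a`.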
59535 let r = _mm_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
59536 let e = _mm_set_ps(1., 1., 1., -1.);
59537 assert_eq_m128(r, e);
59538 }
59539
59540 #[simd_test(enable = "avx512f")]
59541 unsafe fn test_mm_mask_fmsub_round_ss() {
59542 let a = _mm_set1_ps(1.);
59543 let b = _mm_set1_ps(2.);
59544 let c = _mm_set1_ps(3.);
59545 let r = _mm_mask_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59546 a, 0, b, c,
59547 );
59548 assert_eq_m128(r, a);
59549 let r = _mm_mask_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59550 a, 0b11111111, b, c,
59551 );
59552 let e = _mm_set_ps(1., 1., 1., -1.);
59553 assert_eq_m128(r, e);
59554 }
59555
59556 #[simd_test(enable = "avx512f")]
59557 unsafe fn test_mm_maskz_fmsub_round_ss() {
59558 let a = _mm_set1_ps(1.);
59559 let b = _mm_set1_ps(2.);
59560 let c = _mm_set1_ps(3.);
59561 let r = _mm_maskz_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59562 0, a, b, c,
59563 );
59564 let e = _mm_set_ps(1., 1., 1., 0.);
59565 assert_eq_m128(r, e);
59566 let r = _mm_maskz_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59567 0b11111111, a, b, c,
59568 );
59569 let e = _mm_set_ps(1., 1., 1., -1.);
59570 assert_eq_m128(r, e);
59571 }
59572
59573 #[simd_test(enable = "avx512f")]
59574 unsafe fn test_mm_mask3_fmsub_round_ss() {
59575 let a = _mm_set1_ps(1.);
59576 let b = _mm_set1_ps(2.);
59577 let c = _mm_set1_ps(3.);
59578 let r = _mm_mask3_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59579 a, b, c, 0,
59580 );
59581 assert_eq_m128(r, c);
59582 let r = _mm_mask3_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59583 a, b, c, 0b11111111,
59584 );
59585 let e = _mm_set_ps(3., 3., 3., -1.);
59586 assert_eq_m128(r, e);
59587 }
59588
59589 #[simd_test(enable = "avx512f")]
59590 unsafe fn test_mm_fmsub_round_sd() {
59591 let a = _mm_set1_pd(1.);
59592 let b = _mm_set1_pd(2.);
59593 let c = _mm_set1_pd(3.);
59594 let r = _mm_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
59595 let e = _mm_set_pd(1., -1.);
59596 assert_eq_m128d(r, e);
59597 }
59598
59599 #[simd_test(enable = "avx512f")]
59600 unsafe fn test_mm_mask_fmsub_round_sd() {
59601 let a = _mm_set1_pd(1.);
59602 let b = _mm_set1_pd(2.);
59603 let c = _mm_set1_pd(3.);
59604 let r = _mm_mask_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59605 a, 0, b, c,
59606 );
59607 assert_eq_m128d(r, a);
59608 let r = _mm_mask_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59609 a, 0b11111111, b, c,
59610 );
59611 let e = _mm_set_pd(1., -1.);
59612 assert_eq_m128d(r, e);
59613 }
59614
59615 #[simd_test(enable = "avx512f")]
59616 unsafe fn test_mm_maskz_fmsub_round_sd() {
59617 let a = _mm_set1_pd(1.);
59618 let b = _mm_set1_pd(2.);
59619 let c = _mm_set1_pd(3.);
59620 let r = _mm_maskz_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59621 0, a, b, c,
59622 );
59623 let e = _mm_set_pd(1., 0.);
59624 assert_eq_m128d(r, e);
59625 let r = _mm_maskz_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59626 0b11111111, a, b, c,
59627 );
59628 let e = _mm_set_pd(1., -1.);
59629 assert_eq_m128d(r, e);
59630 }
59631
59632 #[simd_test(enable = "avx512f")]
59633 unsafe fn test_mm_mask3_fmsub_round_sd() {
59634 let a = _mm_set1_pd(1.);
59635 let b = _mm_set1_pd(2.);
59636 let c = _mm_set1_pd(3.);
59637 let r = _mm_mask3_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59638 a, b, c, 0,
59639 );
59640 assert_eq_m128d(r, c);
59641 let r = _mm_mask3_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59642 a, b, c, 0b11111111,
59643 );
59644 let e = _mm_set_pd(3., -1.);
59645 assert_eq_m128d(r, e);
59646 }
59647
59648 #[simd_test(enable = "avx512f")]
59649 unsafe fn test_mm_fnmadd_round_ss() {
59650 let a = _mm_set1_ps(1.);
59651 let b = _mm_set1_ps(2.);
59652 let c = _mm_set1_ps(3.);
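        // Low lane: -(1. * 2.) + 3. = 1.; upper lanes are copied from `a`.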
59653 let r = _mm_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
59654 let e = _mm_set_ps(1., 1., 1., 1.);
59655 assert_eq_m128(r, e);
59656 }
59657
59658 #[simd_test(enable = "avx512f")]
59659 unsafe fn test_mm_mask_fnmadd_round_ss() {
59660 let a = _mm_set1_ps(1.);
59661 let b = _mm_set1_ps(2.);
59662 let c = _mm_set1_ps(3.);
59663 let r = _mm_mask_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59664 a, 0, b, c,
59665 );
59666 assert_eq_m128(r, a);
59667 let r = _mm_mask_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59668 a, 0b11111111, b, c,
59669 );
59670 let e = _mm_set_ps(1., 1., 1., 1.);
59671 assert_eq_m128(r, e);
59672 }
59673
59674 #[simd_test(enable = "avx512f")]
59675 unsafe fn test_mm_maskz_fnmadd_round_ss() {
59676 let a = _mm_set1_ps(1.);
59677 let b = _mm_set1_ps(2.);
59678 let c = _mm_set1_ps(3.);
59679 let r = _mm_maskz_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59680 0, a, b, c,
59681 );
59682 let e = _mm_set_ps(1., 1., 1., 0.);
59683 assert_eq_m128(r, e);
59684 let r = _mm_maskz_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59685 0b11111111, a, b, c,
59686 );
59687 let e = _mm_set_ps(1., 1., 1., 1.);
59688 assert_eq_m128(r, e);
59689 }
59690
59691 #[simd_test(enable = "avx512f")]
59692 unsafe fn test_mm_mask3_fnmadd_round_ss() {
59693 let a = _mm_set1_ps(1.);
59694 let b = _mm_set1_ps(2.);
59695 let c = _mm_set1_ps(3.);
59696 let r = _mm_mask3_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59697 a, b, c, 0,
59698 );
59699 assert_eq_m128(r, c);
59700 let r = _mm_mask3_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59701 a, b, c, 0b11111111,
59702 );
59703 let e = _mm_set_ps(3., 3., 3., 1.);
59704 assert_eq_m128(r, e);
59705 }
59706
59707 #[simd_test(enable = "avx512f")]
59708 unsafe fn test_mm_fnmadd_round_sd() {
59709 let a = _mm_set1_pd(1.);
59710 let b = _mm_set1_pd(2.);
59711 let c = _mm_set1_pd(3.);
59712 let r = _mm_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
59713 let e = _mm_set_pd(1., 1.);
59714 assert_eq_m128d(r, e);
59715 }
59716
59717 #[simd_test(enable = "avx512f")]
59718 unsafe fn test_mm_mask_fnmadd_round_sd() {
59719 let a = _mm_set1_pd(1.);
59720 let b = _mm_set1_pd(2.);
59721 let c = _mm_set1_pd(3.);
59722 let r = _mm_mask_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59723 a, 0, b, c,
59724 );
59725 assert_eq_m128d(r, a);
59726 let r = _mm_mask_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59727 a, 0b11111111, b, c,
59728 );
59729 let e = _mm_set_pd(1., 1.);
59730 assert_eq_m128d(r, e);
59731 }
59732
59733 #[simd_test(enable = "avx512f")]
59734 unsafe fn test_mm_maskz_fnmadd_round_sd() {
59735 let a = _mm_set1_pd(1.);
59736 let b = _mm_set1_pd(2.);
59737 let c = _mm_set1_pd(3.);
59738 let r = _mm_maskz_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59739 0, a, b, c,
59740 );
59741 let e = _mm_set_pd(1., 0.);
59742 assert_eq_m128d(r, e);
59743 let r = _mm_maskz_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59744 0b11111111, a, b, c,
59745 );
59746 let e = _mm_set_pd(1., 1.);
59747 assert_eq_m128d(r, e);
59748 }
59749
59750 #[simd_test(enable = "avx512f")]
59751 unsafe fn test_mm_mask3_fnmadd_round_sd() {
59752 let a = _mm_set1_pd(1.);
59753 let b = _mm_set1_pd(2.);
59754 let c = _mm_set1_pd(3.);
59755 let r = _mm_mask3_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59756 a, b, c, 0,
59757 );
59758 assert_eq_m128d(r, c);
59759 let r = _mm_mask3_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59760 a, b, c, 0b11111111,
59761 );
59762 let e = _mm_set_pd(3., 1.);
59763 assert_eq_m128d(r, e);
59764 }
59765
59766 #[simd_test(enable = "avx512f")]
59767 unsafe fn test_mm_fnmsub_round_ss() {
59768 let a = _mm_set1_ps(1.);
59769 let b = _mm_set1_ps(2.);
59770 let c = _mm_set1_ps(3.);
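        // Low lane: -(1. * 2.) - 3. = -5.; upper lanes are copied from `a`.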
59771 let r = _mm_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
59772 let e = _mm_set_ps(1., 1., 1., -5.);
59773 assert_eq_m128(r, e);
59774 }
59775
59776 #[simd_test(enable = "avx512f")]
59777 unsafe fn test_mm_mask_fnmsub_round_ss() {
59778 let a = _mm_set1_ps(1.);
59779 let b = _mm_set1_ps(2.);
59780 let c = _mm_set1_ps(3.);
59781 let r = _mm_mask_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59782 a, 0, b, c,
59783 );
59784 assert_eq_m128(r, a);
59785 let r = _mm_mask_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59786 a, 0b11111111, b, c,
59787 );
59788 let e = _mm_set_ps(1., 1., 1., -5.);
59789 assert_eq_m128(r, e);
59790 }
59791
59792 #[simd_test(enable = "avx512f")]
59793 unsafe fn test_mm_maskz_fnmsub_round_ss() {
59794 let a = _mm_set1_ps(1.);
59795 let b = _mm_set1_ps(2.);
59796 let c = _mm_set1_ps(3.);
59797 let r = _mm_maskz_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59798 0, a, b, c,
59799 );
59800 let e = _mm_set_ps(1., 1., 1., 0.);
59801 assert_eq_m128(r, e);
59802 let r = _mm_maskz_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59803 0b11111111, a, b, c,
59804 );
59805 let e = _mm_set_ps(1., 1., 1., -5.);
59806 assert_eq_m128(r, e);
59807 }
59808
59809 #[simd_test(enable = "avx512f")]
59810 unsafe fn test_mm_mask3_fnmsub_round_ss() {
59811 let a = _mm_set1_ps(1.);
59812 let b = _mm_set1_ps(2.);
59813 let c = _mm_set1_ps(3.);
59814 let r = _mm_mask3_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59815 a, b, c, 0,
59816 );
59817 assert_eq_m128(r, c);
59818 let r = _mm_mask3_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59819 a, b, c, 0b11111111,
59820 );
59821 let e = _mm_set_ps(3., 3., 3., -5.);
59822 assert_eq_m128(r, e);
59823 }
59824
59825 #[simd_test(enable = "avx512f")]
59826 unsafe fn test_mm_fnmsub_round_sd() {
59827 let a = _mm_set1_pd(1.);
59828 let b = _mm_set1_pd(2.);
59829 let c = _mm_set1_pd(3.);
59830 let r = _mm_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
59831 let e = _mm_set_pd(1., -5.);
59832 assert_eq_m128d(r, e);
59833 }
59834
59835 #[simd_test(enable = "avx512f")]
59836 unsafe fn test_mm_mask_fnmsub_round_sd() {
59837 let a = _mm_set1_pd(1.);
59838 let b = _mm_set1_pd(2.);
59839 let c = _mm_set1_pd(3.);
59840 let r = _mm_mask_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59841 a, 0, b, c,
59842 );
59843 assert_eq_m128d(r, a);
59844 let r = _mm_mask_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59845 a, 0b11111111, b, c,
59846 );
59847 let e = _mm_set_pd(1., -5.);
59848 assert_eq_m128d(r, e);
59849 }
59850
59851 #[simd_test(enable = "avx512f")]
59852 unsafe fn test_mm_maskz_fnmsub_round_sd() {
59853 let a = _mm_set1_pd(1.);
59854 let b = _mm_set1_pd(2.);
59855 let c = _mm_set1_pd(3.);
59856 let r = _mm_maskz_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59857 0, a, b, c,
59858 );
59859 let e = _mm_set_pd(1., 0.);
59860 assert_eq_m128d(r, e);
59861 let r = _mm_maskz_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59862 0b11111111, a, b, c,
59863 );
59864 let e = _mm_set_pd(1., -5.);
59865 assert_eq_m128d(r, e);
59866 }
59867
59868 #[simd_test(enable = "avx512f")]
59869 unsafe fn test_mm_mask3_fnmsub_round_sd() {
59870 let a = _mm_set1_pd(1.);
59871 let b = _mm_set1_pd(2.);
59872 let c = _mm_set1_pd(3.);
59873 let r = _mm_mask3_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59874 a, b, c, 0,
59875 );
59876 assert_eq_m128d(r, c);
59877 let r = _mm_mask3_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59878 a, b, c, 0b11111111,
59879 );
59880 let e = _mm_set_pd(3., -5.);
59881 assert_eq_m128d(r, e);
59882 }
59883
59884 #[simd_test(enable = "avx512f")]
59885 unsafe fn test_mm_fixupimm_ss() {
59886 let a = _mm_set_ps(0., 0., 0., f32::NAN);
59887 let b = _mm_set1_ps(f32::MAX);
59888 let c = _mm_set1_epi32(i32::MAX);
59889 let r = _mm_fixupimm_ss::<5>(a, b, c);
59890 let e = _mm_set_ps(0., 0., 0., -0.0);
59891 assert_eq_m128(r, e);
59892 }
59893
59894 #[simd_test(enable = "avx512f")]
59895 unsafe fn test_mm_mask_fixupimm_ss() {
59896 let a = _mm_set_ps(0., 0., 0., f32::NAN);
59897 let b = _mm_set1_ps(f32::MAX);
59898 let c = _mm_set1_epi32(i32::MAX);
59899 let r = _mm_mask_fixupimm_ss::<5>(a, 0b11111111, b, c);
59900 let e = _mm_set_ps(0., 0., 0., -0.0);
59901 assert_eq_m128(r, e);
59902 }
59903
59904 #[simd_test(enable = "avx512f")]
59905 unsafe fn test_mm_maskz_fixupimm_ss() {
59906 let a = _mm_set_ps(0., 0., 0., f32::NAN);
59907 let b = _mm_set1_ps(f32::MAX);
59908 let c = _mm_set1_epi32(i32::MAX);
59909 let r = _mm_maskz_fixupimm_ss::<5>(0b00000000, a, b, c);
59910 let e = _mm_set_ps(0., 0., 0., 0.0);
59911 assert_eq_m128(r, e);
59912 let r = _mm_maskz_fixupimm_ss::<5>(0b11111111, a, b, c);
59913 let e = _mm_set_ps(0., 0., 0., -0.0);
59914 assert_eq_m128(r, e);
59915 }
59916
59917 #[simd_test(enable = "avx512f")]
59918 unsafe fn test_mm_fixupimm_sd() {
59919 let a = _mm_set_pd(0., f64::NAN);
59920 let b = _mm_set1_pd(f64::MAX);
59921 let c = _mm_set1_epi64x(i32::MAX as i64);
59922 let r = _mm_fixupimm_sd::<5>(a, b, c);
59923 let e = _mm_set_pd(0., -0.0);
59924 assert_eq_m128d(r, e);
59925 }
59926
59927 #[simd_test(enable = "avx512f")]
59928 unsafe fn test_mm_mask_fixupimm_sd() {
59929 let a = _mm_set_pd(0., f64::NAN);
59930 let b = _mm_set1_pd(f64::MAX);
59931 let c = _mm_set1_epi64x(i32::MAX as i64);
59932 let r = _mm_mask_fixupimm_sd::<5>(a, 0b11111111, b, c);
59933 let e = _mm_set_pd(0., -0.0);
59934 assert_eq_m128d(r, e);
59935 }
59936
59937 #[simd_test(enable = "avx512f")]
59938 unsafe fn test_mm_maskz_fixupimm_sd() {
59939 let a = _mm_set_pd(0., f64::NAN);
59940 let b = _mm_set1_pd(f64::MAX);
59941 let c = _mm_set1_epi64x(i32::MAX as i64);
59942 let r = _mm_maskz_fixupimm_sd::<5>(0b00000000, a, b, c);
59943 let e = _mm_set_pd(0., 0.0);
59944 assert_eq_m128d(r, e);
59945 let r = _mm_maskz_fixupimm_sd::<5>(0b11111111, a, b, c);
59946 let e = _mm_set_pd(0., -0.0);
59947 assert_eq_m128d(r, e);
59948 }
59949
59950 #[simd_test(enable = "avx512f")]
59951 unsafe fn test_mm_fixupimm_round_ss() {
59952 let a = _mm_set_ps(1., 0., 0., f32::NAN);
59953 let b = _mm_set1_ps(f32::MAX);
59954 let c = _mm_set1_epi32(i32::MAX);
59955 let r = _mm_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(a, b, c);
59956 let e = _mm_set_ps(1., 0., 0., -0.0);
59957 assert_eq_m128(r, e);
59958 }
59959
59960 #[simd_test(enable = "avx512f")]
59961 unsafe fn test_mm_mask_fixupimm_round_ss() {
59962 let a = _mm_set_ps(0., 0., 0., f32::NAN);
59963 let b = _mm_set1_ps(f32::MAX);
59964 let c = _mm_set1_epi32(i32::MAX);
59965 let r = _mm_mask_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, b, c);
59966 let e = _mm_set_ps(0., 0., 0., -0.0);
59967 assert_eq_m128(r, e);
59968 }
59969
59970 #[simd_test(enable = "avx512f")]
59971 unsafe fn test_mm_maskz_fixupimm_round_ss() {
59972 let a = _mm_set_ps(0., 0., 0., f32::NAN);
59973 let b = _mm_set1_ps(f32::MAX);
59974 let c = _mm_set1_epi32(i32::MAX);
59975 let r = _mm_maskz_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(0b00000000, a, b, c);
59976 let e = _mm_set_ps(0., 0., 0., 0.0);
59977 assert_eq_m128(r, e);
59978 let r = _mm_maskz_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(0b11111111, a, b, c);
59979 let e = _mm_set_ps(0., 0., 0., -0.0);
59980 assert_eq_m128(r, e);
59981 }
59982
59983 #[simd_test(enable = "avx512f")]
59984 unsafe fn test_mm_fixupimm_round_sd() {
59985 let a = _mm_set_pd(0., f64::NAN);
59986 let b = _mm_set1_pd(f64::MAX);
59987 let c = _mm_set1_epi64x(i32::MAX as i64);
59988 let r = _mm_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(a, b, c);
59989 let e = _mm_set_pd(0., -0.0);
59990 assert_eq_m128d(r, e);
59991 }
59992
59993 #[simd_test(enable = "avx512f")]
59994 unsafe fn test_mm_mask_fixupimm_round_sd() {
59995 let a = _mm_set_pd(0., f64::NAN);
59996 let b = _mm_set1_pd(f64::MAX);
59997 let c = _mm_set1_epi64x(i32::MAX as i64);
59998 let r = _mm_mask_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, b, c);
59999 let e = _mm_set_pd(0., -0.0);
60000 assert_eq_m128d(r, e);
60001 }
60002
60003 #[simd_test(enable = "avx512f")]
60004 unsafe fn test_mm_maskz_fixupimm_round_sd() {
60005 let a = _mm_set_pd(0., f64::NAN);
60006 let b = _mm_set1_pd(f64::MAX);
60007 let c = _mm_set1_epi64x(i32::MAX as i64);
60008 let r = _mm_maskz_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(0b00000000, a, b, c);
60009 let e = _mm_set_pd(0., 0.0);
60010 assert_eq_m128d(r, e);
60011 let r = _mm_maskz_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(0b11111111, a, b, c);
60012 let e = _mm_set_pd(0., -0.0);
60013 assert_eq_m128d(r, e);
60014 }
60015
60016 #[simd_test(enable = "avx512f")]
60017 unsafe fn test_mm_mask_cvtss_sd() {
60018 let a = _mm_set_pd(6., -7.5);
60019 let b = _mm_set_ps(0., -0.5, 1., -1.5);
60020 let r = _mm_mask_cvtss_sd(a, 0, a, b);
60021 assert_eq_m128d(r, a);
60022 let r = _mm_mask_cvtss_sd(a, 0b11111111, a, b);
60023 let e = _mm_set_pd(6., -1.5);
60024 assert_eq_m128d(r, e);
60025 }
60026
60027 #[simd_test(enable = "avx512f")]
60028 unsafe fn test_mm_maskz_cvtss_sd() {
60029 let a = _mm_set_pd(6., -7.5);
60030 let b = _mm_set_ps(0., -0.5, 1., -1.5);
60031 let r = _mm_maskz_cvtss_sd(0, a, b);
60032 let e = _mm_set_pd(6., 0.);
60033 assert_eq_m128d(r, e);
60034 let r = _mm_maskz_cvtss_sd(0b11111111, a, b);
60035 let e = _mm_set_pd(6., -1.5);
60036 assert_eq_m128d(r, e);
60037 }
60038
60039 #[simd_test(enable = "avx512f")]
60040 unsafe fn test_mm_mask_cvtsd_ss() {
60041 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60042 let b = _mm_set_pd(6., -7.5);
60043 let r = _mm_mask_cvtsd_ss(a, 0, a, b);
60044 assert_eq_m128(r, a);
60045 let r = _mm_mask_cvtsd_ss(a, 0b11111111, a, b);
60046 let e = _mm_set_ps(0., -0.5, 1., -7.5);
60047 assert_eq_m128(r, e);
60048 }
60049
60050 #[simd_test(enable = "avx512f")]
60051 unsafe fn test_mm_maskz_cvtsd_ss() {
60052 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60053 let b = _mm_set_pd(6., -7.5);
60054 let r = _mm_maskz_cvtsd_ss(0, a, b);
60055 let e = _mm_set_ps(0., -0.5, 1., 0.);
60056 assert_eq_m128(r, e);
60057 let r = _mm_maskz_cvtsd_ss(0b11111111, a, b);
60058 let e = _mm_set_ps(0., -0.5, 1., -7.5);
60059 assert_eq_m128(r, e);
60060 }
60061
60062 #[simd_test(enable = "avx512f")]
60063 unsafe fn test_mm_cvt_roundss_sd() {
60064 let a = _mm_set_pd(6., -7.5);
60065 let b = _mm_set_ps(0., -0.5, 1., -1.5);
60066 let r = _mm_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
60067 let e = _mm_set_pd(6., -1.5);
60068 assert_eq_m128d(r, e);
60069 }
60070
60071 #[simd_test(enable = "avx512f")]
60072 unsafe fn test_mm_mask_cvt_roundss_sd() {
60073 let a = _mm_set_pd(6., -7.5);
60074 let b = _mm_set_ps(0., -0.5, 1., -1.5);
60075 let r = _mm_mask_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
60076 assert_eq_m128d(r, a);
60077 let r = _mm_mask_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
60078 let e = _mm_set_pd(6., -1.5);
60079 assert_eq_m128d(r, e);
60080 }
60081
60082 #[simd_test(enable = "avx512f")]
60083 unsafe fn test_mm_maskz_cvt_roundss_sd() {
60084 let a = _mm_set_pd(6., -7.5);
60085 let b = _mm_set_ps(0., -0.5, 1., -1.5);
60086 let r = _mm_maskz_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
60087 let e = _mm_set_pd(6., 0.);
60088 assert_eq_m128d(r, e);
60089 let r = _mm_maskz_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
60090 let e = _mm_set_pd(6., -1.5);
60091 assert_eq_m128d(r, e);
60092 }
60093
60094 #[simd_test(enable = "avx512f")]
60095 unsafe fn test_mm_cvt_roundsd_ss() {
60096 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60097 let b = _mm_set_pd(6., -7.5);
60098 let r = _mm_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
60099 let e = _mm_set_ps(0., -0.5, 1., -7.5);
60100 assert_eq_m128(r, e);
60101 }
60102
60103 #[simd_test(enable = "avx512f")]
60104 unsafe fn test_mm_mask_cvt_roundsd_ss() {
60105 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60106 let b = _mm_set_pd(6., -7.5);
60107 let r = _mm_mask_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, 0, a, b);
60108 assert_eq_m128(r, a);
60109 let r = _mm_mask_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
60110 a, 0b11111111, a, b,
60111 );
60112 let e = _mm_set_ps(0., -0.5, 1., -7.5);
60113 assert_eq_m128(r, e);
60114 }
60115
60116 #[simd_test(enable = "avx512f")]
60117 unsafe fn test_mm_maskz_cvt_roundsd_ss() {
60118 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60119 let b = _mm_set_pd(6., -7.5);
60120 let r = _mm_maskz_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
60121 let e = _mm_set_ps(0., -0.5, 1., 0.);
60122 assert_eq_m128(r, e);
60123 let r = _mm_maskz_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
60124 0b11111111, a, b,
60125 );
60126 let e = _mm_set_ps(0., -0.5, 1., -7.5);
60127 assert_eq_m128(r, e);
60128 }
60129
60130 #[simd_test(enable = "avx512f")]
60131 unsafe fn test_mm_cvt_roundss_si32() {
60132 let a = _mm_set_ps(0., -0.5, 1., -1.5);
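        // _MM_FROUND_TO_ZERO truncates toward zero, so the low lane -1.5 converts to -1.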
60133 let r = _mm_cvt_roundss_si32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
60134 let e: i32 = -1;
60135 assert_eq!(r, e);
60136 }
60137
60138 #[simd_test(enable = "avx512f")]
60139 unsafe fn test_mm_cvt_roundss_i32() {
60140 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60141 let r = _mm_cvt_roundss_i32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
60142 let e: i32 = -1;
60143 assert_eq!(r, e);
60144 }
60145
60146 #[simd_test(enable = "avx512f")]
60147 unsafe fn test_mm_cvt_roundss_u32() {
60148 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60149 let r = _mm_cvt_roundss_u32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
60150 let e: u32 = u32::MAX;
60151 assert_eq!(r, e);
60152 }
60153
60154 #[simd_test(enable = "avx512f")]
60155 unsafe fn test_mm_cvtss_i32() {
60156 let a = _mm_set_ps(0., -0.5, 1., -1.5);
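        // Default rounding (nearest, ties to even) converts the low lane -1.5 to -2.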
60157 let r = _mm_cvtss_i32(a);
60158 let e: i32 = -2;
60159 assert_eq!(r, e);
60160 }
60161
60162 #[simd_test(enable = "avx512f")]
60163 unsafe fn test_mm_cvtss_u32() {
60164 let a = _mm_set_ps(0., -0.5, 1., -1.5);
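        // -1.5 is not representable as u32; the conversion produces the unsigned integer
        // indefinite value, i.e. all bits set (u32::MAX).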
60165 let r = _mm_cvtss_u32(a);
60166 let e: u32 = u32::MAX;
60167 assert_eq!(r, e);
60168 }
60169
60170 #[simd_test(enable = "avx512f")]
60171 unsafe fn test_mm_cvt_roundsd_si32() {
60172 let a = _mm_set_pd(1., -1.5);
60173 let r = _mm_cvt_roundsd_si32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
60174 let e: i32 = -1;
60175 assert_eq!(r, e);
60176 }
60177
60178 #[simd_test(enable = "avx512f")]
60179 unsafe fn test_mm_cvt_roundsd_i32() {
60180 let a = _mm_set_pd(1., -1.5);
60181 let r = _mm_cvt_roundsd_i32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
60182 let e: i32 = -1;
60183 assert_eq!(r, e);
60184 }
60185
60186 #[simd_test(enable = "avx512f")]
60187 unsafe fn test_mm_cvt_roundsd_u32() {
60188 let a = _mm_set_pd(1., -1.5);
60189 let r = _mm_cvt_roundsd_u32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
60190 let e: u32 = u32::MAX;
60191 assert_eq!(r, e);
60192 }
60193
60194 #[simd_test(enable = "avx512f")]
60195 unsafe fn test_mm_cvtsd_i32() {
60196 let a = _mm_set_pd(1., -1.5);
60197 let r = _mm_cvtsd_i32(a);
60198 let e: i32 = -2;
60199 assert_eq!(r, e);
60200 }
60201
60202 #[simd_test(enable = "avx512f")]
60203 unsafe fn test_mm_cvtsd_u32() {
60204 let a = _mm_set_pd(1., -1.5);
60205 let r = _mm_cvtsd_u32(a);
60206 let e: u32 = u32::MAX;
60207 assert_eq!(r, e);
60208 }
60209
60210 #[simd_test(enable = "avx512f")]
60211 unsafe fn test_mm_cvt_roundi32_ss() {
60212 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60213 let b: i32 = 9;
60214 let r = _mm_cvt_roundi32_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
60215 let e = _mm_set_ps(0., -0.5, 1., 9.);
60216 assert_eq_m128(r, e);
60217 }
60218
60219 #[simd_test(enable = "avx512f")]
60220 unsafe fn test_mm_cvt_roundsi32_ss() {
60221 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60222 let b: i32 = 9;
60223 let r = _mm_cvt_roundsi32_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
60224 let e = _mm_set_ps(0., -0.5, 1., 9.);
60225 assert_eq_m128(r, e);
60226 }
60227
60228 #[simd_test(enable = "avx512f")]
60229 unsafe fn test_mm_cvt_roundu32_ss() {
60230 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60231 let b: u32 = 9;
60232 let r = _mm_cvt_roundu32_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
60233 let e = _mm_set_ps(0., -0.5, 1., 9.);
60234 assert_eq_m128(r, e);
60235 }
60236
60237 #[simd_test(enable = "avx512f")]
60238 unsafe fn test_mm_cvti32_ss() {
60239 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60240 let b: i32 = 9;
60241 let r = _mm_cvti32_ss(a, b);
60242 let e = _mm_set_ps(0., -0.5, 1., 9.);
60243 assert_eq_m128(r, e);
60244 }
60245
60246 #[simd_test(enable = "avx512f")]
60247 unsafe fn test_mm_cvti32_sd() {
60248 let a = _mm_set_pd(1., -1.5);
60249 let b: i32 = 9;
60250 let r = _mm_cvti32_sd(a, b);
60251 let e = _mm_set_pd(1., 9.);
60252 assert_eq_m128d(r, e);
60253 }
60254
60255 #[simd_test(enable = "avx512f")]
60256 unsafe fn test_mm_cvtt_roundss_si32() {
60257 let a = _mm_set_ps(0., -0.5, 1., -1.5);
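        // The cvtt forms always truncate; the rounding argument here only suppresses exceptions, so -1.5 -> -1.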
60258 let r = _mm_cvtt_roundss_si32::<_MM_FROUND_NO_EXC>(a);
60259 let e: i32 = -1;
60260 assert_eq!(r, e);
60261 }
60262
60263 #[simd_test(enable = "avx512f")]
60264 unsafe fn test_mm_cvtt_roundss_i32() {
60265 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60266 let r = _mm_cvtt_roundss_i32::<_MM_FROUND_NO_EXC>(a);
60267 let e: i32 = -1;
60268 assert_eq!(r, e);
60269 }
60270
60271 #[simd_test(enable = "avx512f")]
60272 unsafe fn test_mm_cvtt_roundss_u32() {
60273 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60274 let r = _mm_cvtt_roundss_u32::<_MM_FROUND_NO_EXC>(a);
60275 let e: u32 = u32::MAX;
60276 assert_eq!(r, e);
60277 }
60278
60279 #[simd_test(enable = "avx512f")]
60280 unsafe fn test_mm_cvttss_i32() {
60281 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60282 let r = _mm_cvttss_i32(a);
60283 let e: i32 = -1;
60284 assert_eq!(r, e);
60285 }
60286
60287 #[simd_test(enable = "avx512f")]
60288 unsafe fn test_mm_cvttss_u32() {
60289 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60290 let r = _mm_cvttss_u32(a);
60291 let e: u32 = u32::MAX;
60292 assert_eq!(r, e);
60293 }
60294
60295 #[simd_test(enable = "avx512f")]
60296 unsafe fn test_mm_cvtt_roundsd_si32() {
60297 let a = _mm_set_pd(1., -1.5);
60298 let r = _mm_cvtt_roundsd_si32::<_MM_FROUND_NO_EXC>(a);
60299 let e: i32 = -1;
60300 assert_eq!(r, e);
60301 }
60302
60303 #[simd_test(enable = "avx512f")]
60304 unsafe fn test_mm_cvtt_roundsd_i32() {
60305 let a = _mm_set_pd(1., -1.5);
60306 let r = _mm_cvtt_roundsd_i32::<_MM_FROUND_NO_EXC>(a);
60307 let e: i32 = -1;
60308 assert_eq!(r, e);
60309 }
60310
60311 #[simd_test(enable = "avx512f")]
60312 unsafe fn test_mm_cvtt_roundsd_u32() {
60313 let a = _mm_set_pd(1., -1.5);
60314 let r = _mm_cvtt_roundsd_u32::<_MM_FROUND_NO_EXC>(a);
60315 let e: u32 = u32::MAX;
60316 assert_eq!(r, e);
60317 }
60318
60319 #[simd_test(enable = "avx512f")]
60320 unsafe fn test_mm_cvttsd_i32() {
60321 let a = _mm_set_pd(1., -1.5);
60322 let r = _mm_cvttsd_i32(a);
60323 let e: i32 = -1;
60324 assert_eq!(r, e);
60325 }
60326
60327 #[simd_test(enable = "avx512f")]
60328 unsafe fn test_mm_cvttsd_u32() {
60329 let a = _mm_set_pd(1., -1.5);
60330 let r = _mm_cvttsd_u32(a);
60331 let e: u32 = u32::MAX;
60332 assert_eq!(r, e);
60333 }
60334
60335 #[simd_test(enable = "avx512f")]
60336 unsafe fn test_mm_cvtu32_ss() {
60337 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60338 let b: u32 = 9;
60339 let r = _mm_cvtu32_ss(a, b);
60340 let e = _mm_set_ps(0., -0.5, 1., 9.);
60341 assert_eq_m128(r, e);
60342 }
60343
60344 #[simd_test(enable = "avx512f")]
60345 unsafe fn test_mm_cvtu32_sd() {
60346 let a = _mm_set_pd(1., -1.5);
60347 let b: u32 = 9;
60348 let r = _mm_cvtu32_sd(a, b);
60349 let e = _mm_set_pd(1., 9.);
60350 assert_eq_m128d(r, e);
60351 }
60352
60353 #[simd_test(enable = "avx512f")]
60354 unsafe fn test_mm_comi_round_ss() {
60355 let a = _mm_set1_ps(2.2);
60356 let b = _mm_set1_ps(1.1);
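        // Predicate 0 is the "equal" comparison (_CMP_EQ_OQ); 2.2 == 1.1 is false, so the result is 0.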
60357 let r = _mm_comi_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, b);
60358 let e: i32 = 0;
60359 assert_eq!(r, e);
60360 }
60361
60362 #[simd_test(enable = "avx512f")]
60363 unsafe fn test_mm_comi_round_sd() {
60364 let a = _mm_set1_pd(2.2);
60365 let b = _mm_set1_pd(1.1);
60366 let r = _mm_comi_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(a, b);
60367 let e: i32 = 0;
60368 assert_eq!(r, e);
60369 }
60370
60371 #[simd_test(enable = "avx512f")]
60372 unsafe fn test_mm512_cvtsi512_si32() {
60373 let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
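        // Returns the lowest 32-bit element of the vector, which `setr` places first: 1.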
60374 let r = _mm512_cvtsi512_si32(a);
60375 let e: i32 = 1;
60376 assert_eq!(r, e);
60377 }
60378
60379 #[simd_test(enable = "avx512f")]
60380 unsafe fn test_mm512_cvtss_f32() {
60381 let a = _mm512_setr_ps(
60382 312.0134, 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
60383 );
60384 assert_eq!(_mm512_cvtss_f32(a), 312.0134);
60385 }
60386
60387 #[simd_test(enable = "avx512f")]
60388 unsafe fn test_mm512_cvtsd_f64() {
60389 let r = _mm512_cvtsd_f64(_mm512_setr_pd(-1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8));
60390 assert_eq!(r, -1.1);
60391 }
60392
60393 #[simd_test(enable = "avx512f")]
60394 unsafe fn test_mm512_shuffle_pd() {
60395 let a = _mm512_setr_pd(1., 4., 5., 8., 1., 4., 5., 8.);
60396 let b = _mm512_setr_pd(2., 3., 6., 7., 2., 3., 6., 7.);
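        // With all control bits set, each 128-bit lane takes the upper element of `a` followed by the upper element of `b`.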
60397 let r = _mm512_shuffle_pd::<0b11_11_11_11>(a, b);
60398 let e = _mm512_setr_pd(4., 3., 8., 7., 4., 3., 8., 7.);
60399 assert_eq_m512d(r, e);
60400 }
60401
60402 #[simd_test(enable = "avx512f")]
60403 unsafe fn test_mm512_mask_shuffle_pd() {
60404 let a = _mm512_setr_pd(1., 4., 5., 8., 1., 4., 5., 8.);
60405 let b = _mm512_setr_pd(2., 3., 6., 7., 2., 3., 6., 7.);
60406 let r = _mm512_mask_shuffle_pd::<0b11_11_11_11>(a, 0, a, b);
60407 assert_eq_m512d(r, a);
60408 let r = _mm512_mask_shuffle_pd::<0b11_11_11_11>(a, 0b11111111, a, b);
60409 let e = _mm512_setr_pd(4., 3., 8., 7., 4., 3., 8., 7.);
60410 assert_eq_m512d(r, e);
60411 }
60412
60413 #[simd_test(enable = "avx512f")]
60414 unsafe fn test_mm512_maskz_shuffle_pd() {
60415 let a = _mm512_setr_pd(1., 4., 5., 8., 1., 4., 5., 8.);
60416 let b = _mm512_setr_pd(2., 3., 6., 7., 2., 3., 6., 7.);
60417 let r = _mm512_maskz_shuffle_pd::<0b11_11_11_11>(0, a, b);
60418 assert_eq_m512d(r, _mm512_setzero_pd());
60419 let r = _mm512_maskz_shuffle_pd::<0b11_11_11_11>(0b00001111, a, b);
60420 let e = _mm512_setr_pd(4., 3., 8., 7., 0., 0., 0., 0.);
60421 assert_eq_m512d(r, e);
60422 }
60423
60424 #[simd_test(enable = "avx512f")]
60425 unsafe fn test_mm512_mask_expandloadu_epi32() {
60426 let src = _mm512_set1_epi32(42);
60427 let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
60428 let p = a.as_ptr();
60429 let m = 0b11101000_11001010;
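        // Expand-load reads consecutive values from `p` and scatters them to the lanes whose mask bit
        // is set (lowest bit = lane 0); masked-off lanes keep the corresponding element of `src`.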
60430 let r = _mm512_mask_expandloadu_epi32(src, m, black_box(p));
60431 let e = _mm512_set_epi32(8, 7, 6, 42, 5, 42, 42, 42, 4, 3, 42, 42, 2, 42, 1, 42);
60432 assert_eq_m512i(r, e);
60433 }
60434
60435 #[simd_test(enable = "avx512f")]
60436 unsafe fn test_mm512_maskz_expandloadu_epi32() {
60437 let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
60438 let p = a.as_ptr();
60439 let m = 0b11101000_11001010;
60440 let r = _mm512_maskz_expandloadu_epi32(m, black_box(p));
60441 let e = _mm512_set_epi32(8, 7, 6, 0, 5, 0, 0, 0, 4, 3, 0, 0, 2, 0, 1, 0);
60442 assert_eq_m512i(r, e);
60443 }
60444
60445 #[simd_test(enable = "avx512f,avx512vl")]
60446 unsafe fn test_mm256_mask_expandloadu_epi32() {
60447 let src = _mm256_set1_epi32(42);
60448 let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8];
60449 let p = a.as_ptr();
60450 let m = 0b11101000;
60451 let r = _mm256_mask_expandloadu_epi32(src, m, black_box(p));
60452 let e = _mm256_set_epi32(4, 3, 2, 42, 1, 42, 42, 42);
60453 assert_eq_m256i(r, e);
60454 }
60455
60456 #[simd_test(enable = "avx512f,avx512vl")]
60457 unsafe fn test_mm256_maskz_expandloadu_epi32() {
60458 let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8];
60459 let p = a.as_ptr();
60460 let m = 0b11101000;
60461 let r = _mm256_maskz_expandloadu_epi32(m, black_box(p));
60462 let e = _mm256_set_epi32(4, 3, 2, 0, 1, 0, 0, 0);
60463 assert_eq_m256i(r, e);
60464 }
60465
60466 #[simd_test(enable = "avx512f,avx512vl")]
60467 unsafe fn test_mm_mask_expandloadu_epi32() {
60468 let src = _mm_set1_epi32(42);
60469 let a = &[1_i32, 2, 3, 4];
60470 let p = a.as_ptr();
60471 let m = 0b11111000;
60472 let r = _mm_mask_expandloadu_epi32(src, m, black_box(p));
60473 let e = _mm_set_epi32(1, 42, 42, 42);
60474 assert_eq_m128i(r, e);
60475 }
60476
60477 #[simd_test(enable = "avx512f,avx512vl")]
60478 unsafe fn test_mm_maskz_expandloadu_epi32() {
60479 let a = &[1_i32, 2, 3, 4];
60480 let p = a.as_ptr();
60481 let m = 0b11111000;
60482 let r = _mm_maskz_expandloadu_epi32(m, black_box(p));
60483 let e = _mm_set_epi32(1, 0, 0, 0);
60484 assert_eq_m128i(r, e);
60485 }
60486
60487 #[simd_test(enable = "avx512f")]
60488 unsafe fn test_mm512_mask_expandloadu_epi64() {
60489 let src = _mm512_set1_epi64(42);
60490 let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8];
60491 let p = a.as_ptr();
60492 let m = 0b11101000;
60493 let r = _mm512_mask_expandloadu_epi64(src, m, black_box(p));
60494 let e = _mm512_set_epi64(4, 3, 2, 42, 1, 42, 42, 42);
60495 assert_eq_m512i(r, e);
60496 }
60497
60498 #[simd_test(enable = "avx512f")]
60499 unsafe fn test_mm512_maskz_expandloadu_epi64() {
60500 let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8];
60501 let p = a.as_ptr();
60502 let m = 0b11101000;
60503 let r = _mm512_maskz_expandloadu_epi64(m, black_box(p));
60504 let e = _mm512_set_epi64(4, 3, 2, 0, 1, 0, 0, 0);
60505 assert_eq_m512i(r, e);
60506 }
60507
60508 #[simd_test(enable = "avx512f,avx512vl")]
60509 unsafe fn test_mm256_mask_expandloadu_epi64() {
60510 let src = _mm256_set1_epi64x(42);
60511 let a = &[1_i64, 2, 3, 4];
60512 let p = a.as_ptr();
60513 let m = 0b11101000;
60514 let r = _mm256_mask_expandloadu_epi64(src, m, black_box(p));
60515 let e = _mm256_set_epi64x(1, 42, 42, 42);
60516 assert_eq_m256i(r, e);
60517 }
60518
60519 #[simd_test(enable = "avx512f,avx512vl")]
60520 unsafe fn test_mm256_maskz_expandloadu_epi64() {
60521 let a = &[1_i64, 2, 3, 4];
60522 let p = a.as_ptr();
60523 let m = 0b11101000;
60524 let r = _mm256_maskz_expandloadu_epi64(m, black_box(p));
60525 let e = _mm256_set_epi64x(1, 0, 0, 0);
60526 assert_eq_m256i(r, e);
60527 }
60528
60529 #[simd_test(enable = "avx512f,avx512vl")]
60530 unsafe fn test_mm_mask_expandloadu_epi64() {
60531 let src = _mm_set1_epi64x(42);
60532 let a = &[1_i64, 2];
60533 let p = a.as_ptr();
60534 let m = 0b11101000;
60535 let r = _mm_mask_expandloadu_epi64(src, m, black_box(p));
60536 let e = _mm_set_epi64x(42, 42);
60537 assert_eq_m128i(r, e);
60538 }
60539
60540 #[simd_test(enable = "avx512f,avx512vl")]
60541 unsafe fn test_mm_maskz_expandloadu_epi64() {
60542 let a = &[1_i64, 2];
60543 let p = a.as_ptr();
60544 let m = 0b11101000;
60545 let r = _mm_maskz_expandloadu_epi64(m, black_box(p));
60546 let e = _mm_set_epi64x(0, 0);
60547 assert_eq_m128i(r, e);
60548 }
60549
60550 #[simd_test(enable = "avx512f")]
60551 unsafe fn test_mm512_mask_expandloadu_ps() {
60552 let src = _mm512_set1_ps(42.);
60553 let a = &[
60554 1.0f32, 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
60555 ];
60556 let p = a.as_ptr();
60557 let m = 0b11101000_11001010;
        let r = _mm512_mask_expandloadu_ps(src, m, black_box(p));
        let e = _mm512_set_ps(
            8., 7., 6., 42., 5., 42., 42., 42., 4., 3., 42., 42., 2., 42., 1., 42.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_expandloadu_ps() {
        let a = &[
            1.0f32, 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        ];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm512_maskz_expandloadu_ps(m, black_box(p));
        let e = _mm512_set_ps(
            8., 7., 6., 0., 5., 0., 0., 0., 4., 3., 0., 0., 2., 0., 1., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_expandloadu_ps() {
        let src = _mm256_set1_ps(42.);
        let a = &[1.0f32, 2., 3., 4., 5., 6., 7., 8.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm256_mask_expandloadu_ps(src, m, black_box(p));
        let e = _mm256_set_ps(4., 3., 2., 42., 1., 42., 42., 42.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_expandloadu_ps() {
        let a = &[1.0f32, 2., 3., 4., 5., 6., 7., 8.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm256_maskz_expandloadu_ps(m, black_box(p));
        let e = _mm256_set_ps(4., 3., 2., 0., 1., 0., 0., 0.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_expandloadu_ps() {
        let src = _mm_set1_ps(42.);
        let a = &[1.0f32, 2., 3., 4.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm_mask_expandloadu_ps(src, m, black_box(p));
        let e = _mm_set_ps(1., 42., 42., 42.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_expandloadu_ps() {
        let a = &[1.0f32, 2., 3., 4.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm_maskz_expandloadu_ps(m, black_box(p));
        let e = _mm_set_ps(1., 0., 0., 0.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_expandloadu_pd() {
        let src = _mm512_set1_pd(42.);
        let a = &[1.0f64, 2., 3., 4., 5., 6., 7., 8.];
        let p = a.as_ptr();
        let m = 0b11101000;
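        // As in the epi64 case, lanes 3, 5, 6 and 7 receive 1.0, 2.0, 3.0, 4.0; the
        // other lanes keep 42.0 from `src`.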
        let r = _mm512_mask_expandloadu_pd(src, m, black_box(p));
        let e = _mm512_set_pd(4., 3., 2., 42., 1., 42., 42., 42.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_expandloadu_pd() {
        let a = &[1.0f64, 2., 3., 4., 5., 6., 7., 8.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm512_maskz_expandloadu_pd(m, black_box(p));
        let e = _mm512_set_pd(4., 3., 2., 0., 1., 0., 0., 0.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_expandloadu_pd() {
        let src = _mm256_set1_pd(42.);
        let a = &[1.0f64, 2., 3., 4.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm256_mask_expandloadu_pd(src, m, black_box(p));
        let e = _mm256_set_pd(1., 42., 42., 42.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_expandloadu_pd() {
        let a = &[1.0f64, 2., 3., 4.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm256_maskz_expandloadu_pd(m, black_box(p));
        let e = _mm256_set_pd(1., 0., 0., 0.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_expandloadu_pd() {
        let src = _mm_set1_pd(42.);
        let a = &[1.0f64, 2.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm_mask_expandloadu_pd(src, m, black_box(p));
        let e = _mm_set_pd(42., 42.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_expandloadu_pd() {
        let a = &[1.0f64, 2.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm_maskz_expandloadu_pd(m, black_box(p));
        let e = _mm_set_pd(0., 0.);
        assert_eq_m128d(r, e);
    }
}