use crate::{
    arch::asm,
    core_arch::{simd::*, x86::*},
    intrinsics::simd::*,
    intrinsics::{fmaf32, fmaf64},
    mem, ptr,
};

use core::hint::unreachable_unchecked;
#[cfg(test)]
use stdarch_test::assert_instr;

/// Computes the absolute values of packed 32-bit integers in `a`.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_epi32&expand=39)
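///
/// # Examples
///
/// A minimal usage sketch with illustrative values; it assumes a nightly
/// toolchain with the unstable `stdarch_x86_avx512` feature and an
/// AVX-512F-capable CPU, so it is not compiled as a doc test:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm512_set1_epi32(-7);
///     let r = _mm512_abs_epi32(a);
///     // every one of the 16 lanes of `r` now holds 7
/// }
/// ```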
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub fn _mm512_abs_epi32(a: __m512i) -> __m512i {
    unsafe {
        let a: i32x16 = a.as_i32x16();
        let r: i32x16 = simd_select::<i32x16, _>(simd_lt(a, i32x16::ZERO), simd_neg(a), a);
        transmute(r)
    }
}

/// Computes the absolute value of packed 32-bit integers in `a`, and stores the
/// unsigned results in `dst` using writemask `k` (elements are copied from
/// `src` when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_epi32&expand=40)
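///
/// # Examples
///
/// A minimal sketch of the writemask behaviour (illustrative values; assumes
/// nightly, the unstable `stdarch_x86_avx512` feature and an AVX-512F-capable
/// CPU, so it is not compiled as a doc test):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let src = _mm512_set1_epi32(1);
///     let a = _mm512_set1_epi32(-7);
///     // only the two lowest lanes are selected by the mask
///     let r = _mm512_mask_abs_epi32(src, 0b0000_0000_0000_0011, a);
///     // lanes 0 and 1 of `r` hold 7, the remaining lanes are copied from `src` (1)
/// }
/// ```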
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub fn _mm512_mask_abs_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
    unsafe {
        let abs: i32x16 = _mm512_abs_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, abs, src.as_i32x16()))
    }
}

/// Computes the absolute value of packed 32-bit integers in `a`, and stores the
/// unsigned results in `dst` using zeromask `k` (elements are zeroed out when
/// the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_abs_epi32&expand=41)
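///
/// # Examples
///
/// A minimal sketch of the zeromask behaviour (illustrative values; assumes
/// nightly, the unstable `stdarch_x86_avx512` feature and an AVX-512F-capable
/// CPU, so it is not compiled as a doc test):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm512_set1_epi32(-7);
///     let r = _mm512_maskz_abs_epi32(0b0000_0000_0000_1111, a);
///     // the four lowest lanes of `r` hold 7, the remaining twelve lanes are zeroed
/// }
/// ```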
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub fn _mm512_maskz_abs_epi32(k: __mmask16, a: __m512i) -> __m512i {
    unsafe {
        let abs: i32x16 = _mm512_abs_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, abs, i32x16::ZERO))
    }
}

/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_abs_epi32&expand=37)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub fn _mm256_mask_abs_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let abs: i32x8 = _mm256_abs_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, abs, src.as_i32x8()))
    }
}

/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_abs_epi32&expand=38)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub fn _mm256_maskz_abs_epi32(k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let abs: i32x8 = _mm256_abs_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, abs, i32x8::ZERO))
    }
}

/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_abs_epi32&expand=34)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub fn _mm_mask_abs_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let abs: i32x4 = _mm_abs_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, abs, src.as_i32x4()))
    }
}

/// Compute the absolute value of packed signed 32-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_abs_epi32&expand=35)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub fn _mm_maskz_abs_epi32(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let abs: i32x4 = _mm_abs_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, abs, i32x4::ZERO))
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_epi64&expand=48)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm512_abs_epi64(a: __m512i) -> __m512i {
    unsafe {
        let a: i64x8 = a.as_i64x8();
        let r: i64x8 = simd_select::<i64x8, _>(simd_lt(a, i64x8::ZERO), simd_neg(a), a);
        transmute(r)
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_epi64&expand=49)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm512_mask_abs_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
    unsafe {
        let abs: i64x8 = _mm512_abs_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, abs, src.as_i64x8()))
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_abs_epi64&expand=50)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm512_maskz_abs_epi64(k: __mmask8, a: __m512i) -> __m512i {
    unsafe {
        let abs: i64x8 = _mm512_abs_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, abs, i64x8::ZERO))
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_abs_epi64&expand=45)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm256_abs_epi64(a: __m256i) -> __m256i {
    unsafe {
        let a: i64x4 = a.as_i64x4();
        let r: i64x4 = simd_select::<i64x4, _>(simd_lt(a, i64x4::ZERO), simd_neg(a), a);
        transmute(r)
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_abs_epi64&expand=46)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm256_mask_abs_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let abs: i64x4 = _mm256_abs_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, abs, src.as_i64x4()))
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_abs_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm256_maskz_abs_epi64(k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let abs: i64x4 = _mm256_abs_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, abs, i64x4::ZERO))
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_abs_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm_abs_epi64(a: __m128i) -> __m128i {
    unsafe {
        let a: i64x2 = a.as_i64x2();
        let r: i64x2 = simd_select::<i64x2, _>(simd_lt(a, i64x2::ZERO), simd_neg(a), a);
        transmute(r)
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_abs_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm_mask_abs_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let abs: i64x2 = _mm_abs_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, abs, src.as_i64x2()))
    }
}

/// Compute the absolute value of packed signed 64-bit integers in a, and store the unsigned results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_abs_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub fn _mm_maskz_abs_epi64(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let abs: i64x2 = _mm_abs_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, abs, i64x2::ZERO))
    }
}

/// Finds the absolute value of each packed single-precision (32-bit) floating-point element in v2, storing the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_ps&expand=65)
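///
/// # Examples
///
/// A minimal usage sketch with illustrative values (assumes nightly, the
/// unstable `stdarch_x86_avx512` feature and an AVX-512F-capable CPU, so it
/// is not compiled as a doc test):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let v = _mm512_set1_ps(-0.5);
///     let r = _mm512_abs_ps(v);
///     // every lane of `r` is 0.5
/// }
/// ```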
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpandd))]
pub fn _mm512_abs_ps(v2: __m512) -> __m512 {
    unsafe { simd_fabs(v2) }
}

/// Finds the absolute value of each packed single-precision (32-bit) floating-point element in v2, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_ps&expand=66)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpandd))]
pub fn _mm512_mask_abs_ps(src: __m512, k: __mmask16, v2: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, simd_fabs(v2), src) }
}

/// Finds the absolute value of each packed double-precision (64-bit) floating-point element in v2, storing the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_abs_pd&expand=60)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpandq))]
pub fn _mm512_abs_pd(v2: __m512d) -> __m512d {
    unsafe { simd_fabs(v2) }
}

/// Finds the absolute value of each packed double-precision (64-bit) floating-point element in v2, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_abs_pd&expand=61)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpandq))]
pub fn _mm512_mask_abs_pd(src: __m512d, k: __mmask8, v2: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, simd_fabs(v2), src) }
}

/// Move packed 32-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_epi32&expand=3801)
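///
/// # Examples
///
/// A minimal sketch of a masked move (illustrative values; assumes nightly,
/// the unstable `stdarch_x86_avx512` feature and an AVX-512F-capable CPU, so
/// it is not compiled as a doc test):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let src = _mm512_set1_epi32(0);
///     let a = _mm512_set1_epi32(9);
///     let r = _mm512_mask_mov_epi32(src, 0b1111_1111_0000_0000, a);
///     // the upper eight lanes of `r` come from `a` (9), the lower eight from `src` (0)
/// }
/// ```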
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub fn _mm512_mask_mov_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
    unsafe {
        let mov: i32x16 = a.as_i32x16();
        transmute(simd_select_bitmask(k, mov, src.as_i32x16()))
    }
}

/// Move packed 32-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_epi32&expand=3802)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub fn _mm512_maskz_mov_epi32(k: __mmask16, a: __m512i) -> __m512i {
    unsafe {
        let mov: i32x16 = a.as_i32x16();
        transmute(simd_select_bitmask(k, mov, i32x16::ZERO))
    }
}

/// Move packed 32-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_epi32&expand=3799)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub fn _mm256_mask_mov_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let mov: i32x8 = a.as_i32x8();
        transmute(simd_select_bitmask(k, mov, src.as_i32x8()))
    }
}

/// Move packed 32-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_epi32&expand=3800)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub fn _mm256_maskz_mov_epi32(k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let mov: i32x8 = a.as_i32x8();
        transmute(simd_select_bitmask(k, mov, i32x8::ZERO))
    }
}

/// Move packed 32-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_epi32&expand=3797)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub fn _mm_mask_mov_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let mov: i32x4 = a.as_i32x4();
        transmute(simd_select_bitmask(k, mov, src.as_i32x4()))
    }
}

/// Move packed 32-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_epi32&expand=3798)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub fn _mm_maskz_mov_epi32(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let mov: i32x4 = a.as_i32x4();
        transmute(simd_select_bitmask(k, mov, i32x4::ZERO))
    }
}

/// Move packed 64-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_epi64&expand=3807)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub fn _mm512_mask_mov_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
    unsafe {
        let mov: i64x8 = a.as_i64x8();
        transmute(simd_select_bitmask(k, mov, src.as_i64x8()))
    }
}

/// Move packed 64-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_epi64&expand=3808)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub fn _mm512_maskz_mov_epi64(k: __mmask8, a: __m512i) -> __m512i {
    unsafe {
        let mov: i64x8 = a.as_i64x8();
        transmute(simd_select_bitmask(k, mov, i64x8::ZERO))
    }
}

/// Move packed 64-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_epi64&expand=3805)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub fn _mm256_mask_mov_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let mov: i64x4 = a.as_i64x4();
        transmute(simd_select_bitmask(k, mov, src.as_i64x4()))
    }
}

/// Move packed 64-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_epi64&expand=3806)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub fn _mm256_maskz_mov_epi64(k: __mmask8, a: __m256i) -> __m256i {
    unsafe {
        let mov: i64x4 = a.as_i64x4();
        transmute(simd_select_bitmask(k, mov, i64x4::ZERO))
    }
}

/// Move packed 64-bit integers from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_epi64&expand=3803)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub fn _mm_mask_mov_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let mov: i64x2 = a.as_i64x2();
        transmute(simd_select_bitmask(k, mov, src.as_i64x2()))
    }
}

/// Move packed 64-bit integers from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_epi64&expand=3804)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub fn _mm_maskz_mov_epi64(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let mov: i64x2 = a.as_i64x2();
        transmute(simd_select_bitmask(k, mov, i64x2::ZERO))
    }
}

/// Move packed single-precision (32-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_ps&expand=3825)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub fn _mm512_mask_mov_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    unsafe {
        let mov: f32x16 = a.as_f32x16();
        transmute(simd_select_bitmask(k, mov, src.as_f32x16()))
    }
}

/// Move packed single-precision (32-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_ps&expand=3826)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub fn _mm512_maskz_mov_ps(k: __mmask16, a: __m512) -> __m512 {
    unsafe {
        let mov: f32x16 = a.as_f32x16();
        transmute(simd_select_bitmask(k, mov, f32x16::ZERO))
    }
}

/// Move packed single-precision (32-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_ps&expand=3823)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub fn _mm256_mask_mov_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
    unsafe {
        let mov: f32x8 = a.as_f32x8();
        transmute(simd_select_bitmask(k, mov, src.as_f32x8()))
    }
}

/// Move packed single-precision (32-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_ps&expand=3824)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub fn _mm256_maskz_mov_ps(k: __mmask8, a: __m256) -> __m256 {
    unsafe {
        let mov: f32x8 = a.as_f32x8();
        transmute(simd_select_bitmask(k, mov, f32x8::ZERO))
    }
}

/// Move packed single-precision (32-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_ps&expand=3821)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub fn _mm_mask_mov_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    unsafe {
        let mov: f32x4 = a.as_f32x4();
        transmute(simd_select_bitmask(k, mov, src.as_f32x4()))
    }
}

/// Move packed single-precision (32-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_ps&expand=3822)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub fn _mm_maskz_mov_ps(k: __mmask8, a: __m128) -> __m128 {
    unsafe {
        let mov: f32x4 = a.as_f32x4();
        transmute(simd_select_bitmask(k, mov, f32x4::ZERO))
    }
}

/// Move packed double-precision (64-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mov_pd&expand=3819)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub fn _mm512_mask_mov_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    unsafe {
        let mov: f64x8 = a.as_f64x8();
        transmute(simd_select_bitmask(k, mov, src.as_f64x8()))
    }
}

/// Move packed double-precision (64-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mov_pd&expand=3820)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub fn _mm512_maskz_mov_pd(k: __mmask8, a: __m512d) -> __m512d {
    unsafe {
        let mov: f64x8 = a.as_f64x8();
        transmute(simd_select_bitmask(k, mov, f64x8::ZERO))
    }
}

/// Move packed double-precision (64-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mov_pd&expand=3817)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub fn _mm256_mask_mov_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
    unsafe {
        let mov: f64x4 = a.as_f64x4();
        transmute(simd_select_bitmask(k, mov, src.as_f64x4()))
    }
}

/// Move packed double-precision (64-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mov_pd&expand=3818)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub fn _mm256_maskz_mov_pd(k: __mmask8, a: __m256d) -> __m256d {
    unsafe {
        let mov: f64x4 = a.as_f64x4();
        transmute(simd_select_bitmask(k, mov, f64x4::ZERO))
    }
}

/// Move packed double-precision (64-bit) floating-point elements from a to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mov_pd&expand=3815)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub fn _mm_mask_mov_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
    unsafe {
        let mov: f64x2 = a.as_f64x2();
        transmute(simd_select_bitmask(k, mov, src.as_f64x2()))
    }
}

/// Move packed double-precision (64-bit) floating-point elements from a into dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mov_pd&expand=3816)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub fn _mm_maskz_mov_pd(k: __mmask8, a: __m128d) -> __m128d {
    unsafe {
        let mov: f64x2 = a.as_f64x2();
        transmute(simd_select_bitmask(k, mov, f64x2::ZERO))
    }
}

/// Add packed 32-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_epi32&expand=100)
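///
/// # Examples
///
/// A minimal usage sketch with illustrative values (assumes nightly, the
/// unstable `stdarch_x86_avx512` feature and an AVX-512F-capable CPU, so it
/// is not compiled as a doc test):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm512_set1_epi32(3);
///     let b = _mm512_set1_epi32(4);
///     let r = _mm512_add_epi32(a, b);
///     // every lane of `r` is 7 (the addition wraps on overflow)
/// }
/// ```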
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub fn _mm512_add_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_add(a.as_i32x16(), b.as_i32x16())) }
}

/// Add packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_epi32&expand=101)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub fn _mm512_mask_add_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add: i32x16 = _mm512_add_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, add, src.as_i32x16()))
    }
}

/// Add packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_epi32&expand=102)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub fn _mm512_maskz_add_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add: i32x16 = _mm512_add_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, add, i32x16::ZERO))
    }
}

/// Add packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_epi32&expand=98)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub fn _mm256_mask_add_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add: i32x8 = _mm256_add_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, add, src.as_i32x8()))
    }
}

/// Add packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_epi32&expand=99)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub fn _mm256_maskz_add_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add: i32x8 = _mm256_add_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, add, i32x8::ZERO))
    }
}

/// Add packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_epi32&expand=95)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub fn _mm_mask_add_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add: i32x4 = _mm_add_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, add, src.as_i32x4()))
    }
}

/// Add packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_epi32&expand=96)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub fn _mm_maskz_add_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add: i32x4 = _mm_add_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, add, i32x4::ZERO))
    }
}

/// Add packed 64-bit integers in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_epi64&expand=109)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub fn _mm512_add_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_add(a.as_i64x8(), b.as_i64x8())) }
}

/// Add packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_epi64&expand=110)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub fn _mm512_mask_add_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add: i64x8 = _mm512_add_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, add, src.as_i64x8()))
    }
}

/// Add packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_epi64&expand=111)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub fn _mm512_maskz_add_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe {
        let add: i64x8 = _mm512_add_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, add, i64x8::ZERO))
    }
}

/// Add packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_epi64&expand=107)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub fn _mm256_mask_add_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add: i64x4 = _mm256_add_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, add, src.as_i64x4()))
    }
}

/// Add packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_epi64&expand=108)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub fn _mm256_maskz_add_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe {
        let add: i64x4 = _mm256_add_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, add, i64x4::ZERO))
    }
}

/// Add packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_epi64&expand=104)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub fn _mm_mask_add_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add: i64x2 = _mm_add_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, add, src.as_i64x2()))
    }
}

/// Add packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_epi64&expand=105)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub fn _mm_maskz_add_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe {
        let add: i64x2 = _mm_add_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, add, i64x2::ZERO))
    }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_ps&expand=139)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddps))]
pub fn _mm512_add_ps(a: __m512, b: __m512) -> __m512 {
    unsafe { transmute(simd_add(a.as_f32x16(), b.as_f32x16())) }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_ps&expand=140)
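///
/// # Examples
///
/// A minimal sketch of the writemask behaviour (illustrative values; assumes
/// nightly, the unstable `stdarch_x86_avx512` feature and an AVX-512F-capable
/// CPU, so it is not compiled as a doc test):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let src = _mm512_set1_ps(0.0);
///     let a = _mm512_set1_ps(1.5);
///     let b = _mm512_set1_ps(2.5);
///     let r = _mm512_mask_add_ps(src, 0b0000_0000_1111_1111, a, b);
///     // the lower eight lanes of `r` hold 4.0, the upper eight are copied from `src` (0.0)
/// }
/// ```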
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddps))]
pub fn _mm512_mask_add_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let add: f32x16 = _mm512_add_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, add, src.as_f32x16()))
    }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_ps&expand=141)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddps))]
pub fn _mm512_maskz_add_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe {
        let add: f32x16 = _mm512_add_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, add, f32x16::ZERO))
    }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_ps&expand=137)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddps))]
pub fn _mm256_mask_add_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let add: f32x8 = _mm256_add_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, add, src.as_f32x8()))
    }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_ps&expand=138)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddps))]
pub fn _mm256_maskz_add_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe {
        let add: f32x8 = _mm256_add_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, add, f32x8::ZERO))
    }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_ps&expand=134)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddps))]
pub fn _mm_mask_add_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let add: f32x4 = _mm_add_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, add, src.as_f32x4()))
    }
}

/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_ps&expand=135)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddps))]
pub fn _mm_maskz_add_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe {
        let add: f32x4 = _mm_add_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, add, f32x4::ZERO))
    }
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_pd&expand=127)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub fn _mm512_add_pd(a: __m512d, b: __m512d) -> __m512d {
    unsafe { transmute(simd_add(a.as_f64x8(), b.as_f64x8())) }
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_pd&expand=128)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub fn _mm512_mask_add_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let add: f64x8 = _mm512_add_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, add, src.as_f64x8()))
    }
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_pd&expand=129)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub fn _mm512_maskz_add_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe {
        let add: f64x8 = _mm512_add_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, add, f64x8::ZERO))
    }
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_add_pd&expand=125)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub fn _mm256_mask_add_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let add: f64x4 = _mm256_add_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, add, src.as_f64x4()))
    }
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_add_pd&expand=126)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub fn _mm256_maskz_add_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe {
        let add: f64x4 = _mm256_add_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, add, f64x4::ZERO))
    }
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_add_pd&expand=122)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub fn _mm_mask_add_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let add: f64x2 = _mm_add_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, add, src.as_f64x2()))
    }
}

/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_pd&expand=123)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub fn _mm_maskz_add_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe {
        let add: f64x2 = _mm_add_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, add, f64x2::ZERO))
    }
}

/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_epi32&expand=5694)
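///
/// # Examples
///
/// A minimal usage sketch with illustrative values (assumes nightly, the
/// unstable `stdarch_x86_avx512` feature and an AVX-512F-capable CPU, so it
/// is not compiled as a doc test):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm512_set1_epi32(10);
///     let b = _mm512_set1_epi32(3);
///     let r = _mm512_sub_epi32(a, b);
///     // every lane of `r` is 7 (the subtraction wraps on overflow)
/// }
/// ```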
1008#[inline]
1009#[target_feature(enable = "avx512f")]
1010#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1011#[cfg_attr(test, assert_instr(vpsubd))]
1012pub fn _mm512_sub_epi32(a: __m512i, b: __m512i) -> __m512i {
1013 unsafe { transmute(src:simd_sub(lhs:a.as_i32x16(), rhs:b.as_i32x16())) }
1014}
1015
1016/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1017///
1018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_epi32&expand=5692)
1019#[inline]
1020#[target_feature(enable = "avx512f")]
1021#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1022#[cfg_attr(test, assert_instr(vpsubd))]
1023pub fn _mm512_mask_sub_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
1024 unsafe {
1025 let sub: i32x16 = _mm512_sub_epi32(a, b).as_i32x16();
1026 transmute(src:simd_select_bitmask(m:k, yes:sub, no:src.as_i32x16()))
1027 }
1028}
1029
1030/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1031///
1032/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_epi32&expand=5693)
1033#[inline]
1034#[target_feature(enable = "avx512f")]
1035#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1036#[cfg_attr(test, assert_instr(vpsubd))]
1037pub fn _mm512_maskz_sub_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
1038 unsafe {
1039 let sub: i32x16 = _mm512_sub_epi32(a, b).as_i32x16();
1040 transmute(src:simd_select_bitmask(m:k, yes:sub, no:i32x16::ZERO))
1041 }
1042}
1043
1044/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1045///
1046/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_epi32&expand=5689)
1047#[inline]
1048#[target_feature(enable = "avx512f,avx512vl")]
1049#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1050#[cfg_attr(test, assert_instr(vpsubd))]
1051pub fn _mm256_mask_sub_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1052 unsafe {
1053 let sub: i32x8 = _mm256_sub_epi32(a, b).as_i32x8();
1054 transmute(src:simd_select_bitmask(m:k, yes:sub, no:src.as_i32x8()))
1055 }
1056}
1057
1058/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1059///
1060/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_epi32&expand=5690)
1061#[inline]
1062#[target_feature(enable = "avx512f,avx512vl")]
1063#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1064#[cfg_attr(test, assert_instr(vpsubd))]
1065pub fn _mm256_maskz_sub_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1066 unsafe {
1067 let sub: i32x8 = _mm256_sub_epi32(a, b).as_i32x8();
1068 transmute(src:simd_select_bitmask(m:k, yes:sub, no:i32x8::ZERO))
1069 }
1070}
1071
1072/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1073///
1074/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_epi32&expand=5686)
1075#[inline]
1076#[target_feature(enable = "avx512f,avx512vl")]
1077#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1078#[cfg_attr(test, assert_instr(vpsubd))]
1079pub fn _mm_mask_sub_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1080 unsafe {
1081 let sub: i32x4 = _mm_sub_epi32(a, b).as_i32x4();
1082 transmute(src:simd_select_bitmask(m:k, yes:sub, no:src.as_i32x4()))
1083 }
1084}
1085
1086/// Subtract packed 32-bit integers in b from packed 32-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1087///
1088/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_epi32&expand=5687)
1089#[inline]
1090#[target_feature(enable = "avx512f,avx512vl")]
1091#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1092#[cfg_attr(test, assert_instr(vpsubd))]
1093pub fn _mm_maskz_sub_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1094 unsafe {
1095 let sub: i32x4 = _mm_sub_epi32(a, b).as_i32x4();
1096 transmute(src:simd_select_bitmask(m:k, yes:sub, no:i32x4::ZERO))
1097 }
1098}
1099
1100/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst.
1101///
1102/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_epi64&expand=5703)
1103#[inline]
1104#[target_feature(enable = "avx512f")]
1105#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1106#[cfg_attr(test, assert_instr(vpsubq))]
1107pub fn _mm512_sub_epi64(a: __m512i, b: __m512i) -> __m512i {
1108 unsafe { transmute(src:simd_sub(lhs:a.as_i64x8(), rhs:b.as_i64x8())) }
1109}
1110
1111/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1112///
1113/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_epi64&expand=5701)
1114#[inline]
1115#[target_feature(enable = "avx512f")]
1116#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1117#[cfg_attr(test, assert_instr(vpsubq))]
1118pub fn _mm512_mask_sub_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1119 unsafe {
1120 let sub: i64x8 = _mm512_sub_epi64(a, b).as_i64x8();
1121 transmute(src:simd_select_bitmask(m:k, yes:sub, no:src.as_i64x8()))
1122 }
1123}
1124
1125/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1126///
1127/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_epi64&expand=5702)
1128#[inline]
1129#[target_feature(enable = "avx512f")]
1130#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1131#[cfg_attr(test, assert_instr(vpsubq))]
1132pub fn _mm512_maskz_sub_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1133 unsafe {
1134 let sub: i64x8 = _mm512_sub_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, sub, i64x8::ZERO))
1136 }
1137}
1138
1139/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1140///
1141/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_epi64&expand=5698)
1142#[inline]
1143#[target_feature(enable = "avx512f,avx512vl")]
1144#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1145#[cfg_attr(test, assert_instr(vpsubq))]
1146pub fn _mm256_mask_sub_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1147 unsafe {
1148 let sub: i64x4 = _mm256_sub_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, sub, src.as_i64x4()))
1150 }
1151}
1152
1153/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1154///
1155/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_epi64&expand=5699)
1156#[inline]
1157#[target_feature(enable = "avx512f,avx512vl")]
1158#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1159#[cfg_attr(test, assert_instr(vpsubq))]
1160pub fn _mm256_maskz_sub_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1161 unsafe {
1162 let sub: i64x4 = _mm256_sub_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, sub, i64x4::ZERO))
1164 }
1165}
1166
1167/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1168///
1169/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_epi64&expand=5695)
1170#[inline]
1171#[target_feature(enable = "avx512f,avx512vl")]
1172#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1173#[cfg_attr(test, assert_instr(vpsubq))]
1174pub fn _mm_mask_sub_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1175 unsafe {
1176 let sub: i64x2 = _mm_sub_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, sub, src.as_i64x2()))
1178 }
1179}
1180
1181/// Subtract packed 64-bit integers in b from packed 64-bit integers in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1182///
1183/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_epi64&expand=5696)
1184#[inline]
1185#[target_feature(enable = "avx512f,avx512vl")]
1186#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1187#[cfg_attr(test, assert_instr(vpsubq))]
1188pub fn _mm_maskz_sub_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1189 unsafe {
1190 let sub: i64x2 = _mm_sub_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, sub, i64x2::ZERO))
1192 }
1193}
1194
1195/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst.
1196///
1197/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_ps&expand=5733)
1198#[inline]
1199#[target_feature(enable = "avx512f")]
1200#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1201#[cfg_attr(test, assert_instr(vsubps))]
1202pub fn _mm512_sub_ps(a: __m512, b: __m512) -> __m512 {
    unsafe { transmute(simd_sub(a.as_f32x16(), b.as_f32x16())) }
1204}
1205
1206/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1207///
1208/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_ps&expand=5731)
1209#[inline]
1210#[target_feature(enable = "avx512f")]
1211#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1212#[cfg_attr(test, assert_instr(vsubps))]
1213pub fn _mm512_mask_sub_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
1214 unsafe {
1215 let sub: f32x16 = _mm512_sub_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, sub, src.as_f32x16()))
1217 }
1218}
1219
1220/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1221///
1222/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_ps&expand=5732)
1223#[inline]
1224#[target_feature(enable = "avx512f")]
1225#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1226#[cfg_attr(test, assert_instr(vsubps))]
1227pub fn _mm512_maskz_sub_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
1228 unsafe {
1229 let sub: f32x16 = _mm512_sub_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, sub, f32x16::ZERO))
1231 }
1232}
1233
1234/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1235///
1236/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_ps&expand=5728)
1237#[inline]
1238#[target_feature(enable = "avx512f,avx512vl")]
1239#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1240#[cfg_attr(test, assert_instr(vsubps))]
1241pub fn _mm256_mask_sub_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
1242 unsafe {
1243 let sub: f32x8 = _mm256_sub_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, sub, src.as_f32x8()))
1245 }
1246}
1247
1248/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1249///
1250/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_ps&expand=5729)
1251#[inline]
1252#[target_feature(enable = "avx512f,avx512vl")]
1253#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1254#[cfg_attr(test, assert_instr(vsubps))]
1255pub fn _mm256_maskz_sub_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
1256 unsafe {
1257 let sub: f32x8 = _mm256_sub_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, sub, f32x8::ZERO))
1259 }
1260}
1261
1262/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1263///
1264/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_ps&expand=5725)
1265#[inline]
1266#[target_feature(enable = "avx512f,avx512vl")]
1267#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1268#[cfg_attr(test, assert_instr(vsubps))]
1269pub fn _mm_mask_sub_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
1270 unsafe {
1271 let sub: f32x4 = _mm_sub_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, sub, src.as_f32x4()))
1273 }
1274}
1275
1276/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1277///
1278/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_ps&expand=5726)
1279#[inline]
1280#[target_feature(enable = "avx512f,avx512vl")]
1281#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1282#[cfg_attr(test, assert_instr(vsubps))]
1283pub fn _mm_maskz_sub_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
1284 unsafe {
1285 let sub: f32x4 = _mm_sub_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, sub, f32x4::ZERO))
1287 }
1288}
1289
1290/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst.
1291///
1292/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_pd&expand=5721)
1293#[inline]
1294#[target_feature(enable = "avx512f")]
1295#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1296#[cfg_attr(test, assert_instr(vsubpd))]
1297pub fn _mm512_sub_pd(a: __m512d, b: __m512d) -> __m512d {
    unsafe { transmute(simd_sub(a.as_f64x8(), b.as_f64x8())) }
1299}
1300
1301/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1302///
1303/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_pd&expand=5719)
1304#[inline]
1305#[target_feature(enable = "avx512f")]
1306#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1307#[cfg_attr(test, assert_instr(vsubpd))]
1308pub fn _mm512_mask_sub_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
1309 unsafe {
1310 let sub: f64x8 = _mm512_sub_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, sub, src.as_f64x8()))
1312 }
1313}
1314
1315/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1316///
1317/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_pd&expand=5720)
1318#[inline]
1319#[target_feature(enable = "avx512f")]
1320#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1321#[cfg_attr(test, assert_instr(vsubpd))]
1322pub fn _mm512_maskz_sub_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
1323 unsafe {
1324 let sub: f64x8 = _mm512_sub_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, sub, f64x8::ZERO))
1326 }
1327}
1328
1329/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1330///
1331/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sub_pd&expand=5716)
1332#[inline]
1333#[target_feature(enable = "avx512f,avx512vl")]
1334#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1335#[cfg_attr(test, assert_instr(vsubpd))]
1336pub fn _mm256_mask_sub_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
1337 unsafe {
1338 let sub: f64x4 = _mm256_sub_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, sub, src.as_f64x4()))
1340 }
1341}
1342
1343/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1344///
1345/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sub_pd&expand=5717)
1346#[inline]
1347#[target_feature(enable = "avx512f,avx512vl")]
1348#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1349#[cfg_attr(test, assert_instr(vsubpd))]
1350pub fn _mm256_maskz_sub_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
1351 unsafe {
1352 let sub: f64x4 = _mm256_sub_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, sub, f64x4::ZERO))
1354 }
1355}
1356
1357/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1358///
1359/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sub_pd&expand=5713)
1360#[inline]
1361#[target_feature(enable = "avx512f,avx512vl")]
1362#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1363#[cfg_attr(test, assert_instr(vsubpd))]
1364pub fn _mm_mask_sub_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
1365 unsafe {
1366 let sub: f64x2 = _mm_sub_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, sub, src.as_f64x2()))
1368 }
1369}
1370
1371/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1372///
1373/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sub_pd&expand=5714)
1374#[inline]
1375#[target_feature(enable = "avx512f,avx512vl")]
1376#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1377#[cfg_attr(test, assert_instr(vsubpd))]
1378pub fn _mm_maskz_sub_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
1379 unsafe {
1380 let sub: f64x2 = _mm_sub_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, sub, f64x2::ZERO))
1382 }
1383}
1384
1385/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst.
1386///
1387/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_epi32&expand=3907)
1388#[inline]
1389#[target_feature(enable = "avx512f")]
1390#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1391#[cfg_attr(test, assert_instr(vpmuldq))]
1392pub fn _mm512_mul_epi32(a: __m512i, b: __m512i) -> __m512i {
1393 unsafe {
1394 let a: i64x8 = simd_cast::<_, i64x8>(simd_cast::<_, i32x8>(a.as_i64x8()));
1395 let b: i64x8 = simd_cast::<_, i64x8>(simd_cast::<_, i32x8>(b.as_i64x8()));
        transmute(simd_mul(a, b))
1397 }
1398}
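
// Only the low 32 bits of every 64-bit lane participate: the inner cast to
// `i32x8` truncates each lane to its low half and the outer cast sign-extends it
// back to 64 bits before the multiply. A one-lane scalar sketch of the same
// computation, illustrative only and not part of this module's API:
#[cfg(test)]
#[allow(dead_code)]
fn mul_epi32_lane_model(a: u64, b: u64) -> i64 {
    // Truncate to the low 32 bits, reinterpret as signed, widen to 64 bits.
    let lo_a = a as u32 as i32 as i64;
    let lo_b = b as u32 as i32 as i64;
    // The product of two sign-extended 32-bit values always fits in an i64.
    lo_a * lo_b
}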
1399
1400/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1401///
1402/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_epi32&expand=3905)
1403#[inline]
1404#[target_feature(enable = "avx512f")]
1405#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1406#[cfg_attr(test, assert_instr(vpmuldq))]
1407pub fn _mm512_mask_mul_epi32(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1408 unsafe {
1409 let mul: i64x8 = _mm512_mul_epi32(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, mul, src.as_i64x8()))
1411 }
1412}
1413
1414/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1415///
1416/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_epi32&expand=3906)
1417#[inline]
1418#[target_feature(enable = "avx512f")]
1419#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1420#[cfg_attr(test, assert_instr(vpmuldq))]
1421pub fn _mm512_maskz_mul_epi32(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1422 unsafe {
1423 let mul: i64x8 = _mm512_mul_epi32(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, mul, i64x8::ZERO))
1425 }
1426}
1427
1428/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1429///
1430/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_epi32&expand=3902)
1431#[inline]
1432#[target_feature(enable = "avx512f,avx512vl")]
1433#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1434#[cfg_attr(test, assert_instr(vpmuldq))]
1435pub fn _mm256_mask_mul_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1436 unsafe {
1437 let mul: i64x4 = _mm256_mul_epi32(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, mul, src.as_i64x4()))
1439 }
1440}
1441
1442/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1443///
1444/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_epi32&expand=3903)
1445#[inline]
1446#[target_feature(enable = "avx512f,avx512vl")]
1447#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1448#[cfg_attr(test, assert_instr(vpmuldq))]
1449pub fn _mm256_maskz_mul_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1450 unsafe {
1451 let mul: i64x4 = _mm256_mul_epi32(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, mul, i64x4::ZERO))
1453 }
1454}
1455
1456/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1457///
1458/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_epi32&expand=3899)
1459#[inline]
1460#[target_feature(enable = "avx512f,avx512vl")]
1461#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1462#[cfg_attr(test, assert_instr(vpmuldq))]
1463pub fn _mm_mask_mul_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1464 unsafe {
1465 let mul: i64x2 = _mm_mul_epi32(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, mul, src.as_i64x2()))
1467 }
1468}
1469
1470/// Multiply the low signed 32-bit integers from each packed 64-bit element in a and b, and store the signed 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1471///
1472/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_epi32&expand=3900)
1473#[inline]
1474#[target_feature(enable = "avx512f,avx512vl")]
1475#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1476#[cfg_attr(test, assert_instr(vpmuldq))]
1477pub fn _mm_maskz_mul_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1478 unsafe {
1479 let mul: i64x2 = _mm_mul_epi32(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, mul, i64x2::ZERO))
1481 }
1482}
1483
1484/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst.
1485///
1486/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mullo_epi32&expand=4005)
1487#[inline]
1488#[target_feature(enable = "avx512f")]
1489#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1490#[cfg_attr(test, assert_instr(vpmulld))]
1491pub fn _mm512_mullo_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_mul(a.as_i32x16(), b.as_i32x16())) }
1493}
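
// Keeping only the low 32 bits of the 64-bit intermediate product is exactly
// wrapping 32-bit multiplication. A one-lane scalar sketch, illustrative only:
#[cfg(test)]
#[allow(dead_code)]
fn mullo_epi32_lane_model(a: i32, b: i32) -> i32 {
    // Low 32 bits of the widened product; identical to a.wrapping_mul(b).
    (i64::from(a) * i64::from(b)) as i32
}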
1494
1495/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1496///
1497/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mullo_epi32&expand=4003)
1498#[inline]
1499#[target_feature(enable = "avx512f")]
1500#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1501#[cfg_attr(test, assert_instr(vpmulld))]
1502pub fn _mm512_mask_mullo_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
1503 unsafe {
1504 let mul: i32x16 = _mm512_mullo_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, mul, src.as_i32x16()))
1506 }
1507}
1508
1509/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1510///
1511/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mullo_epi32&expand=4004)
1512#[inline]
1513#[target_feature(enable = "avx512f")]
1514#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1515#[cfg_attr(test, assert_instr(vpmulld))]
1516pub fn _mm512_maskz_mullo_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
1517 unsafe {
1518 let mul: i32x16 = _mm512_mullo_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, mul, i32x16::ZERO))
1520 }
1521}
1522
1523/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1524///
1525/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mullo_epi32&expand=4000)
1526#[inline]
1527#[target_feature(enable = "avx512f,avx512vl")]
1528#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1529#[cfg_attr(test, assert_instr(vpmulld))]
1530pub fn _mm256_mask_mullo_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1531 unsafe {
1532 let mul: i32x8 = _mm256_mullo_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, mul, src.as_i32x8()))
1534 }
1535}
1536
1537/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1538///
1539/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mullo_epi32&expand=4001)
1540#[inline]
1541#[target_feature(enable = "avx512f,avx512vl")]
1542#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1543#[cfg_attr(test, assert_instr(vpmulld))]
1544pub fn _mm256_maskz_mullo_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1545 unsafe {
1546 let mul: i32x8 = _mm256_mullo_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, mul, i32x8::ZERO))
1548 }
1549}
1550
1551/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1552///
1553/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mullo_epi32&expand=3997)
1554#[inline]
1555#[target_feature(enable = "avx512f,avx512vl")]
1556#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1557#[cfg_attr(test, assert_instr(vpmulld))]
1558pub fn _mm_mask_mullo_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1559 unsafe {
1560 let mul: i32x4 = _mm_mullo_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, mul, src.as_i32x4()))
1562 }
1563}
1564
1565/// Multiply the packed 32-bit integers in a and b, producing intermediate 64-bit integers, and store the low 32 bits of the intermediate integers in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1566///
1567/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mullo_epi32&expand=3998)
1568#[inline]
1569#[target_feature(enable = "avx512f,avx512vl")]
1570#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1571#[cfg_attr(test, assert_instr(vpmulld))]
1572pub fn _mm_maskz_mullo_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1573 unsafe {
1574 let mul: i32x4 = _mm_mullo_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, mul, i32x4::ZERO))
1576 }
1577}
1578
1579/// Multiplies elements in packed 64-bit integer vectors a and b together, storing the lower 64 bits of the result in dst.
1580///
1581/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mullox_epi64&expand=4017)
1582///
1583/// This intrinsic generates a sequence of instructions, which may perform worse than a native instruction. Consider the performance impact of this intrinsic.
1584#[inline]
1585#[target_feature(enable = "avx512f")]
1586#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1587pub fn _mm512_mullox_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_mul(a.as_i64x8(), b.as_i64x8())) }
1589}
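
// There is no `assert_instr` on `_mm512_mullox_epi64` because, as the note above
// says, it is lowered to a sequence of instructions rather than a single one.
// Semantically it keeps the low 64 bits of each 128-bit product, i.e. wrapping
// 64-bit multiplication. A one-lane scalar sketch, illustrative only:
#[cfg(test)]
#[allow(dead_code)]
fn mullox_epi64_lane_model(a: i64, b: i64) -> i64 {
    a.wrapping_mul(b)
}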
1590
1591/// Multiplies elements in packed 64-bit integer vectors a and b together, storing the lower 64 bits of the result in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1592///
1593/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mullox_epi64&expand=4016)
1594///
1595/// This intrinsic generates a sequence of instructions, which may perform worse than a native instruction. Consider the performance impact of this intrinsic.
1596#[inline]
1597#[target_feature(enable = "avx512f")]
1598#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1599pub fn _mm512_mask_mullox_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1600 unsafe {
1601 let mul: i64x8 = _mm512_mullox_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, mul, src.as_i64x8()))
1603 }
1604}
1605
1606/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst.
1607///
1608/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_epu32&expand=3916)
1609#[inline]
1610#[target_feature(enable = "avx512f")]
1611#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1612#[cfg_attr(test, assert_instr(vpmuludq))]
1613pub fn _mm512_mul_epu32(a: __m512i, b: __m512i) -> __m512i {
1614 unsafe {
1615 let a: u64x8 = a.as_u64x8();
1616 let b: u64x8 = b.as_u64x8();
1617 let mask: u64x8 = u64x8::splat(u32::MAX.into());
        transmute(simd_mul(simd_and(a, mask), simd_and(b, mask)))
1619 }
1620}
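
// The `u32::MAX` splat clears the high half of every 64-bit lane before the
// multiply, so each product of two values below 2^32 fits in the 64-bit result
// without wrapping. A one-lane scalar sketch, illustrative only:
#[cfg(test)]
#[allow(dead_code)]
fn mul_epu32_lane_model(a: u64, b: u64) -> u64 {
    // Keep only the low 32 bits of each operand, then multiply as u64.
    (a & u64::from(u32::MAX)) * (b & u64::from(u32::MAX))
}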
1621
1622/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1623///
1624/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_epu32&expand=3914)
1625#[inline]
1626#[target_feature(enable = "avx512f")]
1627#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1628#[cfg_attr(test, assert_instr(vpmuludq))]
1629pub fn _mm512_mask_mul_epu32(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1630 unsafe {
1631 let mul: u64x8 = _mm512_mul_epu32(a, b).as_u64x8();
        transmute(simd_select_bitmask(k, mul, src.as_u64x8()))
1633 }
1634}
1635
1636/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1637///
1638/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_epu32&expand=3915)
1639#[inline]
1640#[target_feature(enable = "avx512f")]
1641#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1642#[cfg_attr(test, assert_instr(vpmuludq))]
1643pub fn _mm512_maskz_mul_epu32(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
1644 unsafe {
1645 let mul: u64x8 = _mm512_mul_epu32(a, b).as_u64x8();
        transmute(simd_select_bitmask(k, mul, u64x8::ZERO))
1647 }
1648}
1649
1650/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1651///
1652/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_epu32&expand=3911)
1653#[inline]
1654#[target_feature(enable = "avx512f,avx512vl")]
1655#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1656#[cfg_attr(test, assert_instr(vpmuludq))]
1657pub fn _mm256_mask_mul_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1658 unsafe {
1659 let mul: u64x4 = _mm256_mul_epu32(a, b).as_u64x4();
        transmute(simd_select_bitmask(k, mul, src.as_u64x4()))
1661 }
1662}
1663
1664/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1665///
1666/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_epu32&expand=3912)
1667#[inline]
1668#[target_feature(enable = "avx512f,avx512vl")]
1669#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1670#[cfg_attr(test, assert_instr(vpmuludq))]
1671pub fn _mm256_maskz_mul_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
1672 unsafe {
1673 let mul: u64x4 = _mm256_mul_epu32(a, b).as_u64x4();
        transmute(simd_select_bitmask(k, mul, u64x4::ZERO))
1675 }
1676}
1677
1678/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1679///
1680/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_epu32&expand=3908)
1681#[inline]
1682#[target_feature(enable = "avx512f,avx512vl")]
1683#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1684#[cfg_attr(test, assert_instr(vpmuludq))]
1685pub fn _mm_mask_mul_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1686 unsafe {
1687 let mul: u64x2 = _mm_mul_epu32(a, b).as_u64x2();
        transmute(simd_select_bitmask(k, mul, src.as_u64x2()))
1689 }
1690}
1691
1692/// Multiply the low unsigned 32-bit integers from each packed 64-bit element in a and b, and store the unsigned 64-bit results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1693///
1694/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_epu32&expand=3909)
1695#[inline]
1696#[target_feature(enable = "avx512f,avx512vl")]
1697#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1698#[cfg_attr(test, assert_instr(vpmuludq))]
1699pub fn _mm_maskz_mul_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
1700 unsafe {
1701 let mul: u64x2 = _mm_mul_epu32(a, b).as_u64x2();
        transmute(simd_select_bitmask(k, mul, u64x2::ZERO))
1703 }
1704}
1705
1706/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.
1707///
1708/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_ps&expand=3934)
1709#[inline]
1710#[target_feature(enable = "avx512f")]
1711#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1712#[cfg_attr(test, assert_instr(vmulps))]
1713pub fn _mm512_mul_ps(a: __m512, b: __m512) -> __m512 {
    unsafe { transmute(simd_mul(a.as_f32x16(), b.as_f32x16())) }
1715}
1716
1717/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1718///
1719/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_ps&expand=3932)
1720#[inline]
1721#[target_feature(enable = "avx512f")]
1722#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1723#[cfg_attr(test, assert_instr(vmulps))]
1724pub fn _mm512_mask_mul_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
1725 unsafe {
1726 let mul: f32x16 = _mm512_mul_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, mul, src.as_f32x16()))
1728 }
1729}
1730
1731/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1732///
1733/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_ps&expand=3933)
1734#[inline]
1735#[target_feature(enable = "avx512f")]
1736#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1737#[cfg_attr(test, assert_instr(vmulps))]
1738pub fn _mm512_maskz_mul_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
1739 unsafe {
1740 let mul: f32x16 = _mm512_mul_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, mul, f32x16::ZERO))
1742 }
1743}
1744
1745/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1746///
1747/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_ps&expand=3929)
1748#[inline]
1749#[target_feature(enable = "avx512f,avx512vl")]
1750#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1751#[cfg_attr(test, assert_instr(vmulps))]
1752pub fn _mm256_mask_mul_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
1753 unsafe {
1754 let mul: f32x8 = _mm256_mul_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, mul, src.as_f32x8()))
1756 }
1757}
1758
1759/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1760///
1761/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_ps&expand=3930)
1762#[inline]
1763#[target_feature(enable = "avx512f,avx512vl")]
1764#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1765#[cfg_attr(test, assert_instr(vmulps))]
1766pub fn _mm256_maskz_mul_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
1767 unsafe {
1768 let mul: f32x8 = _mm256_mul_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, mul, f32x8::ZERO))
1770 }
1771}
1772
1773/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1774///
1775/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_ps&expand=3926)
1776#[inline]
1777#[target_feature(enable = "avx512f,avx512vl")]
1778#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1779#[cfg_attr(test, assert_instr(vmulps))]
1780pub fn _mm_mask_mul_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
1781 unsafe {
1782 let mul: f32x4 = _mm_mul_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, mul, src.as_f32x4()))
1784 }
1785}
1786
1787/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1788///
1789/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_ps&expand=3927)
1790#[inline]
1791#[target_feature(enable = "avx512f,avx512vl")]
1792#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1793#[cfg_attr(test, assert_instr(vmulps))]
1794pub fn _mm_maskz_mul_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
1795 unsafe {
1796 let mul: f32x4 = _mm_mul_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, mul, f32x4::ZERO))
1798 }
1799}
1800
1801/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.
1802///
1803/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_pd&expand=3925)
1804#[inline]
1805#[target_feature(enable = "avx512f")]
1806#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1807#[cfg_attr(test, assert_instr(vmulpd))]
1808pub fn _mm512_mul_pd(a: __m512d, b: __m512d) -> __m512d {
    unsafe { transmute(simd_mul(a.as_f64x8(), b.as_f64x8())) }
1810}
1811
1812/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1813///
1814/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_pd&expand=3923)
1815#[inline]
1816#[target_feature(enable = "avx512f")]
1817#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1818#[cfg_attr(test, assert_instr(vmulpd))]
1819pub fn _mm512_mask_mul_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
1820 unsafe {
1821 let mul: f64x8 = _mm512_mul_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, mul, src.as_f64x8()))
1823 }
1824}
1825
1826/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1827///
1828/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_pd&expand=3924)
1829#[inline]
1830#[target_feature(enable = "avx512f")]
1831#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1832#[cfg_attr(test, assert_instr(vmulpd))]
1833pub fn _mm512_maskz_mul_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
1834 unsafe {
1835 let mul: f64x8 = _mm512_mul_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, mul, f64x8::ZERO))
1837 }
1838}
1839
1840/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1841///
1842/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_mul_pd&expand=3920)
1843#[inline]
1844#[target_feature(enable = "avx512f,avx512vl")]
1845#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1846#[cfg_attr(test, assert_instr(vmulpd))]
1847pub fn _mm256_mask_mul_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
1848 unsafe {
1849 let mul: f64x4 = _mm256_mul_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, mul, src.as_f64x4()))
1851 }
1852}
1853
1854/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1855///
1856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_mul_pd&expand=3921)
1857#[inline]
1858#[target_feature(enable = "avx512f,avx512vl")]
1859#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1860#[cfg_attr(test, assert_instr(vmulpd))]
1861pub fn _mm256_maskz_mul_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
1862 unsafe {
1863 let mul: f64x4 = _mm256_mul_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, mul, f64x4::ZERO))
1865 }
1866}
1867
1868/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1869///
1870/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_mul_pd&expand=3917)
1871#[inline]
1872#[target_feature(enable = "avx512f,avx512vl")]
1873#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1874#[cfg_attr(test, assert_instr(vmulpd))]
1875pub fn _mm_mask_mul_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
1876 unsafe {
1877 let mul: f64x2 = _mm_mul_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, mul, src.as_f64x2()))
1879 }
1880}
1881
1882/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1883///
1884/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_mul_pd&expand=3918)
1885#[inline]
1886#[target_feature(enable = "avx512f,avx512vl")]
1887#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1888#[cfg_attr(test, assert_instr(vmulpd))]
1889pub fn _mm_maskz_mul_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
1890 unsafe {
1891 let mul: f64x2 = _mm_mul_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, mul, f64x2::ZERO))
1893 }
1894}
1895
1896/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst.
1897///
1898/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_ps&expand=2162)
1899#[inline]
1900#[target_feature(enable = "avx512f")]
1901#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1902#[cfg_attr(test, assert_instr(vdivps))]
1903pub fn _mm512_div_ps(a: __m512, b: __m512) -> __m512 {
    unsafe { transmute(simd_div(a.as_f32x16(), b.as_f32x16())) }
1905}
1906
1907/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1908///
1909/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_ps&expand=2163)
1910#[inline]
1911#[target_feature(enable = "avx512f")]
1912#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1913#[cfg_attr(test, assert_instr(vdivps))]
1914pub fn _mm512_mask_div_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
1915 unsafe {
1916 let div: f32x16 = _mm512_div_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, div, src.as_f32x16()))
1918 }
1919}
1920
1921/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1922///
1923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_ps&expand=2164)
1924#[inline]
1925#[target_feature(enable = "avx512f")]
1926#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1927#[cfg_attr(test, assert_instr(vdivps))]
1928pub fn _mm512_maskz_div_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
1929 unsafe {
1930 let div: f32x16 = _mm512_div_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, div, f32x16::ZERO))
1932 }
1933}
1934
1935/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1936///
1937/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_div_ps&expand=2160)
1938#[inline]
1939#[target_feature(enable = "avx512f,avx512vl")]
1940#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1941#[cfg_attr(test, assert_instr(vdivps))]
1942pub fn _mm256_mask_div_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
1943 unsafe {
1944 let div: f32x8 = _mm256_div_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, div, src.as_f32x8()))
1946 }
1947}
1948
1949/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1950///
1951/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_div_ps&expand=2161)
1952#[inline]
1953#[target_feature(enable = "avx512f,avx512vl")]
1954#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1955#[cfg_attr(test, assert_instr(vdivps))]
1956pub fn _mm256_maskz_div_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
1957 unsafe {
1958 let div: f32x8 = _mm256_div_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, div, f32x8::ZERO))
1960 }
1961}
1962
1963/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
1964///
1965/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_div_ps&expand=2157)
1966#[inline]
1967#[target_feature(enable = "avx512f,avx512vl")]
1968#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1969#[cfg_attr(test, assert_instr(vdivps))]
1970pub fn _mm_mask_div_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
1971 unsafe {
1972 let div: f32x4 = _mm_div_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, div, src.as_f32x4()))
1974 }
1975}
1976
1977/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
1978///
1979/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_div_ps&expand=2158)
1980#[inline]
1981#[target_feature(enable = "avx512f,avx512vl")]
1982#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1983#[cfg_attr(test, assert_instr(vdivps))]
1984pub fn _mm_maskz_div_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
1985 unsafe {
1986 let div: f32x4 = _mm_div_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, div, f32x4::ZERO))
1988 }
1989}
1990
1991/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst.
1992///
1993/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_pd&expand=2153)
1994#[inline]
1995#[target_feature(enable = "avx512f")]
1996#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
1997#[cfg_attr(test, assert_instr(vdivpd))]
1998pub fn _mm512_div_pd(a: __m512d, b: __m512d) -> __m512d {
    unsafe { transmute(simd_div(a.as_f64x8(), b.as_f64x8())) }
2000}
2001
2002/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2003///
2004/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_pd&expand=2154)
2005#[inline]
2006#[target_feature(enable = "avx512f")]
2007#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2008#[cfg_attr(test, assert_instr(vdivpd))]
2009pub fn _mm512_mask_div_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
2010 unsafe {
2011 let div: f64x8 = _mm512_div_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, div, src.as_f64x8()))
2013 }
2014}
2015
2016/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2017///
2018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_pd&expand=2155)
2019#[inline]
2020#[target_feature(enable = "avx512f")]
2021#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2022#[cfg_attr(test, assert_instr(vdivpd))]
2023pub fn _mm512_maskz_div_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
2024 unsafe {
2025 let div: f64x8 = _mm512_div_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, div, f64x8::ZERO))
2027 }
2028}
2029
2030/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2031///
2032/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_div_pd&expand=2151)
2033#[inline]
2034#[target_feature(enable = "avx512f,avx512vl")]
2035#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2036#[cfg_attr(test, assert_instr(vdivpd))]
2037pub fn _mm256_mask_div_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
2038 unsafe {
2039 let div: f64x4 = _mm256_div_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, div, src.as_f64x4()))
2041 }
2042}
2043
2044/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2045///
2046/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_div_pd&expand=2152)
2047#[inline]
2048#[target_feature(enable = "avx512f,avx512vl")]
2049#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2050#[cfg_attr(test, assert_instr(vdivpd))]
2051pub fn _mm256_maskz_div_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
2052 unsafe {
2053 let div: f64x4 = _mm256_div_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, div, f64x4::ZERO))
2055 }
2056}
2057
2058/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2059///
2060/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_div_pd&expand=2148)
2061#[inline]
2062#[target_feature(enable = "avx512f,avx512vl")]
2063#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2064#[cfg_attr(test, assert_instr(vdivpd))]
2065pub fn _mm_mask_div_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
2066 unsafe {
2067 let div: f64x2 = _mm_div_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, div, src.as_f64x2()))
2069 }
2070}
2071
2072/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2073///
2074/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_div_pd&expand=2149)
2075#[inline]
2076#[target_feature(enable = "avx512f,avx512vl")]
2077#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2078#[cfg_attr(test, assert_instr(vdivpd))]
2079pub fn _mm_maskz_div_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
2080 unsafe {
2081 let div: f64x2 = _mm_div_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, div, f64x2::ZERO))
2083 }
2084}
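
// The masked division variants select per lane exactly like the masked subtracts
// and multiplies above: mask bit i picks the quotient for lane i, and an
// unselected lane keeps the matching lane of `src` (writemask) or becomes 0.0
// (zeromask). A minimal two-lane scalar model of the writemask form, illustrative
// only and not part of this module's API:
#[cfg(test)]
#[allow(dead_code)]
fn mask_div_pd_model(src: [f64; 2], k: u8, a: [f64; 2], b: [f64; 2]) -> [f64; 2] {
    let mut dst = src;
    for i in 0..2 {
        if (k >> i) & 1 == 1 {
            dst[i] = a[i] / b[i];
        }
    }
    dst
}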
2085
2086/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst.
2087///
2088/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epi32&expand=3582)
2089#[inline]
2090#[target_feature(enable = "avx512f")]
2091#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2092#[cfg_attr(test, assert_instr(vpmaxsd))]
2093pub fn _mm512_max_epi32(a: __m512i, b: __m512i) -> __m512i {
2094 unsafe {
2095 let a: i32x16 = a.as_i32x16();
2096 let b: i32x16 = b.as_i32x16();
        transmute(simd_select::<i32x16, _>(simd_gt(a, b), a, b))
2098 }
2099}
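
// The signed maximum is expressed as a lane-wise greater-than compare followed by
// a select: a lane of `dst` takes `a` when `a > b` and `b` otherwise, matching
// what VPMAXSD computes. A one-lane scalar sketch, illustrative only:
#[cfg(test)]
#[allow(dead_code)]
fn max_epi32_lane_model(a: i32, b: i32) -> i32 {
    if a > b { a } else { b }
}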
2100
2101/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2102///
2103/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epi32&expand=3580)
2104#[inline]
2105#[target_feature(enable = "avx512f")]
2106#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2107#[cfg_attr(test, assert_instr(vpmaxsd))]
2108pub fn _mm512_mask_max_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2109 unsafe {
2110 let max: i32x16 = _mm512_max_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, max, src.as_i32x16()))
2112 }
2113}
2114
2115/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2116///
2117/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epi32&expand=3581)
2118#[inline]
2119#[target_feature(enable = "avx512f")]
2120#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2121#[cfg_attr(test, assert_instr(vpmaxsd))]
2122pub fn _mm512_maskz_max_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2123 unsafe {
2124 let max: i32x16 = _mm512_max_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, max, i32x16::ZERO))
2126 }
2127}
2128
2129/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2130///
2131/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epi32&expand=3577)
2132#[inline]
2133#[target_feature(enable = "avx512f,avx512vl")]
2134#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2135#[cfg_attr(test, assert_instr(vpmaxsd))]
2136pub fn _mm256_mask_max_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2137 unsafe {
2138 let max: i32x8 = _mm256_max_epi32(a, b).as_i32x8();
2139 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_i32x8()))
2140 }
2141}
2142
2143/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2144///
2145/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epi32&expand=3578)
2146#[inline]
2147#[target_feature(enable = "avx512f,avx512vl")]
2148#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2149#[cfg_attr(test, assert_instr(vpmaxsd))]
2150pub fn _mm256_maskz_max_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2151 unsafe {
2152 let max: i32x8 = _mm256_max_epi32(a, b).as_i32x8();
2153 transmute(src:simd_select_bitmask(m:k, yes:max, no:i32x8::ZERO))
2154 }
2155}
2156
2157/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2158///
2159/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epi32&expand=3574)
2160#[inline]
2161#[target_feature(enable = "avx512f,avx512vl")]
2162#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2163#[cfg_attr(test, assert_instr(vpmaxsd))]
2164pub fn _mm_mask_max_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2165 unsafe {
2166 let max: i32x4 = _mm_max_epi32(a, b).as_i32x4();
2167 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_i32x4()))
2168 }
2169}
2170
2171/// Compare packed signed 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2172///
2173/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epi32&expand=3575)
2174#[inline]
2175#[target_feature(enable = "avx512f,avx512vl")]
2176#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2177#[cfg_attr(test, assert_instr(vpmaxsd))]
2178pub fn _mm_maskz_max_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2179 unsafe {
2180 let max: i32x4 = _mm_max_epi32(a, b).as_i32x4();
2181 transmute(src:simd_select_bitmask(m:k, yes:max, no:i32x4::ZERO))
2182 }
2183}
2184
2185/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst.
2186///
2187/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epi64&expand=3591)
2188#[inline]
2189#[target_feature(enable = "avx512f")]
2190#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2191#[cfg_attr(test, assert_instr(vpmaxsq))]
2192pub fn _mm512_max_epi64(a: __m512i, b: __m512i) -> __m512i {
2193 unsafe {
2194 let a: i64x8 = a.as_i64x8();
2195 let b: i64x8 = b.as_i64x8();
2196 transmute(src:simd_select::<i64x8, _>(mask:simd_gt(a, b), if_true:a, if_false:b))
2197 }
2198}
2199
2200/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2201///
2202/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epi64&expand=3589)
2203#[inline]
2204#[target_feature(enable = "avx512f")]
2205#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2206#[cfg_attr(test, assert_instr(vpmaxsq))]
2207pub fn _mm512_mask_max_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
2208 unsafe {
2209 let max: i64x8 = _mm512_max_epi64(a, b).as_i64x8();
2210 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_i64x8()))
2211 }
2212}
2213
2214/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2215///
2216/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epi64&expand=3590)
2217#[inline]
2218#[target_feature(enable = "avx512f")]
2219#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2220#[cfg_attr(test, assert_instr(vpmaxsq))]
2221pub fn _mm512_maskz_max_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
2222 unsafe {
2223 let max: i64x8 = _mm512_max_epi64(a, b).as_i64x8();
2224 transmute(src:simd_select_bitmask(m:k, yes:max, no:i64x8::ZERO))
2225 }
2226}
2227
2228/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst.
2229///
2230/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epi64&expand=3588)
2231#[inline]
2232#[target_feature(enable = "avx512f,avx512vl")]
2233#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2234#[cfg_attr(test, assert_instr(vpmaxsq))]
2235pub fn _mm256_max_epi64(a: __m256i, b: __m256i) -> __m256i {
2236 unsafe {
2237 let a: i64x4 = a.as_i64x4();
2238 let b: i64x4 = b.as_i64x4();
2239 transmute(src:simd_select::<i64x4, _>(mask:simd_gt(a, b), if_true:a, if_false:b))
2240 }
2241}
2242
2243/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2244///
2245/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epi64&expand=3586)
2246#[inline]
2247#[target_feature(enable = "avx512f,avx512vl")]
2248#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2249#[cfg_attr(test, assert_instr(vpmaxsq))]
2250pub fn _mm256_mask_max_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2251 unsafe {
2252 let max: i64x4 = _mm256_max_epi64(a, b).as_i64x4();
2253 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_i64x4()))
2254 }
2255}
2256
2257/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2258///
2259/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epi64&expand=3587)
2260#[inline]
2261#[target_feature(enable = "avx512f,avx512vl")]
2262#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2263#[cfg_attr(test, assert_instr(vpmaxsq))]
2264pub fn _mm256_maskz_max_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2265 unsafe {
2266 let max: i64x4 = _mm256_max_epi64(a, b).as_i64x4();
2267 transmute(src:simd_select_bitmask(m:k, yes:max, no:i64x4::ZERO))
2268 }
2269}
2270
2271/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst.
2272///
2273/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epi64&expand=3585)
2274#[inline]
2275#[target_feature(enable = "avx512f,avx512vl")]
2276#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2277#[cfg_attr(test, assert_instr(vpmaxsq))]
2278pub fn _mm_max_epi64(a: __m128i, b: __m128i) -> __m128i {
2279 unsafe {
2280 let a: i64x2 = a.as_i64x2();
2281 let b: i64x2 = b.as_i64x2();
2282 transmute(src:simd_select::<i64x2, _>(mask:simd_gt(a, b), if_true:a, if_false:b))
2283 }
2284}
2285
2286/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2287///
2288/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epi64&expand=3583)
2289#[inline]
2290#[target_feature(enable = "avx512f,avx512vl")]
2291#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2292#[cfg_attr(test, assert_instr(vpmaxsq))]
2293pub fn _mm_mask_max_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2294 unsafe {
2295 let max: i64x2 = _mm_max_epi64(a, b).as_i64x2();
2296 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_i64x2()))
2297 }
2298}
2299
2300/// Compare packed signed 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2301///
2302/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epi64&expand=3584)
2303#[inline]
2304#[target_feature(enable = "avx512f,avx512vl")]
2305#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2306#[cfg_attr(test, assert_instr(vpmaxsq))]
2307pub fn _mm_maskz_max_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2308 unsafe {
2309 let max: i64x2 = _mm_max_epi64(a, b).as_i64x2();
2310 transmute(src:simd_select_bitmask(m:k, yes:max, no:i64x2::ZERO))
2311 }
2312}
2313
2314/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst.
2315///
2316/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_ps&expand=3655)
2317#[inline]
2318#[target_feature(enable = "avx512f")]
2319#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2320#[cfg_attr(test, assert_instr(vmaxps))]
2321pub fn _mm512_max_ps(a: __m512, b: __m512) -> __m512 {
2322 unsafe {
2323 transmute(src:vmaxps(
2324 a.as_f32x16(),
2325 b.as_f32x16(),
2326 _MM_FROUND_CUR_DIRECTION,
2327 ))
2328 }
2329}
2330
2331/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2332///
2333/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_ps&expand=3653)
2334#[inline]
2335#[target_feature(enable = "avx512f")]
2336#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2337#[cfg_attr(test, assert_instr(vmaxps))]
2338pub fn _mm512_mask_max_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
2339 unsafe {
2340 let max: f32x16 = _mm512_max_ps(a, b).as_f32x16();
2341 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_f32x16()))
2342 }
2343}
2344
2345/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2346///
2347/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_ps&expand=3654)
2348#[inline]
2349#[target_feature(enable = "avx512f")]
2350#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2351#[cfg_attr(test, assert_instr(vmaxps))]
2352pub fn _mm512_maskz_max_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
2353 unsafe {
2354 let max: f32x16 = _mm512_max_ps(a, b).as_f32x16();
2355 transmute(src:simd_select_bitmask(m:k, yes:max, no:f32x16::ZERO))
2356 }
2357}
2358
2359/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2360///
2361/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_ps&expand=3650)
2362#[inline]
2363#[target_feature(enable = "avx512f,avx512vl")]
2364#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2365#[cfg_attr(test, assert_instr(vmaxps))]
2366pub fn _mm256_mask_max_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
2367 unsafe {
2368 let max: f32x8 = _mm256_max_ps(a, b).as_f32x8();
2369 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_f32x8()))
2370 }
2371}
2372
2373/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2374///
2375/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_ps&expand=3651)
2376#[inline]
2377#[target_feature(enable = "avx512f,avx512vl")]
2378#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2379#[cfg_attr(test, assert_instr(vmaxps))]
2380pub fn _mm256_maskz_max_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
2381 unsafe {
2382 let max: f32x8 = _mm256_max_ps(a, b).as_f32x8();
2383 transmute(src:simd_select_bitmask(m:k, yes:max, no:f32x8::ZERO))
2384 }
2385}
2386
2387/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2388///
2389/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_ps&expand=3647)
2390#[inline]
2391#[target_feature(enable = "avx512f,avx512vl")]
2392#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2393#[cfg_attr(test, assert_instr(vmaxps))]
2394pub fn _mm_mask_max_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
2395 unsafe {
2396 let max: f32x4 = _mm_max_ps(a, b).as_f32x4();
2397 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_f32x4()))
2398 }
2399}
2400
2401/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2402///
2403/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_ps&expand=3648)
2404#[inline]
2405#[target_feature(enable = "avx512f,avx512vl")]
2406#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2407#[cfg_attr(test, assert_instr(vmaxps))]
2408pub fn _mm_maskz_max_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
2409 unsafe {
2410 let max: f32x4 = _mm_max_ps(a, b).as_f32x4();
2411 transmute(src:simd_select_bitmask(m:k, yes:max, no:f32x4::ZERO))
2412 }
2413}
2414
2415/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst.
2416///
2417/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_pd&expand=3645)
2418#[inline]
2419#[target_feature(enable = "avx512f")]
2420#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2421#[cfg_attr(test, assert_instr(vmaxpd))]
2422pub fn _mm512_max_pd(a: __m512d, b: __m512d) -> __m512d {
2423 unsafe { transmute(src:vmaxpd(a.as_f64x8(), b.as_f64x8(), _MM_FROUND_CUR_DIRECTION)) }
2424}
2425
2426/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2427///
2428/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_pd&expand=3643)
2429#[inline]
2430#[target_feature(enable = "avx512f")]
2431#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2432#[cfg_attr(test, assert_instr(vmaxpd))]
2433pub fn _mm512_mask_max_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
2434 unsafe {
2435 let max: f64x8 = _mm512_max_pd(a, b).as_f64x8();
2436 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_f64x8()))
2437 }
2438}
2439
2440/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2441///
2442/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_pd&expand=3644)
2443#[inline]
2444#[target_feature(enable = "avx512f")]
2445#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2446#[cfg_attr(test, assert_instr(vmaxpd))]
2447pub fn _mm512_maskz_max_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
2448 unsafe {
2449 let max: f64x8 = _mm512_max_pd(a, b).as_f64x8();
2450 transmute(src:simd_select_bitmask(m:k, yes:max, no:f64x8::ZERO))
2451 }
2452}
2453
2454/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2455///
2456/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_pd&expand=3640)
2457#[inline]
2458#[target_feature(enable = "avx512f,avx512vl")]
2459#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2460#[cfg_attr(test, assert_instr(vmaxpd))]
2461pub fn _mm256_mask_max_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
2462 unsafe {
2463 let max: f64x4 = _mm256_max_pd(a, b).as_f64x4();
2464 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_f64x4()))
2465 }
2466}
2467
2468/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2469///
2470/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_pd&expand=3641)
2471#[inline]
2472#[target_feature(enable = "avx512f,avx512vl")]
2473#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2474#[cfg_attr(test, assert_instr(vmaxpd))]
2475pub fn _mm256_maskz_max_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
2476 unsafe {
2477 let max: f64x4 = _mm256_max_pd(a, b).as_f64x4();
2478 transmute(src:simd_select_bitmask(m:k, yes:max, no:f64x4::ZERO))
2479 }
2480}
2481
2482/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2483///
2484/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_pd&expand=3637)
2485#[inline]
2486#[target_feature(enable = "avx512f,avx512vl")]
2487#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2488#[cfg_attr(test, assert_instr(vmaxpd))]
2489pub fn _mm_mask_max_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
2490 unsafe {
2491 let max: f64x2 = _mm_max_pd(a, b).as_f64x2();
2492 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_f64x2()))
2493 }
2494}
2495
2496/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2497///
2498/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_pd&expand=3638)
2499#[inline]
2500#[target_feature(enable = "avx512f,avx512vl")]
2501#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2502#[cfg_attr(test, assert_instr(vmaxpd))]
2503pub fn _mm_maskz_max_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
2504 unsafe {
2505 let max: f64x2 = _mm_max_pd(a, b).as_f64x2();
2506 transmute(src:simd_select_bitmask(m:k, yes:max, no:f64x2::ZERO))
2507 }
2508}
2509
2510/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst.
2511///
2512/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epu32&expand=3618)
2513#[inline]
2514#[target_feature(enable = "avx512f")]
2515#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2516#[cfg_attr(test, assert_instr(vpmaxud))]
2517pub fn _mm512_max_epu32(a: __m512i, b: __m512i) -> __m512i {
2518 unsafe {
2519 let a: u32x16 = a.as_u32x16();
2520 let b: u32x16 = b.as_u32x16();
2521 transmute(src:simd_select::<i32x16, _>(mask:simd_gt(a, b), if_true:a, if_false:b))
2522 }
2523}
2524
2525/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2526///
2527/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epu32&expand=3616)
2528#[inline]
2529#[target_feature(enable = "avx512f")]
2530#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2531#[cfg_attr(test, assert_instr(vpmaxud))]
2532pub fn _mm512_mask_max_epu32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2533 unsafe {
2534 let max: u32x16 = _mm512_max_epu32(a, b).as_u32x16();
2535 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_u32x16()))
2536 }
2537}
2538
2539/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2540///
2541/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epu32&expand=3617)
2542#[inline]
2543#[target_feature(enable = "avx512f")]
2544#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2545#[cfg_attr(test, assert_instr(vpmaxud))]
2546pub fn _mm512_maskz_max_epu32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2547 unsafe {
2548 let max: u32x16 = _mm512_max_epu32(a, b).as_u32x16();
2549 transmute(src:simd_select_bitmask(m:k, yes:max, no:u32x16::ZERO))
2550 }
2551}
2552
2553/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2554///
2555/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epu32&expand=3613)
2556#[inline]
2557#[target_feature(enable = "avx512f,avx512vl")]
2558#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2559#[cfg_attr(test, assert_instr(vpmaxud))]
2560pub fn _mm256_mask_max_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2561 unsafe {
2562 let max: u32x8 = _mm256_max_epu32(a, b).as_u32x8();
2563 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_u32x8()))
2564 }
2565}
2566
2567/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2568///
2569/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epu32&expand=3614)
2570#[inline]
2571#[target_feature(enable = "avx512f,avx512vl")]
2572#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2573#[cfg_attr(test, assert_instr(vpmaxud))]
2574pub fn _mm256_maskz_max_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2575 unsafe {
2576 let max: u32x8 = _mm256_max_epu32(a, b).as_u32x8();
2577 transmute(src:simd_select_bitmask(m:k, yes:max, no:u32x8::ZERO))
2578 }
2579}
2580
2581/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2582///
2583/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epu32&expand=3610)
2584#[inline]
2585#[target_feature(enable = "avx512f,avx512vl")]
2586#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2587#[cfg_attr(test, assert_instr(vpmaxud))]
2588pub fn _mm_mask_max_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2589 unsafe {
2590 let max: u32x4 = _mm_max_epu32(a, b).as_u32x4();
2591 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_u32x4()))
2592 }
2593}
2594
2595/// Compare packed unsigned 32-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2596///
2597/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epu32&expand=3611)
2598#[inline]
2599#[target_feature(enable = "avx512f,avx512vl")]
2600#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2601#[cfg_attr(test, assert_instr(vpmaxud))]
2602pub fn _mm_maskz_max_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2603 unsafe {
2604 let max: u32x4 = _mm_max_epu32(a, b).as_u32x4();
2605 transmute(src:simd_select_bitmask(m:k, yes:max, no:u32x4::ZERO))
2606 }
2607}
2608
2609/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst.
2610///
2611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_epu64&expand=3627)
2612#[inline]
2613#[target_feature(enable = "avx512f")]
2614#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2615#[cfg_attr(test, assert_instr(vpmaxuq))]
2616pub fn _mm512_max_epu64(a: __m512i, b: __m512i) -> __m512i {
2617 unsafe {
2618 let a: u64x8 = a.as_u64x8();
2619 let b: u64x8 = b.as_u64x8();
2620 transmute(src:simd_select::<i64x8, _>(mask:simd_gt(a, b), if_true:a, if_false:b))
2621 }
2622}
2623
2624/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2625///
2626/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_epu64&expand=3625)
2627#[inline]
2628#[target_feature(enable = "avx512f")]
2629#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2630#[cfg_attr(test, assert_instr(vpmaxuq))]
2631pub fn _mm512_mask_max_epu64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
2632 unsafe {
2633 let max: u64x8 = _mm512_max_epu64(a, b).as_u64x8();
2634 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_u64x8()))
2635 }
2636}
2637
2638/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2639///
2640/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_epu64&expand=3626)
2641#[inline]
2642#[target_feature(enable = "avx512f")]
2643#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2644#[cfg_attr(test, assert_instr(vpmaxuq))]
2645pub fn _mm512_maskz_max_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
2646 unsafe {
2647 let max: u64x8 = _mm512_max_epu64(a, b).as_u64x8();
2648 transmute(src:simd_select_bitmask(m:k, yes:max, no:u64x8::ZERO))
2649 }
2650}
2651
2652/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst.
2653///
2654/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_max_epu64&expand=3624)
2655#[inline]
2656#[target_feature(enable = "avx512f,avx512vl")]
2657#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2658#[cfg_attr(test, assert_instr(vpmaxuq))]
2659pub fn _mm256_max_epu64(a: __m256i, b: __m256i) -> __m256i {
2660 unsafe {
2661 let a: u64x4 = a.as_u64x4();
2662 let b: u64x4 = b.as_u64x4();
2663 transmute(src:simd_select::<i64x4, _>(mask:simd_gt(a, b), if_true:a, if_false:b))
2664 }
2665}
2666
2667/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2668///
2669/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_max_epu64&expand=3622)
2670#[inline]
2671#[target_feature(enable = "avx512f,avx512vl")]
2672#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2673#[cfg_attr(test, assert_instr(vpmaxuq))]
2674pub fn _mm256_mask_max_epu64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2675 unsafe {
2676 let max: u64x4 = _mm256_max_epu64(a, b).as_u64x4();
2677 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_u64x4()))
2678 }
2679}
2680
2681/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2682///
2683/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_max_epu64&expand=3623)
2684#[inline]
2685#[target_feature(enable = "avx512f,avx512vl")]
2686#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2687#[cfg_attr(test, assert_instr(vpmaxuq))]
2688pub fn _mm256_maskz_max_epu64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2689 unsafe {
2690 let max: u64x4 = _mm256_max_epu64(a, b).as_u64x4();
2691 transmute(src:simd_select_bitmask(m:k, yes:max, no:u64x4::ZERO))
2692 }
2693}
2694
2695/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst.
2696///
2697/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_max_epu64&expand=3621)
2698#[inline]
2699#[target_feature(enable = "avx512f,avx512vl")]
2700#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2701#[cfg_attr(test, assert_instr(vpmaxuq))]
2702pub fn _mm_max_epu64(a: __m128i, b: __m128i) -> __m128i {
2703 unsafe {
2704 let a: u64x2 = a.as_u64x2();
2705 let b: u64x2 = b.as_u64x2();
2706 transmute(src:simd_select::<i64x2, _>(mask:simd_gt(a, b), if_true:a, if_false:b))
2707 }
2708}
2709
2710/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2711///
2712/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_epu64&expand=3619)
2713#[inline]
2714#[target_feature(enable = "avx512f,avx512vl")]
2715#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2716#[cfg_attr(test, assert_instr(vpmaxuq))]
2717pub fn _mm_mask_max_epu64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2718 unsafe {
2719 let max: u64x2 = _mm_max_epu64(a, b).as_u64x2();
2720 transmute(src:simd_select_bitmask(m:k, yes:max, no:src.as_u64x2()))
2721 }
2722}
2723
2724/// Compare packed unsigned 64-bit integers in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2725///
2726/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_epu64&expand=3620)
2727#[inline]
2728#[target_feature(enable = "avx512f,avx512vl")]
2729#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2730#[cfg_attr(test, assert_instr(vpmaxuq))]
2731pub fn _mm_maskz_max_epu64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2732 unsafe {
2733 let max: u64x2 = _mm_max_epu64(a, b).as_u64x2();
2734 transmute(src:simd_select_bitmask(m:k, yes:max, no:u64x2::ZERO))
2735 }
2736}
2737
2738/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst.
2739///
2740/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epi32&expand=3696)
2741#[inline]
2742#[target_feature(enable = "avx512f")]
2743#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2744#[cfg_attr(test, assert_instr(vpminsd))]
2745pub fn _mm512_min_epi32(a: __m512i, b: __m512i) -> __m512i {
2746 unsafe {
2747 let a: i32x16 = a.as_i32x16();
2748 let b: i32x16 = b.as_i32x16();
2749 transmute(src:simd_select::<i32x16, _>(mask:simd_lt(a, b), if_true:a, if_false:b))
2750 }
2751}
2752
2753/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2754///
2755/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epi32&expand=3694)
2756#[inline]
2757#[target_feature(enable = "avx512f")]
2758#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2759#[cfg_attr(test, assert_instr(vpminsd))]
2760pub fn _mm512_mask_min_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2761 unsafe {
2762 let min: i32x16 = _mm512_min_epi32(a, b).as_i32x16();
2763 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_i32x16()))
2764 }
2765}
2766
2767/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2768///
2769/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epi32&expand=3695)
2770#[inline]
2771#[target_feature(enable = "avx512f")]
2772#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2773#[cfg_attr(test, assert_instr(vpminsd))]
2774pub fn _mm512_maskz_min_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
2775 unsafe {
2776 let min: i32x16 = _mm512_min_epi32(a, b).as_i32x16();
2777 transmute(src:simd_select_bitmask(m:k, yes:min, no:i32x16::ZERO))
2778 }
2779}
2780
2781/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2782///
2783/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epi32&expand=3691)
2784#[inline]
2785#[target_feature(enable = "avx512f,avx512vl")]
2786#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2787#[cfg_attr(test, assert_instr(vpminsd))]
2788pub fn _mm256_mask_min_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2789 unsafe {
2790 let min: i32x8 = _mm256_min_epi32(a, b).as_i32x8();
2791 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_i32x8()))
2792 }
2793}
2794
2795/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2796///
2797/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epi32&expand=3692)
2798#[inline]
2799#[target_feature(enable = "avx512f,avx512vl")]
2800#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2801#[cfg_attr(test, assert_instr(vpminsd))]
2802pub fn _mm256_maskz_min_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2803 unsafe {
2804 let min: i32x8 = _mm256_min_epi32(a, b).as_i32x8();
2805 transmute(src:simd_select_bitmask(m:k, yes:min, no:i32x8::ZERO))
2806 }
2807}
2808
2809/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2810///
2811/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epi32&expand=3688)
2812#[inline]
2813#[target_feature(enable = "avx512f,avx512vl")]
2814#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2815#[cfg_attr(test, assert_instr(vpminsd))]
2816pub fn _mm_mask_min_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2817 unsafe {
2818 let min: i32x4 = _mm_min_epi32(a, b).as_i32x4();
2819 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_i32x4()))
2820 }
2821}
2822
2823/// Compare packed signed 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2824///
2825/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epi32&expand=3689)
2826#[inline]
2827#[target_feature(enable = "avx512f,avx512vl")]
2828#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2829#[cfg_attr(test, assert_instr(vpminsd))]
2830pub fn _mm_maskz_min_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2831 unsafe {
2832 let min: i32x4 = _mm_min_epi32(a, b).as_i32x4();
2833 transmute(src:simd_select_bitmask(m:k, yes:min, no:i32x4::ZERO))
2834 }
2835}
2836
2837/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst.
2838///
2839/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epi64&expand=3705)
2840#[inline]
2841#[target_feature(enable = "avx512f")]
2842#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2843#[cfg_attr(test, assert_instr(vpminsq))]
2844pub fn _mm512_min_epi64(a: __m512i, b: __m512i) -> __m512i {
2845 unsafe {
2846 let a: i64x8 = a.as_i64x8();
2847 let b: i64x8 = b.as_i64x8();
2848 transmute(src:simd_select::<i64x8, _>(mask:simd_lt(a, b), if_true:a, if_false:b))
2849 }
2850}
2851
2852/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2853///
2854/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epi64&expand=3703)
2855#[inline]
2856#[target_feature(enable = "avx512f")]
2857#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2858#[cfg_attr(test, assert_instr(vpminsq))]
2859pub fn _mm512_mask_min_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
2860 unsafe {
2861 let min: i64x8 = _mm512_min_epi64(a, b).as_i64x8();
2862 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_i64x8()))
2863 }
2864}
2865
2866/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2867///
2868/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epi64&expand=3704)
2869#[inline]
2870#[target_feature(enable = "avx512f")]
2871#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2872#[cfg_attr(test, assert_instr(vpminsq))]
2873pub fn _mm512_maskz_min_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
2874 unsafe {
2875 let min: i64x8 = _mm512_min_epi64(a, b).as_i64x8();
2876 transmute(src:simd_select_bitmask(m:k, yes:min, no:i64x8::ZERO))
2877 }
2878}
2879
2880/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst.
2881///
2882/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epi64&expand=3702)
2883#[inline]
2884#[target_feature(enable = "avx512f,avx512vl")]
2885#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2886#[cfg_attr(test, assert_instr(vpminsq))]
2887pub fn _mm256_min_epi64(a: __m256i, b: __m256i) -> __m256i {
2888 unsafe {
2889 let a: i64x4 = a.as_i64x4();
2890 let b: i64x4 = b.as_i64x4();
2891 transmute(src:simd_select::<i64x4, _>(mask:simd_lt(a, b), if_true:a, if_false:b))
2892 }
2893}
2894
2895/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2896///
2897/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epi64&expand=3700)
2898#[inline]
2899#[target_feature(enable = "avx512f,avx512vl")]
2900#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2901#[cfg_attr(test, assert_instr(vpminsq))]
2902pub fn _mm256_mask_min_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2903 unsafe {
2904 let min: i64x4 = _mm256_min_epi64(a, b).as_i64x4();
2905 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_i64x4()))
2906 }
2907}
2908
2909/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2910///
2911/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epi64&expand=3701)
2912#[inline]
2913#[target_feature(enable = "avx512f,avx512vl")]
2914#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2915#[cfg_attr(test, assert_instr(vpminsq))]
2916pub fn _mm256_maskz_min_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
2917 unsafe {
2918 let min: i64x4 = _mm256_min_epi64(a, b).as_i64x4();
2919 transmute(src:simd_select_bitmask(m:k, yes:min, no:i64x4::ZERO))
2920 }
2921}
2922
2923/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst.
2924///
2925/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epi64)
2926#[inline]
2927#[target_feature(enable = "avx512f,avx512vl")]
2928#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2929#[cfg_attr(test, assert_instr(vpminsq))]
2930pub fn _mm_min_epi64(a: __m128i, b: __m128i) -> __m128i {
2931 unsafe {
2932 let a: i64x2 = a.as_i64x2();
2933 let b: i64x2 = b.as_i64x2();
2934 transmute(src:simd_select::<i64x2, _>(mask:simd_lt(a, b), if_true:a, if_false:b))
2935 }
2936}
2937
2938/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2939///
2940/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epi64)
2941#[inline]
2942#[target_feature(enable = "avx512f,avx512vl")]
2943#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2944#[cfg_attr(test, assert_instr(vpminsq))]
2945pub fn _mm_mask_min_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2946 unsafe {
2947 let min: i64x2 = _mm_min_epi64(a, b).as_i64x2();
2948 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_i64x2()))
2949 }
2950}
2951
2952/// Compare packed signed 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2953///
2954/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epi64)
2955#[inline]
2956#[target_feature(enable = "avx512f,avx512vl")]
2957#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2958#[cfg_attr(test, assert_instr(vpminsq))]
2959pub fn _mm_maskz_min_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
2960 unsafe {
2961 let min: i64x2 = _mm_min_epi64(a, b).as_i64x2();
2962 transmute(src:simd_select_bitmask(m:k, yes:min, no:i64x2::ZERO))
2963 }
2964}
2965
2966/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst.
2967///
2968/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_ps&expand=3769)
2969#[inline]
2970#[target_feature(enable = "avx512f")]
2971#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2972#[cfg_attr(test, assert_instr(vminps))]
2973pub fn _mm512_min_ps(a: __m512, b: __m512) -> __m512 {
2974 unsafe {
2975 transmute(src:vminps(
2976 a.as_f32x16(),
2977 b.as_f32x16(),
2978 _MM_FROUND_CUR_DIRECTION,
2979 ))
2980 }
2981}
2982
2983/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
2984///
2985/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_ps&expand=3767)
2986#[inline]
2987#[target_feature(enable = "avx512f")]
2988#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
2989#[cfg_attr(test, assert_instr(vminps))]
2990pub fn _mm512_mask_min_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
2991 unsafe {
2992 let min: f32x16 = _mm512_min_ps(a, b).as_f32x16();
2993 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_f32x16()))
2994 }
2995}
2996
2997/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
2998///
2999/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_ps&expand=3768)
3000#[inline]
3001#[target_feature(enable = "avx512f")]
3002#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3003#[cfg_attr(test, assert_instr(vminps))]
3004pub fn _mm512_maskz_min_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
3005 unsafe {
3006 let min: f32x16 = _mm512_min_ps(a, b).as_f32x16();
3007 transmute(src:simd_select_bitmask(m:k, yes:min, no:f32x16::ZERO))
3008 }
3009}
3010
3011/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3012///
3013/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_ps&expand=3764)
3014#[inline]
3015#[target_feature(enable = "avx512f,avx512vl")]
3016#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3017#[cfg_attr(test, assert_instr(vminps))]
3018pub fn _mm256_mask_min_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
3019 unsafe {
3020 let min: f32x8 = _mm256_min_ps(a, b).as_f32x8();
3021 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_f32x8()))
3022 }
3023}
3024
3025/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3026///
3027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_ps&expand=3765)
3028#[inline]
3029#[target_feature(enable = "avx512f,avx512vl")]
3030#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3031#[cfg_attr(test, assert_instr(vminps))]
3032pub fn _mm256_maskz_min_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
3033 unsafe {
3034 let min: f32x8 = _mm256_min_ps(a, b).as_f32x8();
3035 transmute(src:simd_select_bitmask(m:k, yes:min, no:f32x8::ZERO))
3036 }
3037}
3038
3039/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3040///
3041/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_ps&expand=3761)
3042#[inline]
3043#[target_feature(enable = "avx512f,avx512vl")]
3044#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3045#[cfg_attr(test, assert_instr(vminps))]
3046pub fn _mm_mask_min_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
3047 unsafe {
3048 let min: f32x4 = _mm_min_ps(a, b).as_f32x4();
3049 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_f32x4()))
3050 }
3051}
3052
3053/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3054///
3055/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_ps&expand=3762)
3056#[inline]
3057#[target_feature(enable = "avx512f,avx512vl")]
3058#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3059#[cfg_attr(test, assert_instr(vminps))]
3060pub fn _mm_maskz_min_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
3061 unsafe {
3062 let min: f32x4 = _mm_min_ps(a, b).as_f32x4();
3063 transmute(src:simd_select_bitmask(m:k, yes:min, no:f32x4::ZERO))
3064 }
3065}
3066
3067/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst.
3068///
3069/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_pd&expand=3759)
3070#[inline]
3071#[target_feature(enable = "avx512f")]
3072#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3073#[cfg_attr(test, assert_instr(vminpd))]
3074pub fn _mm512_min_pd(a: __m512d, b: __m512d) -> __m512d {
3075 unsafe { transmute(src:vminpd(a.as_f64x8(), b.as_f64x8(), _MM_FROUND_CUR_DIRECTION)) }
3076}
3077
3078/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3079///
3080/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_pd&expand=3757)
3081#[inline]
3082#[target_feature(enable = "avx512f")]
3083#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3084#[cfg_attr(test, assert_instr(vminpd))]
3085pub fn _mm512_mask_min_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
3086 unsafe {
3087 let min: f64x8 = _mm512_min_pd(a, b).as_f64x8();
3088 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_f64x8()))
3089 }
3090}
3091
3092/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3093///
3094/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_pd&expand=3758)
3095#[inline]
3096#[target_feature(enable = "avx512f")]
3097#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3098#[cfg_attr(test, assert_instr(vminpd))]
3099pub fn _mm512_maskz_min_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
3100 unsafe {
3101 let min: f64x8 = _mm512_min_pd(a, b).as_f64x8();
3102 transmute(src:simd_select_bitmask(m:k, yes:min, no:f64x8::ZERO))
3103 }
3104}
3105
3106/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3107///
3108/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_pd&expand=3754)
3109#[inline]
3110#[target_feature(enable = "avx512f,avx512vl")]
3111#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3112#[cfg_attr(test, assert_instr(vminpd))]
3113pub fn _mm256_mask_min_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
3114 unsafe {
3115 let min: f64x4 = _mm256_min_pd(a, b).as_f64x4();
3116 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_f64x4()))
3117 }
3118}
3119
3120/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3121///
3122/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_pd&expand=3755)
3123#[inline]
3124#[target_feature(enable = "avx512f,avx512vl")]
3125#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3126#[cfg_attr(test, assert_instr(vminpd))]
3127pub fn _mm256_maskz_min_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
3128 unsafe {
3129 let min: f64x4 = _mm256_min_pd(a, b).as_f64x4();
3130 transmute(src:simd_select_bitmask(m:k, yes:min, no:f64x4::ZERO))
3131 }
3132}
3133
3134/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3135///
3136/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_pd&expand=3751)
3137#[inline]
3138#[target_feature(enable = "avx512f,avx512vl")]
3139#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3140#[cfg_attr(test, assert_instr(vminpd))]
3141pub fn _mm_mask_min_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
3142 unsafe {
3143 let min: f64x2 = _mm_min_pd(a, b).as_f64x2();
3144 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_f64x2()))
3145 }
3146}
3147
3148/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3149///
3150/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_pd&expand=3752)
3151#[inline]
3152#[target_feature(enable = "avx512f,avx512vl")]
3153#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3154#[cfg_attr(test, assert_instr(vminpd))]
3155pub fn _mm_maskz_min_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
3156 unsafe {
3157 let min: f64x2 = _mm_min_pd(a, b).as_f64x2();
3158 transmute(src:simd_select_bitmask(m:k, yes:min, no:f64x2::ZERO))
3159 }
3160}
3161
3162/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst.
3163///
3164/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epu32&expand=3732)
3165#[inline]
3166#[target_feature(enable = "avx512f")]
3167#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3168#[cfg_attr(test, assert_instr(vpminud))]
3169pub fn _mm512_min_epu32(a: __m512i, b: __m512i) -> __m512i {
3170 unsafe {
3171 let a: u32x16 = a.as_u32x16();
3172 let b: u32x16 = b.as_u32x16();
3173 transmute(src:simd_select::<i32x16, _>(mask:simd_lt(a, b), if_true:a, if_false:b))
3174 }
3175}
3176
3177/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3178///
3179/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epu32&expand=3730)
3180#[inline]
3181#[target_feature(enable = "avx512f")]
3182#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3183#[cfg_attr(test, assert_instr(vpminud))]
3184pub fn _mm512_mask_min_epu32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
3185 unsafe {
3186 let min: u32x16 = _mm512_min_epu32(a, b).as_u32x16();
3187 transmute(src:simd_select_bitmask(m:k, yes:min, no:src.as_u32x16()))
3188 }
3189}
3190
3191/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3192///
3193/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epu32&expand=3731)
3194#[inline]
3195#[target_feature(enable = "avx512f")]
3196#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3197#[cfg_attr(test, assert_instr(vpminud))]
3198pub fn _mm512_maskz_min_epu32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
3199 unsafe {
3200 let min: u32x16 = _mm512_min_epu32(a, b).as_u32x16();
3201 transmute(src:simd_select_bitmask(m:k, yes:min, no:u32x16::ZERO))
3202 }
3203}
3204
3205/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3206///
3207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epu32&expand=3727)
3208#[inline]
3209#[target_feature(enable = "avx512f,avx512vl")]
3210#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3211#[cfg_attr(test, assert_instr(vpminud))]
3212pub fn _mm256_mask_min_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
3213 unsafe {
3214 let min: u32x8 = _mm256_min_epu32(a, b).as_u32x8();
        transmute(simd_select_bitmask(k, min, src.as_u32x8()))
3216 }
3217}
3218
3219/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3220///
3221/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epu32&expand=3728)
3222#[inline]
3223#[target_feature(enable = "avx512f,avx512vl")]
3224#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3225#[cfg_attr(test, assert_instr(vpminud))]
3226pub fn _mm256_maskz_min_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
3227 unsafe {
3228 let min: u32x8 = _mm256_min_epu32(a, b).as_u32x8();
        transmute(simd_select_bitmask(k, min, u32x8::ZERO))
3230 }
3231}
3232
3233/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3234///
3235/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epu32&expand=3724)
3236#[inline]
3237#[target_feature(enable = "avx512f,avx512vl")]
3238#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3239#[cfg_attr(test, assert_instr(vpminud))]
3240pub fn _mm_mask_min_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
3241 unsafe {
3242 let min: u32x4 = _mm_min_epu32(a, b).as_u32x4();
        transmute(simd_select_bitmask(k, min, src.as_u32x4()))
3244 }
3245}
3246
3247/// Compare packed unsigned 32-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3248///
3249/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epu32&expand=3725)
3250#[inline]
3251#[target_feature(enable = "avx512f,avx512vl")]
3252#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3253#[cfg_attr(test, assert_instr(vpminud))]
3254pub fn _mm_maskz_min_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
3255 unsafe {
3256 let min: u32x4 = _mm_min_epu32(a, b).as_u32x4();
        transmute(simd_select_bitmask(k, min, u32x4::ZERO))
3258 }
3259}
3260
3261/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst.
3262///
3263/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_epu64&expand=3741)
3264#[inline]
3265#[target_feature(enable = "avx512f")]
3266#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3267#[cfg_attr(test, assert_instr(vpminuq))]
3268pub fn _mm512_min_epu64(a: __m512i, b: __m512i) -> __m512i {
3269 unsafe {
3270 let a: u64x8 = a.as_u64x8();
3271 let b: u64x8 = b.as_u64x8();
        transmute(simd_select::<i64x8, _>(simd_lt(a, b), a, b))
3273 }
3274}
3275
3276/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3277///
3278/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_epu64&expand=3739)
3279#[inline]
3280#[target_feature(enable = "avx512f")]
3281#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3282#[cfg_attr(test, assert_instr(vpminuq))]
3283pub fn _mm512_mask_min_epu64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
3284 unsafe {
3285 let min: u64x8 = _mm512_min_epu64(a, b).as_u64x8();
        transmute(simd_select_bitmask(k, min, src.as_u64x8()))
3287 }
3288}
3289
3290/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3291///
3292/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_epu64&expand=3740)
3293#[inline]
3294#[target_feature(enable = "avx512f")]
3295#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3296#[cfg_attr(test, assert_instr(vpminuq))]
3297pub fn _mm512_maskz_min_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
3298 unsafe {
3299 let min: u64x8 = _mm512_min_epu64(a, b).as_u64x8();
        transmute(simd_select_bitmask(k, min, u64x8::ZERO))
3301 }
3302}
3303
3304/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst.
3305///
3306/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_min_epu64&expand=3738)
3307#[inline]
3308#[target_feature(enable = "avx512f,avx512vl")]
3309#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3310#[cfg_attr(test, assert_instr(vpminuq))]
3311pub fn _mm256_min_epu64(a: __m256i, b: __m256i) -> __m256i {
3312 unsafe {
3313 let a: u64x4 = a.as_u64x4();
3314 let b: u64x4 = b.as_u64x4();
        transmute(simd_select::<i64x4, _>(simd_lt(a, b), a, b))
3316 }
3317}
3318
3319/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3320///
3321/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_min_epu64&expand=3736)
3322#[inline]
3323#[target_feature(enable = "avx512f,avx512vl")]
3324#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3325#[cfg_attr(test, assert_instr(vpminuq))]
3326pub fn _mm256_mask_min_epu64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
3327 unsafe {
3328 let min: u64x4 = _mm256_min_epu64(a, b).as_u64x4();
        transmute(simd_select_bitmask(k, min, src.as_u64x4()))
3330 }
3331}
3332
3333/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3334///
3335/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_min_epu64&expand=3737)
3336#[inline]
3337#[target_feature(enable = "avx512f,avx512vl")]
3338#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3339#[cfg_attr(test, assert_instr(vpminuq))]
3340pub fn _mm256_maskz_min_epu64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
3341 unsafe {
3342 let min: u64x4 = _mm256_min_epu64(a, b).as_u64x4();
        transmute(simd_select_bitmask(k, min, u64x4::ZERO))
3344 }
3345}
3346
3347/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst.
3348///
3349/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_min_epu64&expand=3735)
3350#[inline]
3351#[target_feature(enable = "avx512f,avx512vl")]
3352#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3353#[cfg_attr(test, assert_instr(vpminuq))]
3354pub fn _mm_min_epu64(a: __m128i, b: __m128i) -> __m128i {
3355 unsafe {
3356 let a: u64x2 = a.as_u64x2();
3357 let b: u64x2 = b.as_u64x2();
        transmute(simd_select::<i64x2, _>(simd_lt(a, b), a, b))
3359 }
3360}
3361
3362/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3363///
3364/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_min_epu64&expand=3733)
3365#[inline]
3366#[target_feature(enable = "avx512f,avx512vl")]
3367#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3368#[cfg_attr(test, assert_instr(vpminuq))]
3369pub fn _mm_mask_min_epu64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
3370 unsafe {
3371 let min: u64x2 = _mm_min_epu64(a, b).as_u64x2();
        transmute(simd_select_bitmask(k, min, src.as_u64x2()))
3373 }
3374}
3375
3376/// Compare packed unsigned 64-bit integers in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3377///
3378/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_min_epu64&expand=3734)
3379#[inline]
3380#[target_feature(enable = "avx512f,avx512vl")]
3381#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3382#[cfg_attr(test, assert_instr(vpminuq))]
3383pub fn _mm_maskz_min_epu64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
3384 unsafe {
3385 let min: u64x2 = _mm_min_epu64(a, b).as_u64x2();
        transmute(simd_select_bitmask(k, min, u64x2::ZERO))
3387 }
3388}
3389
3390/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst.
3391///
3392/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_ps&expand=5371)
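///
/// A minimal usage sketch with illustrative values (AVX-512F support is assumed
/// to have been verified at runtime, making the `unsafe` block sound):
///
/// ```ignore
/// let mut out = [0.0f32; 16];
/// unsafe {
///     let a = _mm512_set1_ps(9.0);
///     let r = _mm512_sqrt_ps(a); // every lane becomes 3.0
///     _mm512_storeu_ps(out.as_mut_ptr(), r);
/// }
/// assert_eq!(out, [3.0f32; 16]);
/// ```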
3393#[inline]
3394#[target_feature(enable = "avx512f")]
3395#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3396#[cfg_attr(test, assert_instr(vsqrtps))]
3397pub fn _mm512_sqrt_ps(a: __m512) -> __m512 {
3398 unsafe { simd_fsqrt(a) }
3399}
3400
3401/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3402///
3403/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_ps&expand=5369)
3404#[inline]
3405#[target_feature(enable = "avx512f")]
3406#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3407#[cfg_attr(test, assert_instr(vsqrtps))]
3408pub fn _mm512_mask_sqrt_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
3410}
3411
3412/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3413///
3414/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_ps&expand=5370)
3415#[inline]
3416#[target_feature(enable = "avx512f")]
3417#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3418#[cfg_attr(test, assert_instr(vsqrtps))]
3419pub fn _mm512_maskz_sqrt_ps(k: __mmask16, a: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm512_setzero_ps()) }
3421}
3422
3423/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3424///
3425/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sqrt_ps&expand=5366)
3426#[inline]
3427#[target_feature(enable = "avx512f,avx512vl")]
3428#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3429#[cfg_attr(test, assert_instr(vsqrtps))]
3430pub fn _mm256_mask_sqrt_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
3432}
3433
3434/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3435///
3436/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sqrt_ps&expand=5367)
3437#[inline]
3438#[target_feature(enable = "avx512f,avx512vl")]
3439#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3440#[cfg_attr(test, assert_instr(vsqrtps))]
3441pub fn _mm256_maskz_sqrt_ps(k: __mmask8, a: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm256_setzero_ps()) }
3443}
3444
3445/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3446///
3447/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sqrt_ps&expand=5363)
3448#[inline]
3449#[target_feature(enable = "avx512f,avx512vl")]
3450#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3451#[cfg_attr(test, assert_instr(vsqrtps))]
3452pub fn _mm_mask_sqrt_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
3454}
3455
3456/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3457///
3458/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sqrt_ps&expand=5364)
3459#[inline]
3460#[target_feature(enable = "avx512f,avx512vl")]
3461#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3462#[cfg_attr(test, assert_instr(vsqrtps))]
3463pub fn _mm_maskz_sqrt_ps(k: __mmask8, a: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm_setzero_ps()) }
3465}
3466
3467/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst.
3468///
3469/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_pd&expand=5362)
3470#[inline]
3471#[target_feature(enable = "avx512f")]
3472#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3473#[cfg_attr(test, assert_instr(vsqrtpd))]
3474pub fn _mm512_sqrt_pd(a: __m512d) -> __m512d {
3475 unsafe { simd_fsqrt(a) }
3476}
3477
3478/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3479///
3480/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_pd&expand=5360)
3481#[inline]
3482#[target_feature(enable = "avx512f")]
3483#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3484#[cfg_attr(test, assert_instr(vsqrtpd))]
3485pub fn _mm512_mask_sqrt_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
3487}
3488
3489/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3490///
3491/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_pd&expand=5361)
3492#[inline]
3493#[target_feature(enable = "avx512f")]
3494#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3495#[cfg_attr(test, assert_instr(vsqrtpd))]
3496pub fn _mm512_maskz_sqrt_pd(k: __mmask8, a: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm512_setzero_pd()) }
3498}
3499
3500/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3501///
3502/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sqrt_pd&expand=5357)
3503#[inline]
3504#[target_feature(enable = "avx512f,avx512vl")]
3505#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3506#[cfg_attr(test, assert_instr(vsqrtpd))]
3507pub fn _mm256_mask_sqrt_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
3509}
3510
3511/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3512///
3513/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sqrt_pd&expand=5358)
3514#[inline]
3515#[target_feature(enable = "avx512f,avx512vl")]
3516#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3517#[cfg_attr(test, assert_instr(vsqrtpd))]
3518pub fn _mm256_maskz_sqrt_pd(k: __mmask8, a: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm256_setzero_pd()) }
3520}
3521
3522/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
3523///
3524/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sqrt_pd&expand=5354)
3525#[inline]
3526#[target_feature(enable = "avx512f,avx512vl")]
3527#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3528#[cfg_attr(test, assert_instr(vsqrtpd))]
3529pub fn _mm_mask_sqrt_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), src) }
3531}
3532
3533/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3534///
3535/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sqrt_pd&expand=5355)
3536#[inline]
3537#[target_feature(enable = "avx512f,avx512vl")]
3538#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3539#[cfg_attr(test, assert_instr(vsqrtpd))]
3540pub fn _mm_maskz_sqrt_pd(k: __mmask8, a: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, simd_fsqrt(a), _mm_setzero_pd()) }
3542}
3543
3544/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.
3545///
3546/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_ps&expand=2557)
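///
/// A minimal usage sketch with illustrative values (AVX-512F support is assumed
/// to have been verified at runtime, making the `unsafe` block sound):
///
/// ```ignore
/// let mut out = [0.0f32; 16];
/// unsafe {
///     let a = _mm512_set1_ps(2.0);
///     let b = _mm512_set1_ps(3.0);
///     let c = _mm512_set1_ps(1.0);
///     // Each lane computes a * b + c with a single rounding: 2.0 * 3.0 + 1.0 = 7.0.
///     let r = _mm512_fmadd_ps(a, b, c);
///     _mm512_storeu_ps(out.as_mut_ptr(), r);
/// }
/// assert_eq!(out, [7.0f32; 16]);
/// ```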
3547#[inline]
3548#[target_feature(enable = "avx512f")]
3549#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3550#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3551pub fn _mm512_fmadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_fma(a, b, c) }
3553}
3554
3555/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3556///
3557/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_ps&expand=2558)
3558#[inline]
3559#[target_feature(enable = "avx512f")]
3560#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3561#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3562pub fn _mm512_mask_fmadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fmadd_ps(a, b, c), a) }
3564}
3565
3566/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3567///
3568/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_ps&expand=2560)
3569#[inline]
3570#[target_feature(enable = "avx512f")]
3571#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3572#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3573pub fn _mm512_maskz_fmadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fmadd_ps(a, b, c), _mm512_setzero_ps()) }
3575}
3576
3577/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3578///
3579/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_ps&expand=2559)
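///
/// A minimal sketch of the `mask3` variant with illustrative values (AVX-512F
/// support is assumed to have been verified at runtime, making the `unsafe`
/// block sound); note that masked-off lanes fall back to `c`, not `a`:
///
/// ```ignore
/// let mut out = [0.0f32; 16];
/// unsafe {
///     let a = _mm512_set1_ps(2.0);
///     let b = _mm512_set1_ps(3.0);
///     let c = _mm512_set1_ps(10.0);
///     // Only mask bit 0 is set: lane 0 is 2.0 * 3.0 + 10.0 = 16.0,
///     // the remaining lanes are copied from `c`.
///     let r = _mm512_mask3_fmadd_ps(a, b, c, 0b1);
///     _mm512_storeu_ps(out.as_mut_ptr(), r);
/// }
/// assert_eq!(out[0], 16.0);
/// assert_eq!(out[1..], [10.0f32; 15]);
/// ```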
3580#[inline]
3581#[target_feature(enable = "avx512f")]
3582#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3583#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3584pub fn _mm512_mask3_fmadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fmadd_ps(a, b, c), c) }
3586}
3587
3588/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3589///
3590/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmadd_ps&expand=2554)
3591#[inline]
3592#[target_feature(enable = "avx512f,avx512vl")]
3593#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3594#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3595pub fn _mm256_mask_fmadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fmadd_ps(a, b, c), a) }
3597}
3598
3599/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3600///
3601/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmadd_ps&expand=2556)
3602#[inline]
3603#[target_feature(enable = "avx512f,avx512vl")]
3604#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3605#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3606pub fn _mm256_maskz_fmadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fmadd_ps(a, b, c), _mm256_setzero_ps()) }
3608}
3609
3610/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3611///
3612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmadd_ps&expand=2555)
3613#[inline]
3614#[target_feature(enable = "avx512f,avx512vl")]
3615#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3616#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3617pub fn _mm256_mask3_fmadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fmadd_ps(a, b, c), c) }
3619}
3620
3621/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3622///
3623/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmadd_ps&expand=2550)
3624#[inline]
3625#[target_feature(enable = "avx512f,avx512vl")]
3626#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3627#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3628pub fn _mm_mask_fmadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fmadd_ps(a, b, c), a) }
3630}
3631
3632/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3633///
3634/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmadd_ps&expand=2552)
3635#[inline]
3636#[target_feature(enable = "avx512f,avx512vl")]
3637#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3638#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3639pub fn _mm_maskz_fmadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fmadd_ps(a, b, c), _mm_setzero_ps()) }
3641}
3642
3643/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3644///
3645/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmadd_ps&expand=2551)
3646#[inline]
3647#[target_feature(enable = "avx512f,avx512vl")]
3648#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3649#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
3650pub fn _mm_mask3_fmadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fmadd_ps(a, b, c), c) }
3652}
3653
3654/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.
3655///
3656/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_pd&expand=2545)
3657#[inline]
3658#[target_feature(enable = "avx512f")]
3659#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3660#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3661pub fn _mm512_fmadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_fma(a, b, c) }
3663}
3664
3665/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3666///
3667/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_pd&expand=2546)
3668#[inline]
3669#[target_feature(enable = "avx512f")]
3670#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3671#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3672pub fn _mm512_mask_fmadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fmadd_pd(a, b, c), a) }
3674}
3675
3676/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3677///
3678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_pd&expand=2548)
3679#[inline]
3680#[target_feature(enable = "avx512f")]
3681#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3682#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3683pub fn _mm512_maskz_fmadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fmadd_pd(a, b, c), _mm512_setzero_pd()) }
3685}
3686
3687/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3688///
3689/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_pd&expand=2547)
3690#[inline]
3691#[target_feature(enable = "avx512f")]
3692#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3693#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3694pub fn _mm512_mask3_fmadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fmadd_pd(a, b, c), c) }
3696}
3697
3698/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3699///
3700/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmadd_pd&expand=2542)
3701#[inline]
3702#[target_feature(enable = "avx512f,avx512vl")]
3703#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3704#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3705pub fn _mm256_mask_fmadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fmadd_pd(a, b, c), a) }
3707}
3708
3709/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3710///
3711/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmadd_pd&expand=2544)
3712#[inline]
3713#[target_feature(enable = "avx512f,avx512vl")]
3714#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3715#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3716pub fn _mm256_maskz_fmadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fmadd_pd(a, b, c), _mm256_setzero_pd()) }
3718}
3719
3720/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3721///
3722/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmadd_pd&expand=2543)
3723#[inline]
3724#[target_feature(enable = "avx512f,avx512vl")]
3725#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3726#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3727pub fn _mm256_mask3_fmadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fmadd_pd(a, b, c), c) }
3729}
3730
3731/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3732///
3733/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmadd_pd&expand=2538)
3734#[inline]
3735#[target_feature(enable = "avx512f,avx512vl")]
3736#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3737#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3738pub fn _mm_mask_fmadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fmadd_pd(a, b, c), a) }
3740}
3741
3742/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3743///
3744/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmadd_pd&expand=2540)
3745#[inline]
3746#[target_feature(enable = "avx512f,avx512vl")]
3747#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3748#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3749pub fn _mm_maskz_fmadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fmadd_pd(a, b, c), _mm_setzero_pd()) }
3751}
3752
3753/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3754///
3755/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmadd_pd&expand=2539)
3756#[inline]
3757#[target_feature(enable = "avx512f,avx512vl")]
3758#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3759#[cfg_attr(test, assert_instr(vfmadd))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
3760pub fn _mm_mask3_fmadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fmadd_pd(a, b, c), c) }
3762}
3763
3764/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.
3765///
3766/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_ps&expand=2643)
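///
/// A minimal usage sketch with illustrative values (AVX-512F support is assumed
/// to have been verified at runtime, making the `unsafe` block sound):
///
/// ```ignore
/// let mut out = [0.0f32; 16];
/// unsafe {
///     let a = _mm512_set1_ps(2.0);
///     let b = _mm512_set1_ps(3.0);
///     let c = _mm512_set1_ps(1.0);
///     // Each lane computes a * b - c: 2.0 * 3.0 - 1.0 = 5.0.
///     let r = _mm512_fmsub_ps(a, b, c);
///     _mm512_storeu_ps(out.as_mut_ptr(), r);
/// }
/// assert_eq!(out, [5.0f32; 16]);
/// ```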
3767#[inline]
3768#[target_feature(enable = "avx512f")]
3769#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
pub fn _mm512_fmsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_fma(a, b, simd_neg(c)) }
3773}
3774
3775/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3776///
3777/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_ps&expand=2644)
3778#[inline]
3779#[target_feature(enable = "avx512f")]
3780#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
pub fn _mm512_mask_fmsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fmsub_ps(a, b, c), a) }
3784}
3785
3786/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3787///
3788/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_ps&expand=2646)
3789#[inline]
3790#[target_feature(enable = "avx512f")]
3791#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
pub fn _mm512_maskz_fmsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fmsub_ps(a, b, c), _mm512_setzero_ps()) }
3795}
3796
3797/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3798///
3799/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_ps&expand=2645)
3800#[inline]
3801#[target_feature(enable = "avx512f")]
3802#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
pub fn _mm512_mask3_fmsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fmsub_ps(a, b, c), c) }
3806}
3807
3808/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3809///
3810/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsub_ps&expand=2640)
3811#[inline]
3812#[target_feature(enable = "avx512f,avx512vl")]
3813#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
pub fn _mm256_mask_fmsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fmsub_ps(a, b, c), a) }
3817}
3818
3819/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3820///
3821/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsub_ps&expand=2642)
3822#[inline]
3823#[target_feature(enable = "avx512f,avx512vl")]
3824#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
pub fn _mm256_maskz_fmsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fmsub_ps(a, b, c), _mm256_setzero_ps()) }
3828}
3829
3830/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3831///
3832/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsub_ps&expand=2641)
3833#[inline]
3834#[target_feature(enable = "avx512f,avx512vl")]
3835#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
pub fn _mm256_mask3_fmsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fmsub_ps(a, b, c), c) }
3839}
3840
3841/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3842///
3843/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsub_ps&expand=2636)
3844#[inline]
3845#[target_feature(enable = "avx512f,avx512vl")]
3846#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
pub fn _mm_mask_fmsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fmsub_ps(a, b, c), a) }
3850}
3851
3852/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3853///
3854/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsub_ps&expand=2638)
3855#[inline]
3856#[target_feature(enable = "avx512f,avx512vl")]
3857#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
pub fn _mm_maskz_fmsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fmsub_ps(a, b, c), _mm_setzero_ps()) }
3861}
3862
3863/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3864///
3865/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsub_ps&expand=2637)
3866#[inline]
3867#[target_feature(enable = "avx512f,avx512vl")]
3868#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
pub fn _mm_mask3_fmsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fmsub_ps(a, b, c), c) }
3872}
3873
3874/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.
3875///
3876/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_pd&expand=2631)
3877#[inline]
3878#[target_feature(enable = "avx512f")]
3879#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3880#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3881pub fn _mm512_fmsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_fma(a, b, simd_neg(c)) }
3883}
3884
3885/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3886///
3887/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_pd&expand=2632)
3888#[inline]
3889#[target_feature(enable = "avx512f")]
3890#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3891#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3892pub fn _mm512_mask_fmsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fmsub_pd(a, b, c), a) }
3894}
3895
3896/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3897///
3898/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_pd&expand=2634)
3899#[inline]
3900#[target_feature(enable = "avx512f")]
3901#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3902#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3903pub fn _mm512_maskz_fmsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fmsub_pd(a, b, c), _mm512_setzero_pd()) }
3905}
3906
3907/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3908///
3909/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_pd&expand=2633)
3910#[inline]
3911#[target_feature(enable = "avx512f")]
3912#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3913#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3914pub fn _mm512_mask3_fmsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fmsub_pd(a, b, c), c) }
3916}
3917
3918/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3919///
3920/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsub_pd&expand=2628)
3921#[inline]
3922#[target_feature(enable = "avx512f,avx512vl")]
3923#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3924#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3925pub fn _mm256_mask_fmsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fmsub_pd(a, b, c), a) }
3927}
3928
3929/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3930///
3931/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsub_pd&expand=2630)
3932#[inline]
3933#[target_feature(enable = "avx512f,avx512vl")]
3934#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3935#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3936pub fn _mm256_maskz_fmsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fmsub_pd(a, b, c), _mm256_setzero_pd()) }
3938}
3939
3940/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3941///
3942/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsub_pd&expand=2629)
3943#[inline]
3944#[target_feature(enable = "avx512f,avx512vl")]
3945#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3946#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3947pub fn _mm256_mask3_fmsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fmsub_pd(a, b, c), c) }
3949}
3950
3951/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
3952///
3953/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsub_pd&expand=2624)
3954#[inline]
3955#[target_feature(enable = "avx512f,avx512vl")]
3956#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3957#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3958pub fn _mm_mask_fmsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fmsub_pd(a, b, c), a) }
3960}
3961
3962/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
3963///
3964/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsub_pd&expand=2626)
3965#[inline]
3966#[target_feature(enable = "avx512f,avx512vl")]
3967#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3968#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3969pub fn _mm_maskz_fmsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fmsub_pd(a, b, c), _mm_setzero_pd()) }
3971}
3972
3973/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
3974///
3975/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsub_pd&expand=2625)
3976#[inline]
3977#[target_feature(enable = "avx512f,avx512vl")]
3978#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3979#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
3980pub fn _mm_mask3_fmsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fmsub_pd(a, b, c), c) }
3982}
3983
3984/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.
3985///
3986/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_ps&expand=2611)
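///
/// A minimal usage sketch with illustrative values (AVX-512F support is assumed
/// to have been verified at runtime, making the `unsafe` block sound);
/// even-indexed lanes subtract `c`, odd-indexed lanes add it:
///
/// ```ignore
/// let mut out = [0.0f32; 16];
/// unsafe {
///     let a = _mm512_set1_ps(2.0);
///     let b = _mm512_set1_ps(3.0);
///     let c = _mm512_set1_ps(1.0);
///     // Even lanes: 2.0 * 3.0 - 1.0 = 5.0; odd lanes: 2.0 * 3.0 + 1.0 = 7.0.
///     let r = _mm512_fmaddsub_ps(a, b, c);
///     _mm512_storeu_ps(out.as_mut_ptr(), r);
/// }
/// for (i, x) in out.iter().enumerate() {
///     assert_eq!(*x, if i % 2 == 0 { 5.0 } else { 7.0 });
/// }
/// ```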
3987#[inline]
3988#[target_feature(enable = "avx512f")]
3989#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
3990#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
3991pub fn _mm512_fmaddsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
3992 unsafe {
        let add: __m512 = simd_fma(a, b, c);
        let sub: __m512 = simd_fma(a, b, simd_neg(c));
3995 simd_shuffle!(
3996 add,
3997 sub,
3998 [16, 1, 18, 3, 20, 5, 22, 7, 24, 9, 26, 11, 28, 13, 30, 15]
3999 )
4000 }
4001}
4002
4003/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4004///
4005/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_ps&expand=2612)
4006#[inline]
4007#[target_feature(enable = "avx512f")]
4008#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4009#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4010pub fn _mm512_mask_fmaddsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ps(a, b, c), a) }
4012}
4013
4014/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4015///
4016/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_ps&expand=2614)
4017#[inline]
4018#[target_feature(enable = "avx512f")]
4019#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4020#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4021pub fn _mm512_maskz_fmaddsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ps(a, b, c), _mm512_setzero_ps()) }
4023}
4024
4025/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4026///
4027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_ps&expand=2613)
4028#[inline]
4029#[target_feature(enable = "avx512f")]
4030#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4031#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4032pub fn _mm512_mask3_fmaddsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_ps(a, b, c), c) }
4034}
4035
4036/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4037///
4038/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmaddsub_ps&expand=2608)
4039#[inline]
4040#[target_feature(enable = "avx512f,avx512vl")]
4041#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4042#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4043pub fn _mm256_mask_fmaddsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ps(a, b, c), a) }
4045}
4046
4047/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4048///
4049/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmaddsub_ps&expand=2610)
4050#[inline]
4051#[target_feature(enable = "avx512f,avx512vl")]
4052#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4053#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4054pub fn _mm256_maskz_fmaddsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_ps(a, b, c), _mm256_setzero_ps()) }
4056}
4057
4058/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4059///
4060/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmaddsub_ps&expand=2609)
4061#[inline]
4062#[target_feature(enable = "avx512f,avx512vl")]
4063#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4064#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4065pub fn _mm256_mask3_fmaddsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
4066 unsafe { simd_select_bitmask(m:k, yes:_mm256_fmaddsub_ps(a, b, c), no:c) }
4067}
4068
4069/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4070///
4071/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmaddsub_ps&expand=2604)
4072#[inline]
4073#[target_feature(enable = "avx512f,avx512vl")]
4074#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4075#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
4076pub fn _mm_mask_fmaddsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
4077 unsafe { simd_select_bitmask(m:k, yes:_mm_fmaddsub_ps(a, b, c), no:a) }
4078}
4079
4080/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4081///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmaddsub_ps&expand=2606)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
pub fn _mm_maskz_fmaddsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fmaddsub_ps(a, b, c), _mm_setzero_ps()) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmaddsub_ps&expand=2605)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
pub fn _mm_mask3_fmaddsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fmaddsub_ps(a, b, c), c) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_pd&expand=2599)
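///
/// # Example
///
/// A minimal, illustrative sketch of the lane pattern (even lanes compute `a * b - c`,
/// odd lanes `a * b + c`). The `demo` helper is hypothetical, and the snippet assumes
/// a nightly toolchain with `stdarch_x86_avx512` plus an AVX-512F-capable CPU, so it
/// is not compiled here:
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm512_set1_pd(2.0);
///     let b = _mm512_set1_pd(3.0);
///     let c = _mm512_set1_pd(1.0);
///     let r = _mm512_fmaddsub_pd(a, b, c);
///     let mut out = [0.0f64; 8];
///     _mm512_storeu_pd(out.as_mut_ptr(), r);
///     // Even lanes: 2*3 - 1 = 5; odd lanes: 2*3 + 1 = 7.
///     assert_eq!(out, [5.0, 7.0, 5.0, 7.0, 5.0, 7.0, 5.0, 7.0]);
/// }
/// ```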
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
pub fn _mm512_fmaddsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe {
        let add: __m512d = simd_fma(a, b, c);
        let sub: __m512d = simd_fma(a, b, simd_neg(c));
        simd_shuffle!(add, sub, [8, 1, 10, 3, 12, 5, 14, 7])
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_pd&expand=2600)
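///
/// # Example
///
/// A minimal, illustrative sketch of the writemask behaviour (lanes whose mask bit is
/// clear keep the value from `a`). The `demo` helper is hypothetical, and the snippet
/// assumes a nightly toolchain with `stdarch_x86_avx512` plus an AVX-512F-capable CPU,
/// so it is not compiled here:
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm512_setr_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
///     let b = _mm512_set1_pd(10.0);
///     let c = _mm512_set1_pd(1.0);
///     // Only lanes 0 and 1 are computed; lanes 2..8 keep the values from `a`.
///     let r = _mm512_mask_fmaddsub_pd(a, 0b0000_0011, b, c);
///     let mut out = [0.0f64; 8];
///     _mm512_storeu_pd(out.as_mut_ptr(), r);
///     assert_eq!(out, [9.0, 21.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0]);
/// }
/// ```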
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
pub fn _mm512_mask_fmaddsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_pd(a, b, c), a) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_pd&expand=2602)
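///
/// # Example
///
/// A minimal, illustrative sketch of the zeromask behaviour (lanes whose mask bit is
/// clear are set to zero). The `demo` helper is hypothetical, and the snippet assumes
/// a nightly toolchain with `stdarch_x86_avx512` plus an AVX-512F-capable CPU, so it
/// is not compiled here:
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm512_set1_pd(2.0);
///     let b = _mm512_set1_pd(3.0);
///     let c = _mm512_set1_pd(1.0);
///     // Only lanes 0 and 1 are computed; lanes 2..8 are zeroed.
///     let r = _mm512_maskz_fmaddsub_pd(0b0000_0011, a, b, c);
///     let mut out = [0.0f64; 8];
///     _mm512_storeu_pd(out.as_mut_ptr(), r);
///     assert_eq!(out, [5.0, 7.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]);
/// }
/// ```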
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
pub fn _mm512_maskz_fmaddsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_pd(a, b, c), _mm512_setzero_pd()) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_pd&expand=2613)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
pub fn _mm512_mask3_fmaddsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fmaddsub_pd(a, b, c), c) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmaddsub_pd&expand=2596)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
pub fn _mm256_mask_fmaddsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_pd(a, b, c), a) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmaddsub_pd&expand=2598)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
pub fn _mm256_maskz_fmaddsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_pd(a, b, c), _mm256_setzero_pd()) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmaddsub_pd&expand=2597)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
pub fn _mm256_mask3_fmaddsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fmaddsub_pd(a, b, c), c) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmaddsub_pd&expand=2592)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
pub fn _mm_mask_fmaddsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fmaddsub_pd(a, b, c), a) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmaddsub_pd&expand=2594)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
pub fn _mm_maskz_fmaddsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fmaddsub_pd(a, b, c), _mm_setzero_pd()) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmaddsub_pd&expand=2593)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
pub fn _mm_mask3_fmaddsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fmaddsub_pd(a, b, c), c) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_ps&expand=2691)
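///
/// # Example
///
/// A minimal, illustrative sketch of the lane pattern (even lanes compute `a * b + c`,
/// odd lanes `a * b - c`). The `demo` helper is hypothetical, and the snippet assumes
/// a nightly toolchain with `stdarch_x86_avx512` plus an AVX-512F-capable CPU, so it
/// is not compiled here:
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm512_set1_ps(2.0);
///     let b = _mm512_set1_ps(3.0);
///     let c = _mm512_set1_ps(1.0);
///     let r = _mm512_fmsubadd_ps(a, b, c);
///     let mut out = [0.0f32; 16];
///     _mm512_storeu_ps(out.as_mut_ptr(), r);
///     // Even lanes: 2*3 + 1 = 7; odd lanes: 2*3 - 1 = 5.
///     assert_eq!(&out[..4], &[7.0, 5.0, 7.0, 5.0]);
/// }
/// ```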
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
pub fn _mm512_fmsubadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe {
        let add: __m512 = simd_fma(a, b, c);
        let sub: __m512 = simd_fma(a, b, simd_neg(c));
        simd_shuffle!(
            add,
            sub,
            [0, 17, 2, 19, 4, 21, 6, 23, 8, 25, 10, 27, 12, 29, 14, 31]
        )
    }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_ps&expand=2692)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
pub fn _mm512_mask_fmsubadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ps(a, b, c), a) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_ps&expand=2694)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
pub fn _mm512_maskz_fmsubadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ps(a, b, c), _mm512_setzero_ps()) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_ps&expand=2693)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
pub fn _mm512_mask3_fmsubadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_ps(a, b, c), c) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsubadd_ps&expand=2688)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
pub fn _mm256_mask_fmsubadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ps(a, b, c), a) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsubadd_ps&expand=2690)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
pub fn _mm256_maskz_fmsubadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ps(a, b, c), _mm256_setzero_ps()) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsubadd_ps&expand=2689)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
pub fn _mm256_mask3_fmsubadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_ps(a, b, c), c) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsubadd_ps&expand=2684)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
pub fn _mm_mask_fmsubadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fmsubadd_ps(a, b, c), a) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsubadd_ps&expand=2686)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
pub fn _mm_maskz_fmsubadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fmsubadd_ps(a, b, c), _mm_setzero_ps()) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsubadd_ps&expand=2685)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
pub fn _mm_mask3_fmsubadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fmsubadd_ps(a, b, c), c) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_pd&expand=2679)
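///
/// # Example
///
/// A minimal, illustrative sketch of the double-precision lane pattern (even lanes
/// compute `a * b + c`, odd lanes `a * b - c`). The `demo` helper is hypothetical, and
/// the snippet assumes a nightly toolchain with `stdarch_x86_avx512` plus an
/// AVX-512F-capable CPU, so it is not compiled here:
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm512_set1_pd(2.0);
///     let b = _mm512_set1_pd(3.0);
///     let c = _mm512_set1_pd(1.0);
///     let r = _mm512_fmsubadd_pd(a, b, c);
///     let mut out = [0.0f64; 8];
///     _mm512_storeu_pd(out.as_mut_ptr(), r);
///     // Even lanes: 2*3 + 1 = 7; odd lanes: 2*3 - 1 = 5.
///     assert_eq!(out, [7.0, 5.0, 7.0, 5.0, 7.0, 5.0, 7.0, 5.0]);
/// }
/// ```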
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
pub fn _mm512_fmsubadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe {
        let add: __m512d = simd_fma(a, b, c);
        let sub: __m512d = simd_fma(a, b, simd_neg(c));
        simd_shuffle!(add, sub, [0, 9, 2, 11, 4, 13, 6, 15])
    }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_pd&expand=2680)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
pub fn _mm512_mask_fmsubadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_pd(a, b, c), a) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_pd&expand=2682)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
pub fn _mm512_maskz_fmsubadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_pd(a, b, c), _mm512_setzero_pd()) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_pd&expand=2681)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
pub fn _mm512_mask3_fmsubadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fmsubadd_pd(a, b, c), c) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fmsubadd_pd&expand=2676)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
pub fn _mm256_mask_fmsubadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_pd(a, b, c), a) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fmsubadd_pd&expand=2678)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
pub fn _mm256_maskz_fmsubadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_pd(a, b, c), _mm256_setzero_pd()) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fmsubadd_pd&expand=2677)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
pub fn _mm256_mask3_fmsubadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fmsubadd_pd(a, b, c), c) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fmsubadd_pd&expand=2672)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
pub fn _mm_mask_fmsubadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fmsubadd_pd(a, b, c), a) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fmsubadd_pd&expand=2674)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
pub fn _mm_maskz_fmsubadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fmsubadd_pd(a, b, c), _mm_setzero_pd()) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fmsubadd_pd&expand=2673)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
pub fn _mm_mask3_fmsubadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fmsubadd_pd(a, b, c), c) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_ps&expand=2723)
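///
/// # Example
///
/// A minimal, illustrative sketch: every lane computes `-(a * b) + c`. The `demo`
/// helper is hypothetical, and the snippet assumes a nightly toolchain with
/// `stdarch_x86_avx512` plus an AVX-512F-capable CPU, so it is not compiled here:
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm512_set1_ps(2.0);
///     let b = _mm512_set1_ps(3.0);
///     let c = _mm512_set1_ps(10.0);
///     let r = _mm512_fnmadd_ps(a, b, c);
///     let mut out = [0.0f32; 16];
///     _mm512_storeu_ps(out.as_mut_ptr(), r);
///     // Every lane: -(2*3) + 10 = 4.
///     assert!(out.iter().all(|&x| x == 4.0));
/// }
/// ```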
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
pub fn _mm512_fnmadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_fma(simd_neg(a), b, c) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_ps&expand=2724)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
pub fn _mm512_mask_fnmadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fnmadd_ps(a, b, c), a) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_ps&expand=2726)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
pub fn _mm512_maskz_fnmadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fnmadd_ps(a, b, c), _mm512_setzero_ps()) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_ps&expand=2725)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
pub fn _mm512_mask3_fnmadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
    unsafe { simd_select_bitmask(k, _mm512_fnmadd_ps(a, b, c), c) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmadd_ps&expand=2720)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
pub fn _mm256_mask_fnmadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fnmadd_ps(a, b, c), a) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmadd_ps&expand=2722)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
pub fn _mm256_maskz_fnmadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fnmadd_ps(a, b, c), _mm256_setzero_ps()) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmadd_ps&expand=2721)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
pub fn _mm256_mask3_fnmadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
    unsafe { simd_select_bitmask(k, _mm256_fnmadd_ps(a, b, c), c) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmadd_ps&expand=2716)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
pub fn _mm_mask_fnmadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fnmadd_ps(a, b, c), a) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmadd_ps&expand=2718)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
pub fn _mm_maskz_fnmadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fnmadd_ps(a, b, c), _mm_setzero_ps()) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmadd_ps&expand=2717)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
pub fn _mm_mask3_fnmadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
    unsafe { simd_select_bitmask(k, _mm_fnmadd_ps(a, b, c), c) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_pd&expand=2711)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
pub fn _mm512_fnmadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_fma(simd_neg(a), b, c) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_pd&expand=2712)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
pub fn _mm512_mask_fnmadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fnmadd_pd(a, b, c), a) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_pd&expand=2714)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
pub fn _mm512_maskz_fnmadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fnmadd_pd(a, b, c), _mm512_setzero_pd()) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_pd&expand=2713)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
pub fn _mm512_mask3_fnmadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
    unsafe { simd_select_bitmask(k, _mm512_fnmadd_pd(a, b, c), c) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmadd_pd&expand=2708)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
pub fn _mm256_mask_fnmadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fnmadd_pd(a, b, c), a) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmadd_pd&expand=2710)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
pub fn _mm256_maskz_fnmadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fnmadd_pd(a, b, c), _mm256_setzero_pd()) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmadd_pd&expand=2709)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
pub fn _mm256_mask3_fnmadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
    unsafe { simd_select_bitmask(k, _mm256_fnmadd_pd(a, b, c), c) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmadd_pd&expand=2704)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
pub fn _mm_mask_fnmadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fnmadd_pd(a, b, c), a) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmadd_pd&expand=2706)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
pub fn _mm_maskz_fnmadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fnmadd_pd(a, b, c), _mm_setzero_pd()) }
}

/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmadd_pd&expand=2705)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
pub fn _mm_mask3_fnmadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
    unsafe { simd_select_bitmask(k, _mm_fnmadd_pd(a, b, c), c) }
}

/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_ps&expand=2771)
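///
/// # Example
///
/// A minimal, illustrative sketch: every lane computes `-(a * b) - c`. The `demo`
/// helper is hypothetical, and the snippet assumes a nightly toolchain with
/// `stdarch_x86_avx512` plus an AVX-512F-capable CPU, so it is not compiled here:
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// unsafe fn demo() {
///     let a = _mm512_set1_ps(2.0);
///     let b = _mm512_set1_ps(3.0);
///     let c = _mm512_set1_ps(1.0);
///     let r = _mm512_fnmsub_ps(a, b, c);
///     let mut out = [0.0f32; 16];
///     _mm512_storeu_ps(out.as_mut_ptr(), r);
///     // Every lane: -(2*3) - 1 = -7.
///     assert!(out.iter().all(|&x| x == -7.0));
/// }
/// ```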
4671#[inline]
4672#[target_feature(enable = "avx512f")]
4673#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4674#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4675pub fn _mm512_fnmsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
4676 unsafe { simd_fma(x:simd_neg(a), y:b, z:simd_neg(c)) }
4677}
4678
4679/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4680///
4681/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_ps&expand=2772)
4682#[inline]
4683#[target_feature(enable = "avx512f")]
4684#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4685#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4686pub fn _mm512_mask_fnmsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
4687 unsafe { simd_select_bitmask(m:k, yes:_mm512_fnmsub_ps(a, b, c), no:a) }
4688}
4689
4690/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4691///
4692/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_ps&expand=2774)
4693#[inline]
4694#[target_feature(enable = "avx512f")]
4695#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4696#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4697pub fn _mm512_maskz_fnmsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
4698 unsafe { simd_select_bitmask(m:k, yes:_mm512_fnmsub_ps(a, b, c), no:_mm512_setzero_ps()) }
4699}
4700
4701/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4702///
4703/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_ps&expand=2773)
4704#[inline]
4705#[target_feature(enable = "avx512f")]
4706#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4707#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4708pub fn _mm512_mask3_fnmsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
4709 unsafe { simd_select_bitmask(m:k, yes:_mm512_fnmsub_ps(a, b, c), no:c) }
4710}
4711
4712/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4713///
4714/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmsub_ps&expand=2768)
4715#[inline]
4716#[target_feature(enable = "avx512f,avx512vl")]
4717#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4718#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4719pub fn _mm256_mask_fnmsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
4720 unsafe { simd_select_bitmask(m:k, yes:_mm256_fnmsub_ps(a, b, c), no:a) }
4721}
4722
4723/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4724///
4725/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmsub_ps&expand=2770)
4726#[inline]
4727#[target_feature(enable = "avx512f,avx512vl")]
4728#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4729#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4730pub fn _mm256_maskz_fnmsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
4731 unsafe { simd_select_bitmask(m:k, yes:_mm256_fnmsub_ps(a, b, c), no:_mm256_setzero_ps()) }
4732}
4733
4734/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4735///
4736/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmsub_ps&expand=2769)
4737#[inline]
4738#[target_feature(enable = "avx512f,avx512vl")]
4739#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4740#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4741pub fn _mm256_mask3_fnmsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
4742 unsafe { simd_select_bitmask(m:k, yes:_mm256_fnmsub_ps(a, b, c), no:c) }
4743}
4744
4745/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4746///
4747/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmsub_ps&expand=2764)
4748#[inline]
4749#[target_feature(enable = "avx512f,avx512vl")]
4750#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4751#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4752pub fn _mm_mask_fnmsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
4753 unsafe { simd_select_bitmask(m:k, yes:_mm_fnmsub_ps(a, b, c), no:a) }
4754}
4755
4756/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4757///
4758/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmsub_ps&expand=2766)
4759#[inline]
4760#[target_feature(enable = "avx512f,avx512vl")]
4761#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4762#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4763pub fn _mm_maskz_fnmsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
4764 unsafe { simd_select_bitmask(m:k, yes:_mm_fnmsub_ps(a, b, c), no:_mm_setzero_ps()) }
4765}
4766
4767/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4768///
4769/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmsub_ps&expand=2765)
4770#[inline]
4771#[target_feature(enable = "avx512f,avx512vl")]
4772#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4773#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
4774pub fn _mm_mask3_fnmsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
4775 unsafe { simd_select_bitmask(m:k, yes:_mm_fnmsub_ps(a, b, c), no:c) }
4776}
4777
4778/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.
4779///
4780/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_pd&expand=2759)
4781#[inline]
4782#[target_feature(enable = "avx512f")]
4783#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4784#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4785pub fn _mm512_fnmsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4786 unsafe { simd_fma(x:simd_neg(a), y:b, z:simd_neg(c)) }
4787}
4788
4789/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4790///
4791/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_pd&expand=2760)
4792#[inline]
4793#[target_feature(enable = "avx512f")]
4794#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4795#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4796pub fn _mm512_mask_fnmsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
4797 unsafe { simd_select_bitmask(m:k, yes:_mm512_fnmsub_pd(a, b, c), no:a) }
4798}
4799
4800/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4801///
4802/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_pd&expand=2762)
4803#[inline]
4804#[target_feature(enable = "avx512f")]
4805#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4806#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4807pub fn _mm512_maskz_fnmsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
4808 unsafe { simd_select_bitmask(m:k, yes:_mm512_fnmsub_pd(a, b, c), no:_mm512_setzero_pd()) }
4809}
4810
4811/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4812///
4813/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_pd&expand=2761)
4814#[inline]
4815#[target_feature(enable = "avx512f")]
4816#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4817#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4818pub fn _mm512_mask3_fnmsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
4819 unsafe { simd_select_bitmask(m:k, yes:_mm512_fnmsub_pd(a, b, c), no:c) }
4820}
4821
4822/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4823///
4824/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fnmsub_pd&expand=2756)
4825#[inline]
4826#[target_feature(enable = "avx512f,avx512vl")]
4827#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4828#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4829pub fn _mm256_mask_fnmsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
4830 unsafe { simd_select_bitmask(k, _mm256_fnmsub_pd(a, b, c), a) }
4831}
4832
4833/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4834///
4835/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fnmsub_pd&expand=2758)
4836#[inline]
4837#[target_feature(enable = "avx512f,avx512vl")]
4838#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4839#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4840pub fn _mm256_maskz_fnmsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
4841 unsafe { simd_select_bitmask(k, _mm256_fnmsub_pd(a, b, c), _mm256_setzero_pd()) }
4842}
4843
4844/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4845///
4846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask3_fnmsub_pd&expand=2757)
4847#[inline]
4848#[target_feature(enable = "avx512f,avx512vl")]
4849#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4850#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4851pub fn _mm256_mask3_fnmsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
4852 unsafe { simd_select_bitmask(k, _mm256_fnmsub_pd(a, b, c), c) }
4853}
4854
4855/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
4856///
4857/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fnmsub_pd&expand=2752)
4858#[inline]
4859#[target_feature(enable = "avx512f,avx512vl")]
4860#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4861#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4862pub fn _mm_mask_fnmsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
4863 unsafe { simd_select_bitmask(k, _mm_fnmsub_pd(a, b, c), a) }
4864}
4865
4866/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
4867///
4868/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fnmsub_pd&expand=2754)
4869#[inline]
4870#[target_feature(enable = "avx512f,avx512vl")]
4871#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4872#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4873pub fn _mm_maskz_fnmsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
4874 unsafe { simd_select_bitmask(k, _mm_fnmsub_pd(a, b, c), _mm_setzero_pd()) }
4875}
4876
4877/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).
4878///
4879/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask3_fnmsub_pd&expand=2753)
4880#[inline]
4881#[target_feature(enable = "avx512f,avx512vl")]
4882#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4883#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
4884pub fn _mm_mask3_fnmsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
4885 unsafe { simd_select_bitmask(k, _mm_fnmsub_pd(a, b, c), c) }
4886}
4887
4888/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
4889///
4890/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rcp14_ps&expand=4502)
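///
/// # Example
///
/// A minimal sketch (assumes `avx512f`; marked `ignore`): the result is only an
/// approximation, so it is compared against the exact reciprocal with a 2^-14
/// relative tolerance rather than for equality.
///
/// ```ignore
/// let a = _mm512_set1_ps(4.0);
/// let r = _mm512_rcp14_ps(a);
/// let mut out = [0.0f32; 16];
/// _mm512_storeu_ps(out.as_mut_ptr(), r);
/// // Each lane approximates 1.0 / 4.0 = 0.25 to within a relative error of 2^-14.
/// assert!((out[0] - 0.25).abs() <= 0.25 * 2.0f32.powi(-14));
/// ```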
4891#[inline]
4892#[target_feature(enable = "avx512f")]
4893#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4894#[cfg_attr(test, assert_instr(vrcp14ps))]
4895pub fn _mm512_rcp14_ps(a: __m512) -> __m512 {
4896 unsafe { transmute(vrcp14ps(a.as_f32x16(), f32x16::ZERO, 0b11111111_11111111)) }
4897}
4898
4899/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4900///
4901/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rcp14_ps&expand=4500)
4902#[inline]
4903#[target_feature(enable = "avx512f")]
4904#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4905#[cfg_attr(test, assert_instr(vrcp14ps))]
4906pub fn _mm512_mask_rcp14_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
4907 unsafe { transmute(vrcp14ps(a.as_f32x16(), src.as_f32x16(), k)) }
4908}
4909
4910/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4911///
4912/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rcp14_ps&expand=4501)
4913#[inline]
4914#[target_feature(enable = "avx512f")]
4915#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4916#[cfg_attr(test, assert_instr(vrcp14ps))]
4917pub fn _mm512_maskz_rcp14_ps(k: __mmask16, a: __m512) -> __m512 {
4918 unsafe { transmute(vrcp14ps(a.as_f32x16(), f32x16::ZERO, k)) }
4919}
4920
4921/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
4922///
4923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rcp14_ps&expand=4499)
4924#[inline]
4925#[target_feature(enable = "avx512f,avx512vl")]
4926#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4927#[cfg_attr(test, assert_instr(vrcp14ps))]
4928pub fn _mm256_rcp14_ps(a: __m256) -> __m256 {
4929 unsafe { transmute(vrcp14ps256(a.as_f32x8(), f32x8::ZERO, 0b11111111)) }
4930}
4931
4932/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4933///
4934/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rcp14_ps&expand=4497)
4935#[inline]
4936#[target_feature(enable = "avx512f,avx512vl")]
4937#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4938#[cfg_attr(test, assert_instr(vrcp14ps))]
4939pub fn _mm256_mask_rcp14_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
4940 unsafe { transmute(vrcp14ps256(a.as_f32x8(), src.as_f32x8(), k)) }
4941}
4942
4943/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4944///
4945/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rcp14_ps&expand=4498)
4946#[inline]
4947#[target_feature(enable = "avx512f,avx512vl")]
4948#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4949#[cfg_attr(test, assert_instr(vrcp14ps))]
4950pub fn _mm256_maskz_rcp14_ps(k: __mmask8, a: __m256) -> __m256 {
4951 unsafe { transmute(vrcp14ps256(a.as_f32x8(), f32x8::ZERO, k)) }
4952}
4953
4954/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
4955///
4956/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rcp14_ps&expand=4496)
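///
/// # Example
///
/// An illustrative sketch (marked `ignore`): because this 128-bit form needs both
/// `avx512f` and `avx512vl`, a caller would typically gate it on runtime detection
/// of both features before using it.
///
/// ```ignore
/// if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512vl") {
///     // SAFETY: the required CPU features were just detected at runtime.
///     let r = unsafe { _mm_rcp14_ps(_mm_set1_ps(8.0)) };
///     // Each of the four lanes approximates 1.0 / 8.0 = 0.125.
/// }
/// ```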
4957#[inline]
4958#[target_feature(enable = "avx512f,avx512vl")]
4959#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4960#[cfg_attr(test, assert_instr(vrcp14ps))]
4961pub fn _mm_rcp14_ps(a: __m128) -> __m128 {
4962 unsafe { transmute(vrcp14ps128(a.as_f32x4(), f32x4::ZERO, 0b00001111)) }
4963}
4964
4965/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4966///
4967/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rcp14_ps&expand=4494)
4968#[inline]
4969#[target_feature(enable = "avx512f,avx512vl")]
4970#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4971#[cfg_attr(test, assert_instr(vrcp14ps))]
4972pub fn _mm_mask_rcp14_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
4973 unsafe { transmute(vrcp14ps128(a.as_f32x4(), src.as_f32x4(), k)) }
4974}
4975
4976/// Compute the approximate reciprocal of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4977///
4978/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rcp14_ps&expand=4495)
4979#[inline]
4980#[target_feature(enable = "avx512f,avx512vl")]
4981#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4982#[cfg_attr(test, assert_instr(vrcp14ps))]
4983pub fn _mm_maskz_rcp14_ps(k: __mmask8, a: __m128) -> __m128 {
4984 unsafe { transmute(vrcp14ps128(a.as_f32x4(), f32x4::ZERO, k)) }
4985}
4986
4987/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
4988///
4989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rcp14_pd&expand=4493)
4990#[inline]
4991#[target_feature(enable = "avx512f")]
4992#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
4993#[cfg_attr(test, assert_instr(vrcp14pd))]
4994pub fn _mm512_rcp14_pd(a: __m512d) -> __m512d {
4995 unsafe { transmute(vrcp14pd(a.as_f64x8(), f64x8::ZERO, 0b11111111)) }
4996}
4997
4998/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
4999///
5000/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rcp14_pd&expand=4491)
5001#[inline]
5002#[target_feature(enable = "avx512f")]
5003#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5004#[cfg_attr(test, assert_instr(vrcp14pd))]
5005pub fn _mm512_mask_rcp14_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
5006 unsafe { transmute(vrcp14pd(a.as_f64x8(), src.as_f64x8(), k)) }
5007}
5008
5009/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5010///
5011/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rcp14_pd&expand=4492)
5012#[inline]
5013#[target_feature(enable = "avx512f")]
5014#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5015#[cfg_attr(test, assert_instr(vrcp14pd))]
5016pub fn _mm512_maskz_rcp14_pd(k: __mmask8, a: __m512d) -> __m512d {
5017 unsafe { transmute(vrcp14pd(a.as_f64x8(), f64x8::ZERO, k)) }
5018}
5019
5020/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5021///
5022/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rcp14_pd&expand=4490)
5023#[inline]
5024#[target_feature(enable = "avx512f,avx512vl")]
5025#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5026#[cfg_attr(test, assert_instr(vrcp14pd))]
5027pub fn _mm256_rcp14_pd(a: __m256d) -> __m256d {
5028 unsafe { transmute(vrcp14pd256(a.as_f64x4(), f64x4::ZERO, 0b00001111)) }
5029}
5030
5031/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5032///
5033/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rcp14_pd&expand=4488)
5034#[inline]
5035#[target_feature(enable = "avx512f,avx512vl")]
5036#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5037#[cfg_attr(test, assert_instr(vrcp14pd))]
5038pub fn _mm256_mask_rcp14_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
5039 unsafe { transmute(vrcp14pd256(a.as_f64x4(), src.as_f64x4(), k)) }
5040}
5041
5042/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5043///
5044/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rcp14_pd&expand=4489)
5045#[inline]
5046#[target_feature(enable = "avx512f,avx512vl")]
5047#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5048#[cfg_attr(test, assert_instr(vrcp14pd))]
5049pub fn _mm256_maskz_rcp14_pd(k: __mmask8, a: __m256d) -> __m256d {
5050 unsafe { transmute(vrcp14pd256(a.as_f64x4(), f64x4::ZERO, k)) }
5051}
5052
5053/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5054///
5055/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rcp14_pd&expand=4487)
5056#[inline]
5057#[target_feature(enable = "avx512f,avx512vl")]
5058#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5059#[cfg_attr(test, assert_instr(vrcp14pd))]
5060pub fn _mm_rcp14_pd(a: __m128d) -> __m128d {
5061 unsafe { transmute(vrcp14pd128(a.as_f64x2(), f64x2::ZERO, 0b00000011)) }
5062}
5063
5064/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5065///
5066/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rcp14_pd&expand=4485)
5067#[inline]
5068#[target_feature(enable = "avx512f,avx512vl")]
5069#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5070#[cfg_attr(test, assert_instr(vrcp14pd))]
5071pub fn _mm_mask_rcp14_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
5072 unsafe { transmute(vrcp14pd128(a.as_f64x2(), src.as_f64x2(), k)) }
5073}
5074
5075/// Compute the approximate reciprocal of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5076///
5077/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rcp14_pd&expand=4486)
5078#[inline]
5079#[target_feature(enable = "avx512f,avx512vl")]
5080#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5081#[cfg_attr(test, assert_instr(vrcp14pd))]
5082pub fn _mm_maskz_rcp14_pd(k: __mmask8, a: __m128d) -> __m128d {
5083 unsafe { transmute(vrcp14pd128(a.as_f64x2(), f64x2::ZERO, k)) }
5084}
5085
5086/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5087///
5088/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rsqrt14_ps&expand=4819)
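///
/// # Example
///
/// A minimal sketch (assumes `avx512f`; marked `ignore`): each lane approximates
/// `1.0 / sqrt(x)` to within a relative error of 2^-14.
///
/// ```ignore
/// let a = _mm512_set1_ps(16.0);
/// // Each lane approximates 1.0 / sqrt(16.0) = 0.25.
/// let r = _mm512_rsqrt14_ps(a);
/// ```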
5089#[inline]
5090#[target_feature(enable = "avx512f")]
5091#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5092#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5093pub fn _mm512_rsqrt14_ps(a: __m512) -> __m512 {
5094 unsafe { transmute(vrsqrt14ps(a.as_f32x16(), f32x16::ZERO, 0b11111111_11111111)) }
5095}
5096
5097/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5098///
5099/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rsqrt14_ps&expand=4817)
5100#[inline]
5101#[target_feature(enable = "avx512f")]
5102#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5103#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5104pub fn _mm512_mask_rsqrt14_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
5105 unsafe { transmute(vrsqrt14ps(a.as_f32x16(), src.as_f32x16(), k)) }
5106}
5107
5108/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5109///
5110/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rsqrt14_ps&expand=4818)
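///
/// # Example
///
/// An illustrative sketch (assumes `avx512f`; marked `ignore`) of the zeromask
/// behaviour: lanes whose mask bit is clear are forced to zero instead of being
/// copied from a source vector.
///
/// ```ignore
/// let a = _mm512_set1_ps(4.0);
/// // Lanes 0..8 approximate 1.0 / sqrt(4.0) = 0.5; lanes 8..16 are zeroed.
/// let r = _mm512_maskz_rsqrt14_ps(0b00000000_11111111, a);
/// ```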
5111#[inline]
5112#[target_feature(enable = "avx512f")]
5113#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5114#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5115pub fn _mm512_maskz_rsqrt14_ps(k: __mmask16, a: __m512) -> __m512 {
5116 unsafe { transmute(vrsqrt14ps(a.as_f32x16(), f32x16::ZERO, k)) }
5117}
5118
5119/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5120///
5121/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rsqrt14_ps)
5122#[inline]
5123#[target_feature(enable = "avx512f,avx512vl")]
5124#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5125#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5126pub fn _mm256_rsqrt14_ps(a: __m256) -> __m256 {
5127 unsafe { transmute(vrsqrt14ps256(a.as_f32x8(), f32x8::ZERO, 0b11111111)) }
5128}
5129
5130/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5131///
5132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rsqrt14_ps&expand=4815)
5133#[inline]
5134#[target_feature(enable = "avx512f,avx512vl")]
5135#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5136#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5137pub fn _mm256_mask_rsqrt14_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
5138 unsafe { transmute(vrsqrt14ps256(a.as_f32x8(), src.as_f32x8(), k)) }
5139}
5140
5141/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5142///
5143/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rsqrt14_ps&expand=4816)
5144#[inline]
5145#[target_feature(enable = "avx512f,avx512vl")]
5146#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5147#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5148pub fn _mm256_maskz_rsqrt14_ps(k: __mmask8, a: __m256) -> __m256 {
5149 unsafe { transmute(vrsqrt14ps256(a.as_f32x8(), f32x8::ZERO, k)) }
5150}
5151
5152/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5153///
5154/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rsqrt14_ps)
5155#[inline]
5156#[target_feature(enable = "avx512f,avx512vl")]
5157#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5158#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5159pub fn _mm_rsqrt14_ps(a: __m128) -> __m128 {
5160 unsafe { transmute(vrsqrt14ps128(a.as_f32x4(), f32x4::ZERO, 0b00001111)) }
5161}
5162
5163/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5164///
5165/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rsqrt14_ps&expand=4813)
5166#[inline]
5167#[target_feature(enable = "avx512f,avx512vl")]
5168#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5169#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5170pub fn _mm_mask_rsqrt14_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
5171 unsafe { transmute(vrsqrt14ps128(a.as_f32x4(), src.as_f32x4(), k)) }
5172}
5173
5174/// Compute the approximate reciprocal square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5175///
5176/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rsqrt14_ps&expand=4814)
5177#[inline]
5178#[target_feature(enable = "avx512f,avx512vl")]
5179#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5180#[cfg_attr(test, assert_instr(vrsqrt14ps))]
5181pub fn _mm_maskz_rsqrt14_ps(k: __mmask8, a: __m128) -> __m128 {
5182 unsafe { transmute(vrsqrt14ps128(a.as_f32x4(), f32x4::ZERO, k)) }
5183}
5184
5185/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5186///
5187/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rsqrt14_pd&expand=4812)
5188#[inline]
5189#[target_feature(enable = "avx512f")]
5190#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5191#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5192pub fn _mm512_rsqrt14_pd(a: __m512d) -> __m512d {
5193 unsafe { transmute(vrsqrt14pd(a.as_f64x8(), f64x8::ZERO, 0b11111111)) }
5194}
5195
5196/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5197///
5198/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rsqrt14_pd&expand=4810)
5199#[inline]
5200#[target_feature(enable = "avx512f")]
5201#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5202#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5203pub fn _mm512_mask_rsqrt14_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
5204 unsafe { transmute(vrsqrt14pd(a.as_f64x8(), src.as_f64x8(), k)) }
5205}
5206
5207/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5208///
5209/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rsqrt14_pd&expand=4811)
5210#[inline]
5211#[target_feature(enable = "avx512f")]
5212#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5213#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5214pub fn _mm512_maskz_rsqrt14_pd(k: __mmask8, a: __m512d) -> __m512d {
5215 unsafe { transmute(vrsqrt14pd(a.as_f64x8(), f64x8::ZERO, k)) }
5216}
5217
5218/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5219///
5220/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rsqrt14_pd)
5221#[inline]
5222#[target_feature(enable = "avx512f,avx512vl")]
5223#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5224#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5225pub fn _mm256_rsqrt14_pd(a: __m256d) -> __m256d {
5226 unsafe { transmute(vrsqrt14pd256(a.as_f64x4(), f64x4::ZERO, 0b00001111)) }
5227}
5228
5229/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5230///
5231/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rsqrt14_pd&expand=4808)
5232#[inline]
5233#[target_feature(enable = "avx512f,avx512vl")]
5234#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5235#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5236pub fn _mm256_mask_rsqrt14_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
5237 unsafe { transmute(vrsqrt14pd256(a.as_f64x4(), src.as_f64x4(), k)) }
5238}
5239
5240/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5241///
5242/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rsqrt14_pd&expand=4809)
5243#[inline]
5244#[target_feature(enable = "avx512f,avx512vl")]
5245#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5246#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5247pub fn _mm256_maskz_rsqrt14_pd(k: __mmask8, a: __m256d) -> __m256d {
5248 unsafe { transmute(vrsqrt14pd256(a.as_f64x4(), f64x4::ZERO, k)) }
5249}
5250
5251/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. The maximum relative error for this approximation is less than 2^-14.
5252///
5253/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rsqrt14_pd)
5254#[inline]
5255#[target_feature(enable = "avx512f,avx512vl")]
5256#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5257#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5258pub fn _mm_rsqrt14_pd(a: __m128d) -> __m128d {
5259 unsafe { transmute(vrsqrt14pd128(a.as_f64x2(), f64x2::ZERO, 0b00000011)) }
5260}
5261
5262/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5263///
5264/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rsqrt14_pd&expand=4806)
5265#[inline]
5266#[target_feature(enable = "avx512f,avx512vl")]
5267#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5268#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5269pub fn _mm_mask_rsqrt14_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
5270 unsafe { transmute(vrsqrt14pd128(a.as_f64x2(), src.as_f64x2(), k)) }
5271}
5272
5273/// Compute the approximate reciprocal square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). The maximum relative error for this approximation is less than 2^-14.
5274///
5275/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rsqrt14_pd&expand=4807)
5276#[inline]
5277#[target_feature(enable = "avx512f,avx512vl")]
5278#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5279#[cfg_attr(test, assert_instr(vrsqrt14pd))]
5280pub fn _mm_maskz_rsqrt14_pd(k: __mmask8, a: __m128d) -> __m128d {
5281 unsafe { transmute(vrsqrt14pd128(a.as_f64x2(), f64x2::ZERO, k)) }
5282}
5283
5284/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5285///
5286/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_ps&expand=2844)
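///
/// # Example
///
/// A minimal sketch (assumes `avx512f`; marked `ignore`): the result is the
/// floating-point representation of `floor(log2(x))` for each lane.
///
/// ```ignore
/// let a = _mm512_set1_ps(10.0);
/// // floor(log2(10.0)) = 3, so every lane of `r` holds 3.0.
/// let r = _mm512_getexp_ps(a);
/// ```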
5287#[inline]
5288#[target_feature(enable = "avx512f")]
5289#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5290#[cfg_attr(test, assert_instr(vgetexpps))]
5291pub fn _mm512_getexp_ps(a: __m512) -> __m512 {
5292 unsafe {
5293 transmute(vgetexpps(
5294 a.as_f32x16(),
5295 f32x16::ZERO,
5296 0b11111111_11111111,
5297 _MM_FROUND_CUR_DIRECTION,
5298 ))
5299 }
5300}
5301
5302/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5303///
5304/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_ps&expand=2845)
5305#[inline]
5306#[target_feature(enable = "avx512f")]
5307#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5308#[cfg_attr(test, assert_instr(vgetexpps))]
5309pub fn _mm512_mask_getexp_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
5310 unsafe {
5311 transmute(vgetexpps(
5312 a.as_f32x16(),
5313 src.as_f32x16(),
5314 k,
5315 _MM_FROUND_CUR_DIRECTION,
5316 ))
5317 }
5318}
5319
5320/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5321///
5322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_ps&expand=2846)
5323#[inline]
5324#[target_feature(enable = "avx512f")]
5325#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5326#[cfg_attr(test, assert_instr(vgetexpps))]
5327pub fn _mm512_maskz_getexp_ps(k: __mmask16, a: __m512) -> __m512 {
5328 unsafe {
5329 transmute(vgetexpps(
5330 a.as_f32x16(),
5331 f32x16::ZERO,
5332 k,
5333 _MM_FROUND_CUR_DIRECTION,
5334 ))
5335 }
5336}
5337
5338/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5339///
5340/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getexp_ps&expand=2841)
5341#[inline]
5342#[target_feature(enable = "avx512f,avx512vl")]
5343#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5344#[cfg_attr(test, assert_instr(vgetexpps))]
5345pub fn _mm256_getexp_ps(a: __m256) -> __m256 {
5346 unsafe { transmute(vgetexpps256(a.as_f32x8(), f32x8::ZERO, 0b11111111)) }
5347}
5348
5349/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5350///
5351/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getexp_ps&expand=2842)
5352#[inline]
5353#[target_feature(enable = "avx512f,avx512vl")]
5354#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5355#[cfg_attr(test, assert_instr(vgetexpps))]
5356pub fn _mm256_mask_getexp_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
5357 unsafe { transmute(vgetexpps256(a.as_f32x8(), src.as_f32x8(), k)) }
5358}
5359
5360/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5361///
5362/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getexp_ps&expand=2843)
5363#[inline]
5364#[target_feature(enable = "avx512f,avx512vl")]
5365#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5366#[cfg_attr(test, assert_instr(vgetexpps))]
5367pub fn _mm256_maskz_getexp_ps(k: __mmask8, a: __m256) -> __m256 {
5368 unsafe { transmute(vgetexpps256(a.as_f32x8(), f32x8::ZERO, k)) }
5369}
5370
5371/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5372///
5373/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getexp_ps&expand=2838)
5374#[inline]
5375#[target_feature(enable = "avx512f,avx512vl")]
5376#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5377#[cfg_attr(test, assert_instr(vgetexpps))]
5378pub fn _mm_getexp_ps(a: __m128) -> __m128 {
5379 unsafe { transmute(vgetexpps128(a.as_f32x4(), f32x4::ZERO, 0b00001111)) }
5380}
5381
5382/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5383///
5384/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getexp_ps&expand=2839)
5385#[inline]
5386#[target_feature(enable = "avx512f,avx512vl")]
5387#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5388#[cfg_attr(test, assert_instr(vgetexpps))]
5389pub fn _mm_mask_getexp_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
5390 unsafe { transmute(vgetexpps128(a.as_f32x4(), src.as_f32x4(), k)) }
5391}
5392
5393/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5394///
5395/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getexp_ps&expand=2840)
5396#[inline]
5397#[target_feature(enable = "avx512f,avx512vl")]
5398#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5399#[cfg_attr(test, assert_instr(vgetexpps))]
5400pub fn _mm_maskz_getexp_ps(k: __mmask8, a: __m128) -> __m128 {
5401 unsafe { transmute(vgetexpps128(a.as_f32x4(), f32x4::ZERO, k)) }
5402}
5403
5404/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5405///
5406/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_pd&expand=2835)
5407#[inline]
5408#[target_feature(enable = "avx512f")]
5409#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5410#[cfg_attr(test, assert_instr(vgetexppd))]
5411pub fn _mm512_getexp_pd(a: __m512d) -> __m512d {
5412 unsafe {
5413 transmute(vgetexppd(
5414 a.as_f64x8(),
5415 f64x8::ZERO,
5416 0b11111111,
5417 _MM_FROUND_CUR_DIRECTION,
5418 ))
5419 }
5420}
5421
5422/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5423///
5424/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_pd&expand=2836)
5425#[inline]
5426#[target_feature(enable = "avx512f")]
5427#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5428#[cfg_attr(test, assert_instr(vgetexppd))]
5429pub fn _mm512_mask_getexp_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
5430 unsafe {
5431 transmute(vgetexppd(
5432 a.as_f64x8(),
5433 src.as_f64x8(),
5434 k,
5435 _MM_FROUND_CUR_DIRECTION,
5436 ))
5437 }
5438}
5439
5440/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5441///
5442/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_pd&expand=2837)
5443#[inline]
5444#[target_feature(enable = "avx512f")]
5445#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5446#[cfg_attr(test, assert_instr(vgetexppd))]
5447pub fn _mm512_maskz_getexp_pd(k: __mmask8, a: __m512d) -> __m512d {
5448 unsafe {
5449 transmute(vgetexppd(
5450 a.as_f64x8(),
5451 f64x8::ZERO,
5452 k,
5453 _MM_FROUND_CUR_DIRECTION,
5454 ))
5455 }
5456}
5457
5458/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5459///
5460/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getexp_pd&expand=2832)
5461#[inline]
5462#[target_feature(enable = "avx512f,avx512vl")]
5463#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5464#[cfg_attr(test, assert_instr(vgetexppd))]
5465pub fn _mm256_getexp_pd(a: __m256d) -> __m256d {
5466 unsafe { transmute(vgetexppd256(a.as_f64x4(), f64x4::ZERO, 0b00001111)) }
5467}
5468
5469/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5470///
5471/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getexp_pd&expand=2833)
5472#[inline]
5473#[target_feature(enable = "avx512f,avx512vl")]
5474#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5475#[cfg_attr(test, assert_instr(vgetexppd))]
5476pub fn _mm256_mask_getexp_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
5477 unsafe { transmute(vgetexppd256(a.as_f64x4(), src.as_f64x4(), k)) }
5478}
5479
5480/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5481///
5482/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getexp_pd&expand=2834)
5483#[inline]
5484#[target_feature(enable = "avx512f,avx512vl")]
5485#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5486#[cfg_attr(test, assert_instr(vgetexppd))]
5487pub fn _mm256_maskz_getexp_pd(k: __mmask8, a: __m256d) -> __m256d {
5488 unsafe { transmute(vgetexppd256(a.as_f64x4(), f64x4::ZERO, k)) }
5489}
5490
5491/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.
5492///
5493/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getexp_pd&expand=2829)
5494#[inline]
5495#[target_feature(enable = "avx512f,avx512vl")]
5496#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5497#[cfg_attr(test, assert_instr(vgetexppd))]
5498pub fn _mm_getexp_pd(a: __m128d) -> __m128d {
5499 unsafe { transmute(vgetexppd128(a.as_f64x2(), f64x2::ZERO, 0b00000011)) }
5500}
5501
5502/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5503///
5504/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getexp_pd&expand=2830)
5505#[inline]
5506#[target_feature(enable = "avx512f,avx512vl")]
5507#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5508#[cfg_attr(test, assert_instr(vgetexppd))]
5509pub fn _mm_mask_getexp_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
5510 unsafe { transmute(vgetexppd128(a.as_f64x2(), src.as_f64x2(), k)) }
5511}
5512
5513/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.
5514///
5515/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getexp_pd&expand=2831)
5516#[inline]
5517#[target_feature(enable = "avx512f,avx512vl")]
5518#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5519#[cfg_attr(test, assert_instr(vgetexppd))]
5520pub fn _mm_maskz_getexp_pd(k: __mmask8, a: __m128d) -> __m128d {
5521 unsafe { transmute(vgetexppd128(a.as_f64x2(), f64x2::ZERO, k)) }
5522}
5523
5524/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5525/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5526/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5527/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5528/// * [`_MM_FROUND_TO_POS_INF`] : round up
5529/// * [`_MM_FROUND_TO_ZERO`] : truncate
5530/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5531///
5532/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_ps&expand=4784)
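///
/// # Example
///
/// An illustrative sketch (assumes `avx512f`; marked `ignore`): `IMM8 = 0` selects
/// zero fraction bits and round-to-nearest, i.e. ordinary rounding to the nearest
/// integer. Bits 7:4 of `IMM8` select how many fraction bits to keep, so
/// `IMM8 = 0x10` would instead round to the nearest multiple of 0.5.
///
/// ```ignore
/// let a = _mm512_set1_ps(2.7);
/// // Every lane of `r` holds 3.0.
/// let r = _mm512_roundscale_ps::<0>(a);
/// ```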
5533#[inline]
5534#[target_feature(enable = "avx512f")]
5535#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5536#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5537#[rustc_legacy_const_generics(1)]
5538pub fn _mm512_roundscale_ps<const IMM8: i32>(a: __m512) -> __m512 {
5539 unsafe {
5540 static_assert_uimm_bits!(IMM8, 8);
5541 let a: f32x16 = a.as_f32x16();
5542 let r: f32x16 = vrndscaleps(
5543 a,
5544 IMM8,
5545 f32x16::ZERO,
5546 0b11111111_11111111,
5547 _MM_FROUND_CUR_DIRECTION,
5548 );
5549 transmute(r)
5550 }
5551}
5552
5553/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5554/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5555/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5556/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5557/// * [`_MM_FROUND_TO_POS_INF`] : round up
5558/// * [`_MM_FROUND_TO_ZERO`] : truncate
5559/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5560///
5561/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_ps&expand=4782)
5562#[inline]
5563#[target_feature(enable = "avx512f")]
5564#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5565#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5566#[rustc_legacy_const_generics(3)]
5567pub fn _mm512_mask_roundscale_ps<const IMM8: i32>(src: __m512, k: __mmask16, a: __m512) -> __m512 {
5568 unsafe {
5569 static_assert_uimm_bits!(IMM8, 8);
5570 let a: f32x16 = a.as_f32x16();
5571 let src: f32x16 = src.as_f32x16();
5572 let r: f32x16 = vrndscaleps(a, IMM8, src, k, _MM_FROUND_CUR_DIRECTION);
5573 transmute(r)
5574 }
5575}
5576
5577/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5578/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5579/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5580/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5581/// * [`_MM_FROUND_TO_POS_INF`] : round up
5582/// * [`_MM_FROUND_TO_ZERO`] : truncate
5583/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5584///
5585/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_ps&expand=4783)
5586#[inline]
5587#[target_feature(enable = "avx512f")]
5588#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5589#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5590#[rustc_legacy_const_generics(2)]
5591pub fn _mm512_maskz_roundscale_ps<const IMM8: i32>(k: __mmask16, a: __m512) -> __m512 {
5592 unsafe {
5593 static_assert_uimm_bits!(IMM8, 8);
5594 let a: f32x16 = a.as_f32x16();
5595 let r: f32x16 = vrndscaleps(a, IMM8, f32x16::ZERO, k, _MM_FROUND_CUR_DIRECTION);
5596 transmute(r)
5597 }
5598}
5599
5600/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5601/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5602/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5603/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5604/// * [`_MM_FROUND_TO_POS_INF`] : round up
5605/// * [`_MM_FROUND_TO_ZERO`] : truncate
5606/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5607///
5608/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_roundscale_ps&expand=4781)
5609#[inline]
5610#[target_feature(enable = "avx512f,avx512vl")]
5611#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5612#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 250))]
5613#[rustc_legacy_const_generics(1)]
5614pub fn _mm256_roundscale_ps<const IMM8: i32>(a: __m256) -> __m256 {
5615 unsafe {
5616 static_assert_uimm_bits!(IMM8, 8);
5617 let a: f32x8 = a.as_f32x8();
5618 let r: f32x8 = vrndscaleps256(a, IMM8, f32x8::ZERO, 0b11111111);
5619 transmute(r)
5620 }
5621}
5622
5623/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5624/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5625/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5626/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5627/// * [`_MM_FROUND_TO_POS_INF`] : round up
5628/// * [`_MM_FROUND_TO_ZERO`] : truncate
5629/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5630///
5631/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_roundscale_ps&expand=4779)
5632#[inline]
5633#[target_feature(enable = "avx512f,avx512vl")]
5634#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5635#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5636#[rustc_legacy_const_generics(3)]
5637pub fn _mm256_mask_roundscale_ps<const IMM8: i32>(src: __m256, k: __mmask8, a: __m256) -> __m256 {
5638 unsafe {
5639 static_assert_uimm_bits!(IMM8, 8);
5640 let a: f32x8 = a.as_f32x8();
5641 let src: f32x8 = src.as_f32x8();
5642 let r: f32x8 = vrndscaleps256(a, IMM8, src, k);
5643 transmute(r)
5644 }
5645}
5646
5647/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5648/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5649/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5650/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5651/// * [`_MM_FROUND_TO_POS_INF`] : round up
5652/// * [`_MM_FROUND_TO_ZERO`] : truncate
5653/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5654///
5655/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_roundscale_ps&expand=4780)
5656#[inline]
5657#[target_feature(enable = "avx512f,avx512vl")]
5658#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5659#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5660#[rustc_legacy_const_generics(2)]
5661pub fn _mm256_maskz_roundscale_ps<const IMM8: i32>(k: __mmask8, a: __m256) -> __m256 {
5662 unsafe {
5663 static_assert_uimm_bits!(IMM8, 8);
5664 let a: f32x8 = a.as_f32x8();
5665 let r: f32x8 = vrndscaleps256(a, IMM8, f32x8::ZERO, k);
5666 transmute(r)
5667 }
5668}
5669
5670/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5671/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5672/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5673/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5674/// * [`_MM_FROUND_TO_POS_INF`] : round up
5675/// * [`_MM_FROUND_TO_ZERO`] : truncate
5676/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5677///
5678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_roundscale_ps&expand=4778)
5679#[inline]
5680#[target_feature(enable = "avx512f,avx512vl")]
5681#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5682#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 250))]
5683#[rustc_legacy_const_generics(1)]
5684pub fn _mm_roundscale_ps<const IMM8: i32>(a: __m128) -> __m128 {
5685 unsafe {
5686 static_assert_uimm_bits!(IMM8, 8);
5687 let a: f32x4 = a.as_f32x4();
5688 let r: f32x4 = vrndscaleps128(a, IMM8, f32x4::ZERO, 0b00001111);
5689 transmute(r)
5690 }
5691}
5692
5693/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5694/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5695/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5696/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5697/// * [`_MM_FROUND_TO_POS_INF`] : round up
5698/// * [`_MM_FROUND_TO_ZERO`] : truncate
5699/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5700///
5701/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_roundscale_ps&expand=4776)
5702#[inline]
5703#[target_feature(enable = "avx512f,avx512vl")]
5704#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5705#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5706#[rustc_legacy_const_generics(3)]
5707pub fn _mm_mask_roundscale_ps<const IMM8: i32>(src: __m128, k: __mmask8, a: __m128) -> __m128 {
5708 unsafe {
5709 static_assert_uimm_bits!(IMM8, 8);
5710 let a: f32x4 = a.as_f32x4();
5711 let src: f32x4 = src.as_f32x4();
5712 let r: f32x4 = vrndscaleps128(a, IMM8, src, k);
5713 transmute(r)
5714 }
5715}
5716
5717/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5718/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5719/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5720/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5721/// * [`_MM_FROUND_TO_POS_INF`] : round up
5722/// * [`_MM_FROUND_TO_ZERO`] : truncate
5723/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5724///
5725/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_roundscale_ps&expand=4777)
5726#[inline]
5727#[target_feature(enable = "avx512f,avx512vl")]
5728#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5729#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0))]
5730#[rustc_legacy_const_generics(2)]
5731pub fn _mm_maskz_roundscale_ps<const IMM8: i32>(k: __mmask8, a: __m128) -> __m128 {
5732 unsafe {
5733 static_assert_uimm_bits!(IMM8, 8);
5734 let a: f32x4 = a.as_f32x4();
5735 let r: f32x4 = vrndscaleps128(a, IMM8, f32x4::ZERO, k);
5736 transmute(r)
5737 }
5738}
5739
5740/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5741/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5742/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5743/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5744/// * [`_MM_FROUND_TO_POS_INF`] : round up
5745/// * [`_MM_FROUND_TO_ZERO`] : truncate
5746/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5747///
5748/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_pd&expand=4775)
5749#[inline]
5750#[target_feature(enable = "avx512f")]
5751#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5752#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5753#[rustc_legacy_const_generics(1)]
5754pub fn _mm512_roundscale_pd<const IMM8: i32>(a: __m512d) -> __m512d {
5755 unsafe {
5756 static_assert_uimm_bits!(IMM8, 8);
5757 let a: f64x8 = a.as_f64x8();
5758 let r: f64x8 = vrndscalepd(a, IMM8, f64x8::ZERO, 0b11111111, _MM_FROUND_CUR_DIRECTION);
5759 transmute(r)
5760 }
5761}
5762
5763/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5764/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5765/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5766/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5767/// * [`_MM_FROUND_TO_POS_INF`] : round up
5768/// * [`_MM_FROUND_TO_ZERO`] : truncate
5769/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5770///
5771/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_pd&expand=4773)
5772#[inline]
5773#[target_feature(enable = "avx512f")]
5774#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5775#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5776#[rustc_legacy_const_generics(3)]
5777pub fn _mm512_mask_roundscale_pd<const IMM8: i32>(
5778 src: __m512d,
5779 k: __mmask8,
5780 a: __m512d,
5781) -> __m512d {
5782 unsafe {
5783 static_assert_uimm_bits!(IMM8, 8);
5784 let a: f64x8 = a.as_f64x8();
5785 let src: f64x8 = src.as_f64x8();
5786 let r: f64x8 = vrndscalepd(a, IMM8, src, k, _MM_FROUND_CUR_DIRECTION);
5787 transmute(r)
5788 }
5789}
5790
5791/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5792/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5793/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5794/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5795/// * [`_MM_FROUND_TO_POS_INF`] : round up
5796/// * [`_MM_FROUND_TO_ZERO`] : truncate
5797/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5798///
5799/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_pd&expand=4774)
5800#[inline]
5801#[target_feature(enable = "avx512f")]
5802#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5803#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5804#[rustc_legacy_const_generics(2)]
5805pub fn _mm512_maskz_roundscale_pd<const IMM8: i32>(k: __mmask8, a: __m512d) -> __m512d {
5806 unsafe {
5807 static_assert_uimm_bits!(IMM8, 8);
5808 let a: f64x8 = a.as_f64x8();
5809 let r: f64x8 = vrndscalepd(a, IMM8, f64x8::ZERO, k, _MM_FROUND_CUR_DIRECTION);
5810 transmute(r)
5811 }
5812}
5813
5814/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5815/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5816/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5817/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5818/// * [`_MM_FROUND_TO_POS_INF`] : round up
5819/// * [`_MM_FROUND_TO_ZERO`] : truncate
5820/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5821///
5822/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_roundscale_pd&expand=4772)
5823#[inline]
5824#[target_feature(enable = "avx512f,avx512vl")]
5825#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5826#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5827#[rustc_legacy_const_generics(1)]
5828pub fn _mm256_roundscale_pd<const IMM8: i32>(a: __m256d) -> __m256d {
5829 unsafe {
5830 static_assert_uimm_bits!(IMM8, 8);
5831 let a: f64x4 = a.as_f64x4();
5832 let r: f64x4 = vrndscalepd256(a, IMM8, f64x4::ZERO, 0b00001111);
5833 transmute(r)
5834 }
5835}
5836
5837/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5838/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5839/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5840/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5841/// * [`_MM_FROUND_TO_POS_INF`] : round up
5842/// * [`_MM_FROUND_TO_ZERO`] : truncate
5843/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5844///
5845/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_roundscale_pd&expand=4770)
5846#[inline]
5847#[target_feature(enable = "avx512f,avx512vl")]
5848#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5849#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5850#[rustc_legacy_const_generics(3)]
5851pub fn _mm256_mask_roundscale_pd<const IMM8: i32>(
5852 src: __m256d,
5853 k: __mmask8,
5854 a: __m256d,
5855) -> __m256d {
5856 unsafe {
5857 static_assert_uimm_bits!(IMM8, 8);
5858 let a: f64x4 = a.as_f64x4();
5859 let src: f64x4 = src.as_f64x4();
5860 let r: f64x4 = vrndscalepd256(a, IMM8, src, k);
5861 transmute(r)
5862 }
5863}
5864
5865/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5866/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5867/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5868/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5869/// * [`_MM_FROUND_TO_POS_INF`] : round up
5870/// * [`_MM_FROUND_TO_ZERO`] : truncate
5871/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5872///
5873/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_roundscale_pd&expand=4771)
5874#[inline]
5875#[target_feature(enable = "avx512f,avx512vl")]
5876#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5877#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5878#[rustc_legacy_const_generics(2)]
5879pub fn _mm256_maskz_roundscale_pd<const IMM8: i32>(k: __mmask8, a: __m256d) -> __m256d {
5880 unsafe {
5881 static_assert_uimm_bits!(IMM8, 8);
5882 let a: f64x4 = a.as_f64x4();
5883 let r: f64x4 = vrndscalepd256(a, IMM8, f64x4::ZERO, k);
5884 transmute(r)
5885 }
5886}
5887
5888/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
5889/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5890/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5891/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5892/// * [`_MM_FROUND_TO_POS_INF`] : round up
5893/// * [`_MM_FROUND_TO_ZERO`] : truncate
5894/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5895///
5896/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_roundscale_pd&expand=4769)
5897#[inline]
5898#[target_feature(enable = "avx512f,avx512vl")]
5899#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5900#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5901#[rustc_legacy_const_generics(1)]
5902pub fn _mm_roundscale_pd<const IMM8: i32>(a: __m128d) -> __m128d {
5903 unsafe {
5904 static_assert_uimm_bits!(IMM8, 8);
5905 let a: f64x2 = a.as_f64x2();
5906 let r: f64x2 = vrndscalepd128(a, IMM8, f64x2::ZERO, 0b00000011);
5907 transmute(r)
5908 }
5909}
5910
5911/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
5912/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5913/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5914/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5915/// * [`_MM_FROUND_TO_POS_INF`] : round up
5916/// * [`_MM_FROUND_TO_ZERO`] : truncate
5917/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5918///
5919/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_roundscale_pd&expand=4767)
5920#[inline]
5921#[target_feature(enable = "avx512f,avx512vl")]
5922#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5923#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5924#[rustc_legacy_const_generics(3)]
5925pub fn _mm_mask_roundscale_pd<const IMM8: i32>(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
5926 unsafe {
5927 static_assert_uimm_bits!(IMM8, 8);
5928 let a: f64x2 = a.as_f64x2();
5929 let src: f64x2 = src.as_f64x2();
5930 let r: f64x2 = vrndscalepd128(a, IMM8, src, k);
5931 transmute(r)
5932 }
5933}
5934
5935/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
5936/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
5937/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
5938/// * [`_MM_FROUND_TO_NEG_INF`] : round down
5939/// * [`_MM_FROUND_TO_POS_INF`] : round up
5940/// * [`_MM_FROUND_TO_ZERO`] : truncate
5941/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
5942///
5943/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_roundscale_pd&expand=4768)
5944#[inline]
5945#[target_feature(enable = "avx512f,avx512vl")]
5946#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5947#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0))]
5948#[rustc_legacy_const_generics(2)]
5949pub fn _mm_maskz_roundscale_pd<const IMM8: i32>(k: __mmask8, a: __m128d) -> __m128d {
5950 unsafe {
5951 static_assert_uimm_bits!(IMM8, 8);
5952 let a: f64x2 = a.as_f64x2();
5953 let r: f64x2 = vrndscalepd128(a, IMM8, f64x2::ZERO, k);
5954 transmute(r)
5955 }
5956}
5957
5958/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.
5959///
5960/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_ps&expand=4883)
5961#[inline]
5962#[target_feature(enable = "avx512f")]
5963#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5964#[cfg_attr(test, assert_instr(vscalefps))]
5965pub fn _mm512_scalef_ps(a: __m512, b: __m512) -> __m512 {
5966 unsafe {
5967 transmute(vscalefps(
5968 a.as_f32x16(),
5969 b.as_f32x16(),
5970 f32x16::ZERO,
5971 0b11111111_11111111,
5972 _MM_FROUND_CUR_DIRECTION,
5973 ))
5974 }
5975}
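
// A minimal sketch of the scalef semantics, assuming the usual reading of Intel's
// vscalefps description: each result lane is a[i] * 2^floor(b[i]), so the intrinsic
// applies a per-lane power-of-two scale without touching the mantissa. Multiplying
// every lane of `a` by 8.0 could, for example, look like
//
//     let scaled = _mm512_scalef_ps(a, _mm512_set1_ps(3.0));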
5976
5977/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
5978///
5979/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_ps&expand=4881)
5980#[inline]
5981#[target_feature(enable = "avx512f")]
5982#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
5983#[cfg_attr(test, assert_instr(vscalefps))]
5984pub fn _mm512_mask_scalef_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
5985 unsafe {
5986 transmute(vscalefps(
5987 a.as_f32x16(),
5988 b.as_f32x16(),
5989 src.as_f32x16(),
5990 k,
5991 _MM_FROUND_CUR_DIRECTION,
5992 ))
5993 }
5994}
5995
5996/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
5997///
5998/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_ps&expand=4882)
5999#[inline]
6000#[target_feature(enable = "avx512f")]
6001#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6002#[cfg_attr(test, assert_instr(vscalefps))]
6003pub fn _mm512_maskz_scalef_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
6004 unsafe {
6005 transmute(vscalefps(
6006 a.as_f32x16(),
6007 b.as_f32x16(),
6008 f32x16::ZERO,
6009 k,
6010 _MM_FROUND_CUR_DIRECTION,
6011 ))
6012 }
6013}
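
// The mask/maskz pair above follows the usual AVX-512 convention: a set bit in `k`
// selects the computed lane, a clear bit falls back to `src` (writemask) or to zero
// (zeromask). A sketch with only the even lanes selected, purely for illustration:
//
//     let k: __mmask16 = 0b01010101_01010101;
//     let merged = _mm512_mask_scalef_ps(src, k, a, b);  // odd lanes copied from `src`
//     let zeroed = _mm512_maskz_scalef_ps(k, a, b);      // odd lanes forced to 0.0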
6014
6015/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.
6016///
6017/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_scalef_ps&expand=4880)
6018#[inline]
6019#[target_feature(enable = "avx512f,avx512vl")]
6020#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6021#[cfg_attr(test, assert_instr(vscalefps))]
6022pub fn _mm256_scalef_ps(a: __m256, b: __m256) -> __m256 {
6023 unsafe {
6024 transmute(vscalefps256(
6025 a.as_f32x8(),
6026 b.as_f32x8(),
6027 f32x8::ZERO,
6028 0b11111111,
6029 ))
6030 }
6031}
6032
6033/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6034///
6035/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_scalef_ps&expand=4878)
6036#[inline]
6037#[target_feature(enable = "avx512f,avx512vl")]
6038#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6039#[cfg_attr(test, assert_instr(vscalefps))]
6040pub fn _mm256_mask_scalef_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
6041 unsafe { transmute(vscalefps256(a.as_f32x8(), b.as_f32x8(), src.as_f32x8(), k)) }
6042}
6043
6044/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6045///
6046/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_scalef_ps&expand=4879)
6047#[inline]
6048#[target_feature(enable = "avx512f,avx512vl")]
6049#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6050#[cfg_attr(test, assert_instr(vscalefps))]
6051pub fn _mm256_maskz_scalef_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
6052 unsafe { transmute(vscalefps256(a.as_f32x8(), b.as_f32x8(), f32x8::ZERO, k)) }
6053}
6054
6055/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.
6056///
6057/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_scalef_ps&expand=4877)
6058#[inline]
6059#[target_feature(enable = "avx512f,avx512vl")]
6060#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6061#[cfg_attr(test, assert_instr(vscalefps))]
6062pub fn _mm_scalef_ps(a: __m128, b: __m128) -> __m128 {
6063 unsafe {
6064 transmute(vscalefps128(
6065 a.as_f32x4(),
6066 b.as_f32x4(),
6067 f32x4::ZERO,
6068 0b00001111,
6069 ))
6070 }
6071}
6072
6073/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6074///
6075/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_scalef_ps&expand=4875)
6076#[inline]
6077#[target_feature(enable = "avx512f,avx512vl")]
6078#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6079#[cfg_attr(test, assert_instr(vscalefps))]
6080pub fn _mm_mask_scalef_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
6081 unsafe { transmute(vscalefps128(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k)) }
6082}
6083
6084/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6085///
6086/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_scalef_ps&expand=4876)
6087#[inline]
6088#[target_feature(enable = "avx512f,avx512vl")]
6089#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6090#[cfg_attr(test, assert_instr(vscalefps))]
6091pub fn _mm_maskz_scalef_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
6092 unsafe { transmute(vscalefps128(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, k)) }
6093}
6094
6095/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.
6096///
6097/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_pd&expand=4874)
6098#[inline]
6099#[target_feature(enable = "avx512f")]
6100#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6101#[cfg_attr(test, assert_instr(vscalefpd))]
6102pub fn _mm512_scalef_pd(a: __m512d, b: __m512d) -> __m512d {
6103 unsafe {
6104 transmute(vscalefpd(
6105 a.as_f64x8(),
6106 b.as_f64x8(),
6107 f64x8::ZERO,
6108 0b11111111,
6109 _MM_FROUND_CUR_DIRECTION,
6110 ))
6111 }
6112}
6113
6114/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6115///
6116/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_pd&expand=4872)
6117#[inline]
6118#[target_feature(enable = "avx512f")]
6119#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6120#[cfg_attr(test, assert_instr(vscalefpd))]
6121pub fn _mm512_mask_scalef_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
6122 unsafe {
6123 transmute(vscalefpd(
6124 a.as_f64x8(),
6125 b.as_f64x8(),
6126 src.as_f64x8(),
6127 k,
6128 _MM_FROUND_CUR_DIRECTION,
6129 ))
6130 }
6131}
6132
6133/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6134///
6135/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_pd&expand=4873)
6136#[inline]
6137#[target_feature(enable = "avx512f")]
6138#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6139#[cfg_attr(test, assert_instr(vscalefpd))]
6140pub fn _mm512_maskz_scalef_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
6141 unsafe {
6142 transmute(vscalefpd(
6143 a.as_f64x8(),
6144 b.as_f64x8(),
6145 f64x8::ZERO,
6146 k,
6147 _MM_FROUND_CUR_DIRECTION,
6148 ))
6149 }
6150}
6151
6152/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.
6153///
6154/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_scalef_pd&expand=4871)
6155#[inline]
6156#[target_feature(enable = "avx512f,avx512vl")]
6157#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6158#[cfg_attr(test, assert_instr(vscalefpd))]
6159pub fn _mm256_scalef_pd(a: __m256d, b: __m256d) -> __m256d {
6160 unsafe {
6161 transmute(vscalefpd256(
6162 a.as_f64x4(),
6163 b.as_f64x4(),
6164 f64x4::ZERO,
6165 0b00001111,
6166 ))
6167 }
6168}
6169
6170/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6171///
6172/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_scalef_pd&expand=4869)
6173#[inline]
6174#[target_feature(enable = "avx512f,avx512vl")]
6175#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6176#[cfg_attr(test, assert_instr(vscalefpd))]
6177pub fn _mm256_mask_scalef_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
6178 unsafe { transmute(vscalefpd256(a.as_f64x4(), b.as_f64x4(), src.as_f64x4(), k)) }
6179}
6180
6181/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6182///
6183/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_scalef_pd&expand=4870)
6184#[inline]
6185#[target_feature(enable = "avx512f,avx512vl")]
6186#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6187#[cfg_attr(test, assert_instr(vscalefpd))]
6188pub fn _mm256_maskz_scalef_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
6189 unsafe { transmute(vscalefpd256(a.as_f64x4(), b.as_f64x4(), f64x4::ZERO, k)) }
6190}
6191
6192/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.
6193///
6194/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_scalef_pd&expand=4868)
6195#[inline]
6196#[target_feature(enable = "avx512f,avx512vl")]
6197#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6198#[cfg_attr(test, assert_instr(vscalefpd))]
6199pub fn _mm_scalef_pd(a: __m128d, b: __m128d) -> __m128d {
6200 unsafe {
6201 transmute(vscalefpd128(
6202 a.as_f64x2(),
6203 b.as_f64x2(),
6204 f64x2::ZERO,
6205 0b00000011,
6206 ))
6207 }
6208}
6209
6210/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
6211///
6212/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_scalef_pd&expand=4866)
6213#[inline]
6214#[target_feature(enable = "avx512f,avx512vl")]
6215#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6216#[cfg_attr(test, assert_instr(vscalefpd))]
6217pub fn _mm_mask_scalef_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
6218 unsafe { transmute(vscalefpd128(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k)) }
6219}
6220
6221/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
6222///
6223/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_scalef_pd&expand=4867)
6224#[inline]
6225#[target_feature(enable = "avx512f,avx512vl")]
6226#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6227#[cfg_attr(test, assert_instr(vscalefpd))]
6228pub fn _mm_maskz_scalef_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
6229 unsafe { transmute(vscalefpd128(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, k)) }
6230}
6231
6232/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6233///
6234/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_ps&expand=2499)
6235#[inline]
6236#[target_feature(enable = "avx512f")]
6237#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6238#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6239#[rustc_legacy_const_generics(3)]
6240pub fn _mm512_fixupimm_ps<const IMM8: i32>(a: __m512, b: __m512, c: __m512i) -> __m512 {
6241 unsafe {
6242 static_assert_uimm_bits!(IMM8, 8);
6243 let a: f32x16 = a.as_f32x16();
6244 let b: f32x16 = b.as_f32x16();
6245 let c: i32x16 = c.as_i32x16();
6246 let r: f32x16 = vfixupimmps(a, b, c, IMM8, 0b11111111_11111111, _MM_FROUND_CUR_DIRECTION);
6247 transmute(r)
6248 }
6249}
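
// A hedged sketch of how fixupimm is typically driven (the token/table layout is this
// comment's reading of the vfixupimmps description, not something these wrappers verify):
// each lane of `b` is classified into a token, that token indexes a 4-bit field inside the
// matching lane of the integer table `c`, and the selected field picks the value written to
// the result; IMM8 only chooses which special cases raise floating-point exceptions. Under
// that reading, an all-zero table leaves every lane of `a` unchanged:
//
//     let identity = _mm512_fixupimm_ps::<0>(a, b, _mm512_setzero_si512());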
6250
6251/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6252///
6253/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_ps&expand=2500)
6254#[inline]
6255#[target_feature(enable = "avx512f")]
6256#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6257#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6258#[rustc_legacy_const_generics(4)]
6259pub fn _mm512_mask_fixupimm_ps<const IMM8: i32>(
6260 a: __m512,
6261 k: __mmask16,
6262 b: __m512,
6263 c: __m512i,
6264) -> __m512 {
6265 unsafe {
6266 static_assert_uimm_bits!(IMM8, 8);
6267 let a: f32x16 = a.as_f32x16();
6268 let b: f32x16 = b.as_f32x16();
6269 let c: i32x16 = c.as_i32x16();
6270 let r: f32x16 = vfixupimmps(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
6271 transmute(r)
6272 }
6273}
6274
6275/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6276///
6277/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_ps&expand=2501)
6278#[inline]
6279#[target_feature(enable = "avx512f")]
6280#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6281#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6282#[rustc_legacy_const_generics(4)]
6283pub fn _mm512_maskz_fixupimm_ps<const IMM8: i32>(
6284 k: __mmask16,
6285 a: __m512,
6286 b: __m512,
6287 c: __m512i,
6288) -> __m512 {
6289 unsafe {
6290 static_assert_uimm_bits!(IMM8, 8);
6291 let a: f32x16 = a.as_f32x16();
6292 let b: f32x16 = b.as_f32x16();
6293 let c: i32x16 = c.as_i32x16();
6294 let r: f32x16 = vfixupimmpsz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
6295 transmute(r)
6296 }
6297}
6298
6299/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6300///
6301/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fixupimm_ps&expand=2496)
6302#[inline]
6303#[target_feature(enable = "avx512f,avx512vl")]
6304#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6305#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6306#[rustc_legacy_const_generics(3)]
6307pub fn _mm256_fixupimm_ps<const IMM8: i32>(a: __m256, b: __m256, c: __m256i) -> __m256 {
6308 unsafe {
6309 static_assert_uimm_bits!(IMM8, 8);
6310 let a: f32x8 = a.as_f32x8();
6311 let b: f32x8 = b.as_f32x8();
6312 let c: i32x8 = c.as_i32x8();
6313 let r: f32x8 = vfixupimmps256(a, b, c, IMM8, 0b11111111);
6314 transmute(r)
6315 }
6316}
6317
6318/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6319///
6320/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fixupimm_ps&expand=2497)
6321#[inline]
6322#[target_feature(enable = "avx512f,avx512vl")]
6323#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6324#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6325#[rustc_legacy_const_generics(4)]
6326pub fn _mm256_mask_fixupimm_ps<const IMM8: i32>(
6327 a: __m256,
6328 k: __mmask8,
6329 b: __m256,
6330 c: __m256i,
6331) -> __m256 {
6332 unsafe {
6333 static_assert_uimm_bits!(IMM8, 8);
6334 let a: f32x8 = a.as_f32x8();
6335 let b: f32x8 = b.as_f32x8();
6336 let c: i32x8 = c.as_i32x8();
6337 let r: f32x8 = vfixupimmps256(a, b, c, IMM8, k);
6338 transmute(r)
6339 }
6340}
6341
6342/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6343///
6344/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fixupimm_ps&expand=2498)
6345#[inline]
6346#[target_feature(enable = "avx512f,avx512vl")]
6347#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6348#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6349#[rustc_legacy_const_generics(4)]
6350pub fn _mm256_maskz_fixupimm_ps<const IMM8: i32>(
6351 k: __mmask8,
6352 a: __m256,
6353 b: __m256,
6354 c: __m256i,
6355) -> __m256 {
6356 unsafe {
6357 static_assert_uimm_bits!(IMM8, 8);
6358 let a: f32x8 = a.as_f32x8();
6359 let b: f32x8 = b.as_f32x8();
6360 let c: i32x8 = c.as_i32x8();
6361 let r: f32x8 = vfixupimmpsz256(a, b, c, IMM8, k);
6362 transmute(r)
6363 }
6364}
6365
6366/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6367///
6368/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fixupimm_ps&expand=2493)
6369#[inline]
6370#[target_feature(enable = "avx512f,avx512vl")]
6371#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6372#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6373#[rustc_legacy_const_generics(3)]
6374pub fn _mm_fixupimm_ps<const IMM8: i32>(a: __m128, b: __m128, c: __m128i) -> __m128 {
6375 unsafe {
6376 static_assert_uimm_bits!(IMM8, 8);
6377 let a: f32x4 = a.as_f32x4();
6378 let b: f32x4 = b.as_f32x4();
6379 let c: i32x4 = c.as_i32x4();
6380 let r: f32x4 = vfixupimmps128(a, b, c, IMM8, 0b00001111);
6381 transmute(r)
6382 }
6383}
6384
6385/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6386///
6387/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fixupimm_ps&expand=2494)
6388#[inline]
6389#[target_feature(enable = "avx512f,avx512vl")]
6390#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6391#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6392#[rustc_legacy_const_generics(4)]
6393pub fn _mm_mask_fixupimm_ps<const IMM8: i32>(
6394 a: __m128,
6395 k: __mmask8,
6396 b: __m128,
6397 c: __m128i,
6398) -> __m128 {
6399 unsafe {
6400 static_assert_uimm_bits!(IMM8, 8);
6401 let a: f32x4 = a.as_f32x4();
6402 let b: f32x4 = b.as_f32x4();
6403 let c: i32x4 = c.as_i32x4();
6404 let r: f32x4 = vfixupimmps128(a, b, c, IMM8, k);
6405 transmute(r)
6406 }
6407}
6408
6409/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6410///
6411/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fixupimm_ps&expand=2495)
6412#[inline]
6413#[target_feature(enable = "avx512f,avx512vl")]
6414#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6415#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0))]
6416#[rustc_legacy_const_generics(4)]
6417pub fn _mm_maskz_fixupimm_ps<const IMM8: i32>(
6418 k: __mmask8,
6419 a: __m128,
6420 b: __m128,
6421 c: __m128i,
6422) -> __m128 {
6423 unsafe {
6424 static_assert_uimm_bits!(IMM8, 8);
6425 let a: f32x4 = a.as_f32x4();
6426 let b: f32x4 = b.as_f32x4();
6427 let c: i32x4 = c.as_i32x4();
6428 let r: f32x4 = vfixupimmpsz128(a, b, c, IMM8, k);
6429 transmute(r)
6430 }
6431}
6432
6433/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6434///
6435/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_pd&expand=2490)
6436#[inline]
6437#[target_feature(enable = "avx512f")]
6438#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6439#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6440#[rustc_legacy_const_generics(3)]
6441pub fn _mm512_fixupimm_pd<const IMM8: i32>(a: __m512d, b: __m512d, c: __m512i) -> __m512d {
6442 unsafe {
6443 static_assert_uimm_bits!(IMM8, 8);
6444 let a: f64x8 = a.as_f64x8();
6445 let b: f64x8 = b.as_f64x8();
6446 let c: i64x8 = c.as_i64x8();
6447 let r: f64x8 = vfixupimmpd(a, b, c, IMM8, 0b11111111, _MM_FROUND_CUR_DIRECTION);
6448 transmute(r)
6449 }
6450}
6451
6452/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6453///
6454/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_pd&expand=2491)
6455#[inline]
6456#[target_feature(enable = "avx512f")]
6457#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6458#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6459#[rustc_legacy_const_generics(4)]
6460pub fn _mm512_mask_fixupimm_pd<const IMM8: i32>(
6461 a: __m512d,
6462 k: __mmask8,
6463 b: __m512d,
6464 c: __m512i,
6465) -> __m512d {
6466 unsafe {
6467 static_assert_uimm_bits!(IMM8, 8);
6468 let a: f64x8 = a.as_f64x8();
6469 let b: f64x8 = b.as_f64x8();
6470 let c: i64x8 = c.as_i64x8();
6471 let r: f64x8 = vfixupimmpd(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
6472 transmute(r)
6473 }
6474}
6475
6476/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6477///
6478/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_pd&expand=2492)
6479#[inline]
6480#[target_feature(enable = "avx512f")]
6481#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6482#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6483#[rustc_legacy_const_generics(4)]
6484pub fn _mm512_maskz_fixupimm_pd<const IMM8: i32>(
6485 k: __mmask8,
6486 a: __m512d,
6487 b: __m512d,
6488 c: __m512i,
6489) -> __m512d {
6490 unsafe {
6491 static_assert_uimm_bits!(IMM8, 8);
6492 let a: f64x8 = a.as_f64x8();
6493 let b: f64x8 = b.as_f64x8();
6494 let c: i64x8 = c.as_i64x8();
6495 let r: f64x8 = vfixupimmpdz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
6496 transmute(r)
6497 }
6498}
6499
6500/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6501///
6502/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fixupimm_pd&expand=2487)
6503#[inline]
6504#[target_feature(enable = "avx512f,avx512vl")]
6505#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6506#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6507#[rustc_legacy_const_generics(3)]
6508pub fn _mm256_fixupimm_pd<const IMM8: i32>(a: __m256d, b: __m256d, c: __m256i) -> __m256d {
6509 unsafe {
6510 static_assert_uimm_bits!(IMM8, 8);
6511 let a: f64x4 = a.as_f64x4();
6512 let b: f64x4 = b.as_f64x4();
6513 let c: i64x4 = c.as_i64x4();
6514 let r: f64x4 = vfixupimmpd256(a, b, c, IMM8, 0b00001111);
6515 transmute(r)
6516 }
6517}
6518
6519/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6520///
6521/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_fixupimm_pd&expand=2488)
6522#[inline]
6523#[target_feature(enable = "avx512f,avx512vl")]
6524#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6525#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6526#[rustc_legacy_const_generics(4)]
6527pub fn _mm256_mask_fixupimm_pd<const IMM8: i32>(
6528 a: __m256d,
6529 k: __mmask8,
6530 b: __m256d,
6531 c: __m256i,
6532) -> __m256d {
6533 unsafe {
6534 static_assert_uimm_bits!(IMM8, 8);
6535 let a: f64x4 = a.as_f64x4();
6536 let b: f64x4 = b.as_f64x4();
6537 let c: i64x4 = c.as_i64x4();
6538 let r: f64x4 = vfixupimmpd256(a, b, c, IMM8, k);
6539 transmute(r)
6540 }
6541}
6542
6543/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6544///
6545/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_fixupimm_pd&expand=2489)
6546#[inline]
6547#[target_feature(enable = "avx512f,avx512vl")]
6548#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6549#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6550#[rustc_legacy_const_generics(4)]
6551pub fn _mm256_maskz_fixupimm_pd<const IMM8: i32>(
6552 k: __mmask8,
6553 a: __m256d,
6554 b: __m256d,
6555 c: __m256i,
6556) -> __m256d {
6557 unsafe {
6558 static_assert_uimm_bits!(IMM8, 8);
6559 let a: f64x4 = a.as_f64x4();
6560 let b: f64x4 = b.as_f64x4();
6561 let c: i64x4 = c.as_i64x4();
6562 let r: f64x4 = vfixupimmpdz256(a, b, c, IMM8, k);
6563 transmute(r)
6564 }
6565}
6566
6567/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.
6568///
6569/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fixupimm_pd&expand=2484)
6570#[inline]
6571#[target_feature(enable = "avx512f,avx512vl")]
6572#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6573#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6574#[rustc_legacy_const_generics(3)]
6575pub fn _mm_fixupimm_pd<const IMM8: i32>(a: __m128d, b: __m128d, c: __m128i) -> __m128d {
6576 unsafe {
6577 static_assert_uimm_bits!(IMM8, 8);
6578 let a: f64x2 = a.as_f64x2();
6579 let b: f64x2 = b.as_f64x2();
6580 let c: i64x2 = c.as_i64x2();
6581 let r: f64x2 = vfixupimmpd128(a, b, c, IMM8, 0b00000011);
6582 transmute(r)
6583 }
6584}
6585
6586/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6587///
6588/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fixupimm_pd&expand=2485)
6589#[inline]
6590#[target_feature(enable = "avx512f,avx512vl")]
6591#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6592#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6593#[rustc_legacy_const_generics(4)]
6594pub fn _mm_mask_fixupimm_pd<const IMM8: i32>(
6595 a: __m128d,
6596 k: __mmask8,
6597 b: __m128d,
6598 c: __m128i,
6599) -> __m128d {
6600 unsafe {
6601 static_assert_uimm_bits!(IMM8, 8);
6602 let a: f64x2 = a.as_f64x2();
6603 let b: f64x2 = b.as_f64x2();
6604 let c: i64x2 = c.as_i64x2();
6605 let r: f64x2 = vfixupimmpd128(a, b, c, IMM8, k);
6606 transmute(r)
6607 }
6608}
6609
6610/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.
6611///
6612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fixupimm_pd&expand=2486)
6613#[inline]
6614#[target_feature(enable = "avx512f,avx512vl")]
6615#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6616#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0))]
6617#[rustc_legacy_const_generics(4)]
6618pub fn _mm_maskz_fixupimm_pd<const IMM8: i32>(
6619 k: __mmask8,
6620 a: __m128d,
6621 b: __m128d,
6622 c: __m128i,
6623) -> __m128d {
6624 unsafe {
6625 static_assert_uimm_bits!(IMM8, 8);
6626 let a: f64x2 = a.as_f64x2();
6627 let b: f64x2 = b.as_f64x2();
6628 let c: i64x2 = c.as_i64x2();
6629 let r: f64x2 = vfixupimmpdz128(a, b, c, IMM8, k);
6630 transmute(r)
6631 }
6632}
6633
6634/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the value at that index in imm8 is written to the corresponding bit in dst.
6635///
6636/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ternarylogic_epi32&expand=5867)
6637#[inline]
6638#[target_feature(enable = "avx512f")]
6639#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6640#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6641#[rustc_legacy_const_generics(3)]
6642pub fn _mm512_ternarylogic_epi32<const IMM8: i32>(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
6643 unsafe {
6644 static_assert_uimm_bits!(IMM8, 8);
6645 let a: i32x16 = a.as_i32x16();
6646 let b: i32x16 = b.as_i32x16();
6647 let c: i32x16 = c.as_i32x16();
6648 let r: i32x16 = vpternlogd(a, b, c, IMM8);
6649 transmute(r)
6650 }
6651}
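
// A small worked example of the truth-table encoding, assuming Intel's convention that
// the bit taken from `a` forms the most significant bit of the 3-bit index: IMM8 = 0x96
// (0b1001_0110) is set exactly at the odd-parity indices and therefore computes a
// three-way XOR, while IMM8 = 0xE8 yields the bitwise majority function:
//
//     let xor3 = _mm512_ternarylogic_epi32::<0x96>(a, b, c); // a ^ b ^ c
//     let maj  = _mm512_ternarylogic_epi32::<0xE8>(a, b, c); // (a & b) | (a & c) | (b & c)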
6652
6653/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bits from src, a, and b are used to form a 3-bit index into imm8, and the value at that index in imm8 is written to the corresponding bit in dst using writemask k at 32-bit granularity (32-bit elements are copied from src when the corresponding mask bit is not set).
6654///
6655/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ternarylogic_epi32&expand=5865)
6656#[inline]
6657#[target_feature(enable = "avx512f")]
6658#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6659#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6660#[rustc_legacy_const_generics(4)]
6661pub fn _mm512_mask_ternarylogic_epi32<const IMM8: i32>(
6662 src: __m512i,
6663 k: __mmask16,
6664 a: __m512i,
6665 b: __m512i,
6666) -> __m512i {
6667 unsafe {
6668 static_assert_uimm_bits!(IMM8, 8);
6669 let src: i32x16 = src.as_i32x16();
6670 let a: i32x16 = a.as_i32x16();
6671 let b: i32x16 = b.as_i32x16();
6672 let r: i32x16 = vpternlogd(src, a, b, IMM8);
6673 transmute(simd_select_bitmask(k, r, src))
6674 }
6675}
6676
6677/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the value at that index in imm8 is written to the corresponding bit in dst using zeromask k at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set).
6678///
6679/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ternarylogic_epi32&expand=5866)
6680#[inline]
6681#[target_feature(enable = "avx512f")]
6682#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6683#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6684#[rustc_legacy_const_generics(4)]
6685pub fn _mm512_maskz_ternarylogic_epi32<const IMM8: i32>(
6686 k: __mmask16,
6687 a: __m512i,
6688 b: __m512i,
6689 c: __m512i,
6690) -> __m512i {
6691 unsafe {
6692 static_assert_uimm_bits!(IMM8, 8);
6693 let a: i32x16 = a.as_i32x16();
6694 let b: i32x16 = b.as_i32x16();
6695 let c: i32x16 = c.as_i32x16();
6696 let r: i32x16 = vpternlogd(a, b, c, IMM8);
6697 transmute(simd_select_bitmask(k, r, i32x16::ZERO))
6698 }
6699}
6700
6701/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the value at that index in imm8 is written to the corresponding bit in dst.
6702///
6703/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ternarylogic_epi32&expand=5864)
6704#[inline]
6705#[target_feature(enable = "avx512f,avx512vl")]
6706#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6707#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6708#[rustc_legacy_const_generics(3)]
6709pub fn _mm256_ternarylogic_epi32<const IMM8: i32>(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
6710 unsafe {
6711 static_assert_uimm_bits!(IMM8, 8);
6712 let a: i32x8 = a.as_i32x8();
6713 let b: i32x8 = b.as_i32x8();
6714 let c: i32x8 = c.as_i32x8();
6715 let r: i32x8 = vpternlogd256(a, b, c, IMM8);
6716 transmute(r)
6717 }
6718}
6719
6720/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bits from src, a, and b are used to form a 3-bit index into imm8, and the value at that index in imm8 is written to the corresponding bit in dst using writemask k at 32-bit granularity (32-bit elements are copied from src when the corresponding mask bit is not set).
6721///
6722/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ternarylogic_epi32&expand=5862)
6723#[inline]
6724#[target_feature(enable = "avx512f,avx512vl")]
6725#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6726#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6727#[rustc_legacy_const_generics(4)]
6728pub fn _mm256_mask_ternarylogic_epi32<const IMM8: i32>(
6729 src: __m256i,
6730 k: __mmask8,
6731 a: __m256i,
6732 b: __m256i,
6733) -> __m256i {
6734 unsafe {
6735 static_assert_uimm_bits!(IMM8, 8);
6736 let src: i32x8 = src.as_i32x8();
6737 let a: i32x8 = a.as_i32x8();
6738 let b: i32x8 = b.as_i32x8();
6739 let r: i32x8 = vpternlogd256(src, a, b, IMM8);
6740 transmute(simd_select_bitmask(k, r, src))
6741 }
6742}
6743
6744/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the value at that index in imm8 is written to the corresponding bit in dst using zeromask k at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set).
6745///
6746/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ternarylogic_epi32&expand=5863)
6747#[inline]
6748#[target_feature(enable = "avx512f,avx512vl")]
6749#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6750#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6751#[rustc_legacy_const_generics(4)]
6752pub fn _mm256_maskz_ternarylogic_epi32<const IMM8: i32>(
6753 k: __mmask8,
6754 a: __m256i,
6755 b: __m256i,
6756 c: __m256i,
6757) -> __m256i {
6758 unsafe {
6759 static_assert_uimm_bits!(IMM8, 8);
6760 let a: i32x8 = a.as_i32x8();
6761 let b: i32x8 = b.as_i32x8();
6762 let c: i32x8 = c.as_i32x8();
6763 let r: i32x8 = vpternlogd256(a, b, c, IMM8);
6764 transmute(simd_select_bitmask(k, r, i32x8::ZERO))
6765 }
6766}
6767
6768/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the value at that index in imm8 is written to the corresponding bit in dst.
6769///
6770/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ternarylogic_epi32&expand=5861)
6771#[inline]
6772#[target_feature(enable = "avx512f,avx512vl")]
6773#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6774#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6775#[rustc_legacy_const_generics(3)]
6776pub fn _mm_ternarylogic_epi32<const IMM8: i32>(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
6777 unsafe {
6778 static_assert_uimm_bits!(IMM8, 8);
6779 let a: i32x4 = a.as_i32x4();
6780 let b: i32x4 = b.as_i32x4();
6781 let c: i32x4 = c.as_i32x4();
6782 let r: i32x4 = vpternlogd128(a, b, c, IMM8);
6783 transmute(r)
6784 }
6785}
6786
6787/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bits from src, a, and b are used to form a 3-bit index into imm8, and the value at that index in imm8 is written to the corresponding bit in dst using writemask k at 32-bit granularity (32-bit elements are copied from src when the corresponding mask bit is not set).
6788///
6789/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ternarylogic_epi32&expand=5859)
6790#[inline]
6791#[target_feature(enable = "avx512f,avx512vl")]
6792#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6793#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6794#[rustc_legacy_const_generics(4)]
6795pub fn _mm_mask_ternarylogic_epi32<const IMM8: i32>(
6796 src: __m128i,
6797 k: __mmask8,
6798 a: __m128i,
6799 b: __m128i,
6800) -> __m128i {
6801 unsafe {
6802 static_assert_uimm_bits!(IMM8, 8);
6803 let src: i32x4 = src.as_i32x4();
6804 let a: i32x4 = a.as_i32x4();
6805 let b: i32x4 = b.as_i32x4();
6806 let r: i32x4 = vpternlogd128(src, a, b, IMM8);
6807 transmute(simd_select_bitmask(k, r, src))
6808 }
6809}
6810
6811/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 32-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the value at that index in imm8 is written to the corresponding bit in dst using zeromask k at 32-bit granularity (32-bit elements are zeroed out when the corresponding mask bit is not set).
6812///
6813/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ternarylogic_epi32&expand=5860)
6814#[inline]
6815#[target_feature(enable = "avx512f,avx512vl")]
6816#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6817#[cfg_attr(test, assert_instr(vpternlogd, IMM8 = 114))]
6818#[rustc_legacy_const_generics(4)]
6819pub fn _mm_maskz_ternarylogic_epi32<const IMM8: i32>(
6820 k: __mmask8,
6821 a: __m128i,
6822 b: __m128i,
6823 c: __m128i,
6824) -> __m128i {
6825 unsafe {
6826 static_assert_uimm_bits!(IMM8, 8);
6827 let a: i32x4 = a.as_i32x4();
6828 let b: i32x4 = b.as_i32x4();
6829 let c: i32x4 = c.as_i32x4();
6830 let r: i32x4 = vpternlogd128(a, b, c, IMM8);
6831 transmute(simd_select_bitmask(k, r, i32x4::ZERO))
6832 }
6833}
6834
6835/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bits from a, b, and c are used to form a 3-bit index into imm8, and the value at that index in imm8 is written to the corresponding bit in dst.
6836///
6837/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ternarylogic_epi64&expand=5876)
6838#[inline]
6839#[target_feature(enable = "avx512f")]
6840#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6841#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6842#[rustc_legacy_const_generics(3)]
6843pub fn _mm512_ternarylogic_epi64<const IMM8: i32>(a: __m512i, b: __m512i, c: __m512i) -> __m512i {
6844 unsafe {
6845 static_assert_uimm_bits!(IMM8, 8);
6846 let a: i64x8 = a.as_i64x8();
6847 let b: i64x8 = b.as_i64x8();
6848 let c: i64x8 = c.as_i64x8();
6849 let r: i64x8 = vpternlogq(a, b, c, IMM8);
6850 transmute(src:r)
6851 }
6852}
6853
6854/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 64-bit granularity (64-bit elements are copied from src when the corresponding mask bit is not set).
6855///
6856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ternarylogic_epi64&expand=5874)
6857#[inline]
6858#[target_feature(enable = "avx512f")]
6859#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6860#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6861#[rustc_legacy_const_generics(4)]
6862pub fn _mm512_mask_ternarylogic_epi64<const IMM8: i32>(
6863 src: __m512i,
6864 k: __mmask8,
6865 a: __m512i,
6866 b: __m512i,
6867) -> __m512i {
6868 unsafe {
6869 static_assert_uimm_bits!(IMM8, 8);
6870 let src: i64x8 = src.as_i64x8();
6871 let a: i64x8 = a.as_i64x8();
6872 let b: i64x8 = b.as_i64x8();
6873 let r: i64x8 = vpternlogq(a:src, b:a, c:b, IMM8);
6874 transmute(src:simd_select_bitmask(m:k, yes:r, no:src))
6875 }
6876}
6877
6878/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set).
6879///
6880/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ternarylogic_epi64&expand=5875)
6881#[inline]
6882#[target_feature(enable = "avx512f")]
6883#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6884#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6885#[rustc_legacy_const_generics(4)]
6886pub fn _mm512_maskz_ternarylogic_epi64<const IMM8: i32>(
6887 k: __mmask8,
6888 a: __m512i,
6889 b: __m512i,
6890 c: __m512i,
6891) -> __m512i {
6892 unsafe {
6893 static_assert_uimm_bits!(IMM8, 8);
6894 let a: i64x8 = a.as_i64x8();
6895 let b: i64x8 = b.as_i64x8();
6896 let c: i64x8 = c.as_i64x8();
6897 let r: i64x8 = vpternlogq(a, b, c, IMM8);
6898 transmute(src:simd_select_bitmask(m:k, yes:r, no:i64x8::ZERO))
6899 }
6900}
6901
6902/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
6903///
6904/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ternarylogic_epi64&expand=5873)
6905#[inline]
6906#[target_feature(enable = "avx512f,avx512vl")]
6907#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6908#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6909#[rustc_legacy_const_generics(3)]
6910pub fn _mm256_ternarylogic_epi64<const IMM8: i32>(a: __m256i, b: __m256i, c: __m256i) -> __m256i {
6911 unsafe {
6912 static_assert_uimm_bits!(IMM8, 8);
6913 let a: i64x4 = a.as_i64x4();
6914 let b: i64x4 = b.as_i64x4();
6915 let c: i64x4 = c.as_i64x4();
6916 let r: i64x4 = vpternlogq256(a, b, c, IMM8);
6917 transmute(src:r)
6918 }
6919}
6920
6921/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 64-bit granularity (64-bit elements are copied from src when the corresponding mask bit is not set).
6922///
6923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ternarylogic_epi64&expand=5871)
6924#[inline]
6925#[target_feature(enable = "avx512f,avx512vl")]
6926#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6927#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6928#[rustc_legacy_const_generics(4)]
6929pub fn _mm256_mask_ternarylogic_epi64<const IMM8: i32>(
6930 src: __m256i,
6931 k: __mmask8,
6932 a: __m256i,
6933 b: __m256i,
6934) -> __m256i {
6935 unsafe {
6936 static_assert_uimm_bits!(IMM8, 8);
6937 let src: i64x4 = src.as_i64x4();
6938 let a: i64x4 = a.as_i64x4();
6939 let b: i64x4 = b.as_i64x4();
6940 let r: i64x4 = vpternlogq256(a:src, b:a, c:b, IMM8);
6941 transmute(src:simd_select_bitmask(m:k, yes:r, no:src))
6942 }
6943}
6944
6945/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set).
6946///
6947/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ternarylogic_epi64&expand=5872)
6948#[inline]
6949#[target_feature(enable = "avx512f,avx512vl")]
6950#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6951#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6952#[rustc_legacy_const_generics(4)]
6953pub fn _mm256_maskz_ternarylogic_epi64<const IMM8: i32>(
6954 k: __mmask8,
6955 a: __m256i,
6956 b: __m256i,
6957 c: __m256i,
6958) -> __m256i {
6959 unsafe {
6960 static_assert_uimm_bits!(IMM8, 8);
6961 let a: i64x4 = a.as_i64x4();
6962 let b: i64x4 = b.as_i64x4();
6963 let c: i64x4 = c.as_i64x4();
6964 let r: i64x4 = vpternlogq256(a, b, c, IMM8);
6965 transmute(src:simd_select_bitmask(m:k, yes:r, no:i64x4::ZERO))
6966 }
6967}
6968
6969/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst.
6970///
6971/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ternarylogic_epi64&expand=5870)
6972#[inline]
6973#[target_feature(enable = "avx512f,avx512vl")]
6974#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6975#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6976#[rustc_legacy_const_generics(3)]
6977pub fn _mm_ternarylogic_epi64<const IMM8: i32>(a: __m128i, b: __m128i, c: __m128i) -> __m128i {
6978 unsafe {
6979 static_assert_uimm_bits!(IMM8, 8);
6980 let a: i64x2 = a.as_i64x2();
6981 let b: i64x2 = b.as_i64x2();
6982 let c: i64x2 = c.as_i64x2();
6983 let r: i64x2 = vpternlogq128(a, b, c, IMM8);
6984 transmute(src:r)
6985 }
6986}
6987
6988/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from src, a, and b are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using writemask k at 64-bit granularity (64-bit elements are copied from src when the corresponding mask bit is not set).
6989///
6990/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ternarylogic_epi64&expand=5868)
6991#[inline]
6992#[target_feature(enable = "avx512f,avx512vl")]
6993#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
6994#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
6995#[rustc_legacy_const_generics(4)]
6996pub fn _mm_mask_ternarylogic_epi64<const IMM8: i32>(
6997 src: __m128i,
6998 k: __mmask8,
6999 a: __m128i,
7000 b: __m128i,
7001) -> __m128i {
7002 unsafe {
7003 static_assert_uimm_bits!(IMM8, 8);
7004 let src: i64x2 = src.as_i64x2();
7005 let a: i64x2 = a.as_i64x2();
7006 let b: i64x2 = b.as_i64x2();
7007 let r: i64x2 = vpternlogq128(a:src, b:a, c:b, IMM8);
7008 transmute(src:simd_select_bitmask(m:k, yes:r, no:src))
7009 }
7010}
7011
7012/// Bitwise ternary logic that provides the capability to implement any three-operand binary function; the specific binary function is specified by value in imm8. For each bit in each packed 64-bit integer, the corresponding bit from a, b, and c are used to form a 3 bit index into imm8, and the value at that bit in imm8 is written to the corresponding bit in dst using zeromask k at 64-bit granularity (64-bit elements are zeroed out when the corresponding mask bit is not set).
7013///
7014/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ternarylogic_epi64&expand=5869)
7015#[inline]
7016#[target_feature(enable = "avx512f,avx512vl")]
7017#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7018#[cfg_attr(test, assert_instr(vpternlogq, IMM8 = 114))]
7019#[rustc_legacy_const_generics(4)]
7020pub fn _mm_maskz_ternarylogic_epi64<const IMM8: i32>(
7021 k: __mmask8,
7022 a: __m128i,
7023 b: __m128i,
7024 c: __m128i,
7025) -> __m128i {
7026 unsafe {
7027 static_assert_uimm_bits!(IMM8, 8);
7028 let a: i64x2 = a.as_i64x2();
7029 let b: i64x2 = b.as_i64x2();
7030 let c: i64x2 = c.as_i64x2();
7031 let r: i64x2 = vpternlogq128(a, b, c, IMM8);
7032 transmute(src:simd_select_bitmask(m:k, yes:r, no:i64x2::ZERO))
7033 }
7034}
7035
7036/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.
7037/// The mantissa is normalized to the interval specified by interv, which can take the following values:
7038/// _MM_MANT_NORM_1_2 // interval [1, 2)
7039/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
7040/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
7041/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
7042/// The sign is determined by sc which can take the following values:
7043/// _MM_MANT_SIGN_src // sign = sign(src)
7044/// _MM_MANT_SIGN_zero // sign = 0
7045/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7046///
7047/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_ps&expand=2880)
7048#[inline]
7049#[target_feature(enable = "avx512f")]
7050#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7051#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7052#[rustc_legacy_const_generics(1, 2)]
7053pub fn _mm512_getmant_ps<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7054 a: __m512,
7055) -> __m512 {
7056 unsafe {
7057 static_assert_uimm_bits!(NORM, 4);
7058 static_assert_uimm_bits!(SIGN, 2);
7059 let a: f32x16 = a.as_f32x16();
7060 let zero: f32x16 = f32x16::ZERO;
7061 let r: f32x16 = vgetmantps(
7062 a,
7063 SIGN << 2 | NORM,
7064 src:zero,
7065 m:0b11111111_11111111,
7066 _MM_FROUND_CUR_DIRECTION,
7067 );
7068 transmute(src:r)
7069 }
7070}
7071
7072/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7073/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7074/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7075/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7076/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7077/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7078/// The sign is determined by sc which can take the following values:\
7079/// _MM_MANT_SIGN_src // sign = sign(src)\
7080/// _MM_MANT_SIGN_zero // sign = 0\
7081/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7082///
7083/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_ps&expand=2881)
7084#[inline]
7085#[target_feature(enable = "avx512f")]
7086#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7087#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7088#[rustc_legacy_const_generics(3, 4)]
7089pub fn _mm512_mask_getmant_ps<
7090 const NORM: _MM_MANTISSA_NORM_ENUM,
7091 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7092>(
7093 src: __m512,
7094 k: __mmask16,
7095 a: __m512,
7096) -> __m512 {
7097 unsafe {
7098 static_assert_uimm_bits!(NORM, 4);
7099 static_assert_uimm_bits!(SIGN, 2);
7100 let a: f32x16 = a.as_f32x16();
7101 let src: f32x16 = src.as_f32x16();
7102 let r: f32x16 = vgetmantps(a, SIGN << 2 | NORM, src, m:k, _MM_FROUND_CUR_DIRECTION);
7103 transmute(src:r)
7104 }
7105}
7106
7107/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7108/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7109/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7110/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7111/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7112/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7113/// The sign is determined by sc which can take the following values:\
7114/// _MM_MANT_SIGN_src // sign = sign(src)\
7115/// _MM_MANT_SIGN_zero // sign = 0\
7116/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7117///
7118/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_ps&expand=2882)
7119#[inline]
7120#[target_feature(enable = "avx512f")]
7121#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7122#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7123#[rustc_legacy_const_generics(2, 3)]
7124pub fn _mm512_maskz_getmant_ps<
7125 const NORM: _MM_MANTISSA_NORM_ENUM,
7126 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7127>(
7128 k: __mmask16,
7129 a: __m512,
7130) -> __m512 {
7131 unsafe {
7132 static_assert_uimm_bits!(NORM, 4);
7133 static_assert_uimm_bits!(SIGN, 2);
7134 let a: f32x16 = a.as_f32x16();
7135 let r: f32x16 = vgetmantps(
7136 a,
7137 SIGN << 2 | NORM,
7138 src:f32x16::ZERO,
7139 m:k,
7140 _MM_FROUND_CUR_DIRECTION,
7141 );
7142 transmute(src:r)
7143 }
7144}
7145
7146/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.
7147/// The mantissa is normalized to the interval specified by interv, which can take the following values:
7148/// _MM_MANT_NORM_1_2 // interval [1, 2)
7149/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
7150/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
7151/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
7152/// The sign is determined by sc which can take the following values:
7153/// _MM_MANT_SIGN_src // sign = sign(src)
7154/// _MM_MANT_SIGN_zero // sign = 0
7155/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7156///
7157/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getmant_ps&expand=2877)
7158#[inline]
7159#[target_feature(enable = "avx512f,avx512vl")]
7160#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7161#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7162#[rustc_legacy_const_generics(1, 2)]
7163pub fn _mm256_getmant_ps<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7164 a: __m256,
7165) -> __m256 {
7166 unsafe {
7167 static_assert_uimm_bits!(NORM, 4);
7168 static_assert_uimm_bits!(SIGN, 2);
7169 let a: f32x8 = a.as_f32x8();
7170 let r: f32x8 = vgetmantps256(a, SIGN << 2 | NORM, src:f32x8::ZERO, m:0b11111111);
7171 transmute(src:r)
7172 }
7173}
7174
7175/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7176/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7177/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7178/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7179/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7180/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7181/// The sign is determined by sc which can take the following values:\
7182/// _MM_MANT_SIGN_src // sign = sign(src)\
7183/// _MM_MANT_SIGN_zero // sign = 0\
7184/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7185///
7186/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getmant_ps&expand=2878)
7187#[inline]
7188#[target_feature(enable = "avx512f,avx512vl")]
7189#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7190#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7191#[rustc_legacy_const_generics(3, 4)]
7192pub fn _mm256_mask_getmant_ps<
7193 const NORM: _MM_MANTISSA_NORM_ENUM,
7194 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7195>(
7196 src: __m256,
7197 k: __mmask8,
7198 a: __m256,
7199) -> __m256 {
7200 unsafe {
7201 static_assert_uimm_bits!(NORM, 4);
7202 static_assert_uimm_bits!(SIGN, 2);
7203 let a: f32x8 = a.as_f32x8();
7204 let src: f32x8 = src.as_f32x8();
7205 let r: f32x8 = vgetmantps256(a, SIGN << 2 | NORM, src, m:k);
7206 transmute(src:r)
7207 }
7208}
7209
7210/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7211/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7212/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7213/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7214/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7215/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7216/// The sign is determined by sc which can take the following values:\
7217/// _MM_MANT_SIGN_src // sign = sign(src)\
7218/// _MM_MANT_SIGN_zero // sign = 0\
7219/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7220///
7221/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getmant_ps&expand=2879)
7222#[inline]
7223#[target_feature(enable = "avx512f,avx512vl")]
7224#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7225#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7226#[rustc_legacy_const_generics(2, 3)]
7227pub fn _mm256_maskz_getmant_ps<
7228 const NORM: _MM_MANTISSA_NORM_ENUM,
7229 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7230>(
7231 k: __mmask8,
7232 a: __m256,
7233) -> __m256 {
7234 unsafe {
7235 static_assert_uimm_bits!(NORM, 4);
7236 static_assert_uimm_bits!(SIGN, 2);
7237 let a: f32x8 = a.as_f32x8();
7238 let r: f32x8 = vgetmantps256(a, SIGN << 2 | NORM, src:f32x8::ZERO, m:k);
7239 transmute(src:r)
7240 }
7241}
7242
7243/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.
7244/// The mantissa is normalized to the interval specified by interv, which can take the following values:
7245/// _MM_MANT_NORM_1_2 // interval [1, 2)
7246/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)
7247/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)
7248/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)
7249/// The sign is determined by sc which can take the following values:
7250/// _MM_MANT_SIGN_src // sign = sign(src)
7251/// _MM_MANT_SIGN_zero // sign = 0
7252/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7253///
7254/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getmant_ps&expand=2874)
7255#[inline]
7256#[target_feature(enable = "avx512f,avx512vl")]
7257#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7258#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7259#[rustc_legacy_const_generics(1, 2)]
7260pub fn _mm_getmant_ps<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7261 a: __m128,
7262) -> __m128 {
7263 unsafe {
7264 static_assert_uimm_bits!(NORM, 4);
7265 static_assert_uimm_bits!(SIGN, 2);
7266 let a: f32x4 = a.as_f32x4();
7267 let r: f32x4 = vgetmantps128(a, SIGN << 2 | NORM, src:f32x4::ZERO, m:0b00001111);
7268 transmute(src:r)
7269 }
7270}
7271
7272/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7273/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7274/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7275/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7276/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7277/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7278/// The sign is determined by sc which can take the following values:\
7279/// _MM_MANT_SIGN_src // sign = sign(src)\
7280/// _MM_MANT_SIGN_zero // sign = 0\
7281/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7282///
7283/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getmant_ps&expand=2875)
7284#[inline]
7285#[target_feature(enable = "avx512f,avx512vl")]
7286#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7287#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7288#[rustc_legacy_const_generics(3, 4)]
7289pub fn _mm_mask_getmant_ps<
7290 const NORM: _MM_MANTISSA_NORM_ENUM,
7291 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7292>(
7293 src: __m128,
7294 k: __mmask8,
7295 a: __m128,
7296) -> __m128 {
7297 unsafe {
7298 static_assert_uimm_bits!(NORM, 4);
7299 static_assert_uimm_bits!(SIGN, 2);
7300 let a: f32x4 = a.as_f32x4();
7301 let src: f32x4 = src.as_f32x4();
7302 let r: f32x4 = vgetmantps128(a, SIGN << 2 | NORM, src, m:k);
7303 transmute(src:r)
7304 }
7305}
7306
7307/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7308/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7309/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7310/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7311/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7312/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7313/// The sign is determined by sc which can take the following values:\
7314/// _MM_MANT_SIGN_src // sign = sign(src)\
7315/// _MM_MANT_SIGN_zero // sign = 0\
7316/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7317///
7318/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getmant_ps&expand=2876)
7319#[inline]
7320#[target_feature(enable = "avx512f,avx512vl")]
7321#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7322#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0))]
7323#[rustc_legacy_const_generics(2, 3)]
7324pub fn _mm_maskz_getmant_ps<
7325 const NORM: _MM_MANTISSA_NORM_ENUM,
7326 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7327>(
7328 k: __mmask8,
7329 a: __m128,
7330) -> __m128 {
7331 unsafe {
7332 static_assert_uimm_bits!(NORM, 4);
7333 static_assert_uimm_bits!(SIGN, 2);
7334 let a: f32x4 = a.as_f32x4();
7335 let r: f32x4 = vgetmantps128(a, SIGN << 2 | NORM, src:f32x4::ZERO, m:k);
7336 transmute(src:r)
7337 }
7338}
7339
7340/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7341/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7342/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7343/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7344/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7345/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7346/// The sign is determined by sc which can take the following values:\
7347/// _MM_MANT_SIGN_src // sign = sign(src)\
7348/// _MM_MANT_SIGN_zero // sign = 0\
7349/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7350///
7351/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_pd&expand=2871)
7352#[inline]
7353#[target_feature(enable = "avx512f")]
7354#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7355#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7356#[rustc_legacy_const_generics(1, 2)]
7357pub fn _mm512_getmant_pd<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7358 a: __m512d,
7359) -> __m512d {
7360 unsafe {
7361 static_assert_uimm_bits!(NORM, 4);
7362 static_assert_uimm_bits!(SIGN, 2);
7363 let a: f64x8 = a.as_f64x8();
7364 let zero: f64x8 = f64x8::ZERO;
7365 let r: f64x8 = vgetmantpd(
7366 a,
7367 SIGN << 2 | NORM,
7368 src:zero,
7369 m:0b11111111,
7370 _MM_FROUND_CUR_DIRECTION,
7371 );
7372 transmute(src:r)
7373 }
7374}
7375
7376/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7377/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7378/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7379/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7380/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7381/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7382/// The sign is determined by sc which can take the following values:\
7383/// _MM_MANT_SIGN_src // sign = sign(src)\
7384/// _MM_MANT_SIGN_zero // sign = 0\
7385/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7386///
7387/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_pd&expand=2872)
7388#[inline]
7389#[target_feature(enable = "avx512f")]
7390#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7391#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7392#[rustc_legacy_const_generics(3, 4)]
7393pub fn _mm512_mask_getmant_pd<
7394 const NORM: _MM_MANTISSA_NORM_ENUM,
7395 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7396>(
7397 src: __m512d,
7398 k: __mmask8,
7399 a: __m512d,
7400) -> __m512d {
7401 unsafe {
7402 static_assert_uimm_bits!(NORM, 4);
7403 static_assert_uimm_bits!(SIGN, 2);
7404 let a: f64x8 = a.as_f64x8();
7405 let src: f64x8 = src.as_f64x8();
7406 let r: f64x8 = vgetmantpd(a, SIGN << 2 | NORM, src, m:k, _MM_FROUND_CUR_DIRECTION);
7407 transmute(src:r)
7408 }
7409}
7410
7411/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7412/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7413/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7414/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7415/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7416/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7417/// The sign is determined by sc which can take the following values:\
7418/// _MM_MANT_SIGN_src // sign = sign(src)\
7419/// _MM_MANT_SIGN_zero // sign = 0\
7420/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7421///
7422/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_pd&expand=2873)
7423#[inline]
7424#[target_feature(enable = "avx512f")]
7425#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7426#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7427#[rustc_legacy_const_generics(2, 3)]
7428pub fn _mm512_maskz_getmant_pd<
7429 const NORM: _MM_MANTISSA_NORM_ENUM,
7430 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7431>(
7432 k: __mmask8,
7433 a: __m512d,
7434) -> __m512d {
7435 unsafe {
7436 static_assert_uimm_bits!(NORM, 4);
7437 static_assert_uimm_bits!(SIGN, 2);
7438 let a: f64x8 = a.as_f64x8();
7439 let r: f64x8 = vgetmantpd(
7440 a,
7441 SIGN << 2 | NORM,
7442 src:f64x8::ZERO,
7443 m:k,
7444 _MM_FROUND_CUR_DIRECTION,
7445 );
7446 transmute(src:r)
7447 }
7448}
7449
7450/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7451/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7452/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7453/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7454/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7455/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7456/// The sign is determined by sc which can take the following values:\
7457/// _MM_MANT_SIGN_src // sign = sign(src)\
7458/// _MM_MANT_SIGN_zero // sign = 0\
7459/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7460///
7461/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_getmant_pd&expand=2868)
7462#[inline]
7463#[target_feature(enable = "avx512f,avx512vl")]
7464#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7465#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7466#[rustc_legacy_const_generics(1, 2)]
7467pub fn _mm256_getmant_pd<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7468 a: __m256d,
7469) -> __m256d {
7470 unsafe {
7471 static_assert_uimm_bits!(NORM, 4);
7472 static_assert_uimm_bits!(SIGN, 2);
7473 let a: f64x4 = a.as_f64x4();
7474 let r: f64x4 = vgetmantpd256(a, SIGN << 2 | NORM, src:f64x4::ZERO, m:0b00001111);
7475 transmute(src:r)
7476 }
7477}
7478
7479/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7480/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7481/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7482/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7483/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7484/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7485/// The sign is determined by sc which can take the following values:\
7486/// _MM_MANT_SIGN_src // sign = sign(src)\
7487/// _MM_MANT_SIGN_zero // sign = 0\
7488/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7489///
7490/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_getmant_pd&expand=2869)
7491#[inline]
7492#[target_feature(enable = "avx512f,avx512vl")]
7493#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7494#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7495#[rustc_legacy_const_generics(3, 4)]
7496pub fn _mm256_mask_getmant_pd<
7497 const NORM: _MM_MANTISSA_NORM_ENUM,
7498 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7499>(
7500 src: __m256d,
7501 k: __mmask8,
7502 a: __m256d,
7503) -> __m256d {
7504 unsafe {
7505 static_assert_uimm_bits!(NORM, 4);
7506 static_assert_uimm_bits!(SIGN, 2);
7507 let a: f64x4 = a.as_f64x4();
7508 let src: f64x4 = src.as_f64x4();
7509 let r: f64x4 = vgetmantpd256(a, SIGN << 2 | NORM, src, m:k);
7510 transmute(src:r)
7511 }
7512}
7513
7514/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7515/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7516/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7517/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7518/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7519/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7520/// The sign is determined by sc which can take the following values:\
7521/// _MM_MANT_SIGN_src // sign = sign(src)\
7522/// _MM_MANT_SIGN_zero // sign = 0\
7523/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7524///
7525/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_getmant_pd&expand=2870)
7526#[inline]
7527#[target_feature(enable = "avx512f,avx512vl")]
7528#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7529#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7530#[rustc_legacy_const_generics(2, 3)]
7531pub fn _mm256_maskz_getmant_pd<
7532 const NORM: _MM_MANTISSA_NORM_ENUM,
7533 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7534>(
7535 k: __mmask8,
7536 a: __m256d,
7537) -> __m256d {
7538 unsafe {
7539 static_assert_uimm_bits!(NORM, 4);
7540 static_assert_uimm_bits!(SIGN, 2);
7541 let a: f64x4 = a.as_f64x4();
7542 let r: f64x4 = vgetmantpd256(a, SIGN << 2 | NORM, src:f64x4::ZERO, m:k);
7543 transmute(src:r)
7544 }
7545}
7546
7547/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7548/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7549/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7550/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7551/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7552/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7553/// The sign is determined by sc which can take the following values:\
7554/// _MM_MANT_SIGN_src // sign = sign(src)\
7555/// _MM_MANT_SIGN_zero // sign = 0\
7556/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7557///
7558/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_getmant_pd&expand=2865)
7559#[inline]
7560#[target_feature(enable = "avx512f,avx512vl")]
7561#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7562#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7563#[rustc_legacy_const_generics(1, 2)]
7564pub fn _mm_getmant_pd<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
7565 a: __m128d,
7566) -> __m128d {
7567 unsafe {
7568 static_assert_uimm_bits!(NORM, 4);
7569 static_assert_uimm_bits!(SIGN, 2);
7570 let a: f64x2 = a.as_f64x2();
7571 let r: f64x2 = vgetmantpd128(a, SIGN << 2 | NORM, src:f64x2::ZERO, m:0b00000011);
7572 transmute(src:r)
7573 }
7574}
7575
7576/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7577/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7578/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7579/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7580/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7581/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7582/// The sign is determined by sc which can take the following values:\
7583/// _MM_MANT_SIGN_src // sign = sign(src)\
7584/// _MM_MANT_SIGN_zero // sign = 0\
7585/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7586///
7587/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_getmant_pd&expand=2866)
7588#[inline]
7589#[target_feature(enable = "avx512f,avx512vl")]
7590#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7591#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7592#[rustc_legacy_const_generics(3, 4)]
7593pub fn _mm_mask_getmant_pd<
7594 const NORM: _MM_MANTISSA_NORM_ENUM,
7595 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7596>(
7597 src: __m128d,
7598 k: __mmask8,
7599 a: __m128d,
7600) -> __m128d {
7601 unsafe {
7602 static_assert_uimm_bits!(NORM, 4);
7603 static_assert_uimm_bits!(SIGN, 2);
7604 let a: f64x2 = a.as_f64x2();
7605 let src: f64x2 = src.as_f64x2();
7606 let r: f64x2 = vgetmantpd128(a, SIGN << 2 | NORM, src, m:k);
7607 transmute(src:r)
7608 }
7609}
7610
7611/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
7612/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
7613/// _MM_MANT_NORM_1_2 // interval [1, 2)\
7614/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
7615/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
7616/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
7617/// The sign is determined by sc which can take the following values:\
7618/// _MM_MANT_SIGN_src // sign = sign(src)\
7619/// _MM_MANT_SIGN_zero // sign = 0\
7620/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1
7621///
7622/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_getmant_pd&expand=2867)
7623#[inline]
7624#[target_feature(enable = "avx512f,avx512vl")]
7625#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7626#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0))]
7627#[rustc_legacy_const_generics(2, 3)]
7628pub fn _mm_maskz_getmant_pd<
7629 const NORM: _MM_MANTISSA_NORM_ENUM,
7630 const SIGN: _MM_MANTISSA_SIGN_ENUM,
7631>(
7632 k: __mmask8,
7633 a: __m128d,
7634) -> __m128d {
7635 unsafe {
7636 static_assert_uimm_bits!(NORM, 4);
7637 static_assert_uimm_bits!(SIGN, 2);
7638 let a: f64x2 = a.as_f64x2();
7639 let r: f64x2 = vgetmantpd128(a, SIGN << 2 | NORM, src:f64x2::ZERO, m:k);
7640 transmute(src:r)
7641 }
7642}
7643
7644/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.\
7645///
7646/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7647/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7648/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7649/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7650/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7651/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7652///
7653/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_round_ps&expand=145)
7654#[inline]
7655#[target_feature(enable = "avx512f")]
7656#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7657#[cfg_attr(test, assert_instr(vaddps, ROUNDING = 8))]
7658#[rustc_legacy_const_generics(2)]
7659pub fn _mm512_add_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
7660 unsafe {
7661 static_assert_rounding!(ROUNDING);
7662 let a: f32x16 = a.as_f32x16();
7663 let b: f32x16 = b.as_f32x16();
7664 let r: f32x16 = vaddps(a, b, ROUNDING);
7665 transmute(src:r)
7666 }
7667}
7668
7669/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
7670///
7671/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7672/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7673/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7674/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7675/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7676/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7677///
7678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_round_ps&expand=146)
7679#[inline]
7680#[target_feature(enable = "avx512f")]
7681#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7682#[cfg_attr(test, assert_instr(vaddps, ROUNDING = 8))]
7683#[rustc_legacy_const_generics(4)]
7684pub fn _mm512_mask_add_round_ps<const ROUNDING: i32>(
7685 src: __m512,
7686 k: __mmask16,
7687 a: __m512,
7688 b: __m512,
7689) -> __m512 {
7690 unsafe {
7691 static_assert_rounding!(ROUNDING);
7692 let a: f32x16 = a.as_f32x16();
7693 let b: f32x16 = b.as_f32x16();
7694 let r: f32x16 = vaddps(a, b, ROUNDING);
7695 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_f32x16()))
7696 }
7697}
7698
7699/// Add packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
7700///
7701/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7702/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7703/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7704/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7705/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7706/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7707///
7708/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_round_ps&expand=147)
7709#[inline]
7710#[target_feature(enable = "avx512f")]
7711#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7712#[cfg_attr(test, assert_instr(vaddps, ROUNDING = 8))]
7713#[rustc_legacy_const_generics(3)]
7714pub fn _mm512_maskz_add_round_ps<const ROUNDING: i32>(
7715 k: __mmask16,
7716 a: __m512,
7717 b: __m512,
7718) -> __m512 {
7719 unsafe {
7720 static_assert_rounding!(ROUNDING);
7721 let a: f32x16 = a.as_f32x16();
7722 let b: f32x16 = b.as_f32x16();
7723 let r: f32x16 = vaddps(a, b, ROUNDING);
7724 transmute(src:simd_select_bitmask(m:k, yes:r, no:f32x16::ZERO))
7725 }
7726}
7727
7728/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.\
7729///
7730/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7731/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7732/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7733/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7734/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7735/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7736///
7737/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_add_round_pd&expand=142)
7738#[inline]
7739#[target_feature(enable = "avx512f")]
7740#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7741#[cfg_attr(test, assert_instr(vaddpd, ROUNDING = 8))]
7742#[rustc_legacy_const_generics(2)]
7743pub fn _mm512_add_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
7744 unsafe {
7745 static_assert_rounding!(ROUNDING);
7746 let a: f64x8 = a.as_f64x8();
7747 let b: f64x8 = b.as_f64x8();
7748 let r: f64x8 = vaddpd(a, b, ROUNDING);
7749 transmute(src:r)
7750 }
7751}
7752
7753/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
7754///
7755/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7756/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7757/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7758/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7759/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7760/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7761///
7762/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_add_round_pd&expand=143)
7763#[inline]
7764#[target_feature(enable = "avx512f")]
7765#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7766#[cfg_attr(test, assert_instr(vaddpd, ROUNDING = 8))]
7767#[rustc_legacy_const_generics(4)]
7768pub fn _mm512_mask_add_round_pd<const ROUNDING: i32>(
7769 src: __m512d,
7770 k: __mmask8,
7771 a: __m512d,
7772 b: __m512d,
7773) -> __m512d {
7774 unsafe {
7775 static_assert_rounding!(ROUNDING);
7776 let a: f64x8 = a.as_f64x8();
7777 let b: f64x8 = b.as_f64x8();
7778 let r: f64x8 = vaddpd(a, b, ROUNDING);
7779 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_f64x8()))
7780 }
7781}
7782
7783/// Add packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
7784///
7785/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7786/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7787/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7788/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7789/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7790/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7791///
7792/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_add_round_pd&expand=144)
7793#[inline]
7794#[target_feature(enable = "avx512f")]
7795#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7796#[cfg_attr(test, assert_instr(vaddpd, ROUNDING = 8))]
7797#[rustc_legacy_const_generics(3)]
7798pub fn _mm512_maskz_add_round_pd<const ROUNDING: i32>(
7799 k: __mmask8,
7800 a: __m512d,
7801 b: __m512d,
7802) -> __m512d {
7803 unsafe {
7804 static_assert_rounding!(ROUNDING);
7805 let a: f64x8 = a.as_f64x8();
7806 let b: f64x8 = b.as_f64x8();
7807 let r: f64x8 = vaddpd(a, b, ROUNDING);
7808 transmute(src:simd_select_bitmask(m:k, yes:r, no:f64x8::ZERO))
7809 }
7810}
7811
7812/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst.\
7813///
7814/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7815/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7816/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7817/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7818/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7819/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7820///
7821/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_round_ps&expand=5739)
7822#[inline]
7823#[target_feature(enable = "avx512f")]
7824#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7825#[cfg_attr(test, assert_instr(vsubps, ROUNDING = 8))]
7826#[rustc_legacy_const_generics(2)]
7827pub fn _mm512_sub_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
7828 unsafe {
7829 static_assert_rounding!(ROUNDING);
7830 let a: f32x16 = a.as_f32x16();
7831 let b: f32x16 = b.as_f32x16();
7832 let r: f32x16 = vsubps(a, b, ROUNDING);
7833 transmute(src:r)
7834 }
7835}
7836
7837/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
7838///
7839/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7840/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7841/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7842/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7843/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7844/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7845///
7846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_round_ps&expand=5737)
7847#[inline]
7848#[target_feature(enable = "avx512f")]
7849#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7850#[cfg_attr(test, assert_instr(vsubps, ROUNDING = 8))]
7851#[rustc_legacy_const_generics(4)]
7852pub fn _mm512_mask_sub_round_ps<const ROUNDING: i32>(
7853 src: __m512,
7854 k: __mmask16,
7855 a: __m512,
7856 b: __m512,
7857) -> __m512 {
7858 unsafe {
7859 static_assert_rounding!(ROUNDING);
7860 let a: f32x16 = a.as_f32x16();
7861 let b: f32x16 = b.as_f32x16();
7862 let r: f32x16 = vsubps(a, b, ROUNDING);
7863 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_f32x16()))
7864 }
7865}
7866
7867/// Subtract packed single-precision (32-bit) floating-point elements in b from packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
7868///
7869/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7870/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7871/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7872/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7873/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7874/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7875///
7876/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_round_ps&expand=5738)
7877#[inline]
7878#[target_feature(enable = "avx512f")]
7879#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7880#[cfg_attr(test, assert_instr(vsubps, ROUNDING = 8))]
7881#[rustc_legacy_const_generics(3)]
7882pub fn _mm512_maskz_sub_round_ps<const ROUNDING: i32>(
7883 k: __mmask16,
7884 a: __m512,
7885 b: __m512,
7886) -> __m512 {
7887 unsafe {
7888 static_assert_rounding!(ROUNDING);
7889 let a: f32x16 = a.as_f32x16();
7890 let b: f32x16 = b.as_f32x16();
7891 let r: f32x16 = vsubps(a, b, ROUNDING);
7892 transmute(src:simd_select_bitmask(m:k, yes:r, no:f32x16::ZERO))
7893 }
7894}
7895
7896/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst.\
7897///
7898/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7899/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7900/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7901/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7902/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7903/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7904///
7905/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sub_round_pd&expand=5736)
7906#[inline]
7907#[target_feature(enable = "avx512f")]
7908#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7909#[cfg_attr(test, assert_instr(vsubpd, ROUNDING = 8))]
7910#[rustc_legacy_const_generics(2)]
7911pub fn _mm512_sub_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
7912 unsafe {
7913 static_assert_rounding!(ROUNDING);
7914 let a: f64x8 = a.as_f64x8();
7915 let b: f64x8 = b.as_f64x8();
7916 let r: f64x8 = vsubpd(a, b, ROUNDING);
7917 transmute(src:r)
7918 }
7919}
7920
7921/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
7922///
7923/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7924/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7925/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7926/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7927/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7928/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7929///
7930/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sub_round_pd&expand=5734)
7931#[inline]
7932#[target_feature(enable = "avx512f")]
7933#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7934#[cfg_attr(test, assert_instr(vsubpd, ROUNDING = 8))]
7935#[rustc_legacy_const_generics(4)]
7936pub fn _mm512_mask_sub_round_pd<const ROUNDING: i32>(
7937 src: __m512d,
7938 k: __mmask8,
7939 a: __m512d,
7940 b: __m512d,
7941) -> __m512d {
7942 unsafe {
7943 static_assert_rounding!(ROUNDING);
7944 let a: f64x8 = a.as_f64x8();
7945 let b: f64x8 = b.as_f64x8();
7946 let r: f64x8 = vsubpd(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
7948 }
7949}
7950
7951/// Subtract packed double-precision (64-bit) floating-point elements in b from packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
7952///
7953/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7954/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7955/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7956/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7957/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7958/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7959///
7960/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sub_round_pd&expand=5735)
7961#[inline]
7962#[target_feature(enable = "avx512f")]
7963#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7964#[cfg_attr(test, assert_instr(vsubpd, ROUNDING = 8))]
7965#[rustc_legacy_const_generics(3)]
7966pub fn _mm512_maskz_sub_round_pd<const ROUNDING: i32>(
7967 k: __mmask8,
7968 a: __m512d,
7969 b: __m512d,
7970) -> __m512d {
7971 unsafe {
7972 static_assert_rounding!(ROUNDING);
7973 let a: f64x8 = a.as_f64x8();
7974 let b: f64x8 = b.as_f64x8();
7975 let r: f64x8 = vsubpd(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
7977 }
7978}
7979
7980/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst.\
7981///
7982/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
7983/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
7984/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
7985/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
7986/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
7987/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
7988///
7989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_round_ps&expand=3940)
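///
/// A minimal usage sketch (marked `ignore`: it assumes an AVX-512 CPU and the
/// unstable `stdarch_x86_avx512` feature); the values are illustrative only:
///
/// ```ignore
/// # use core::arch::x86_64::*;
/// let a = _mm512_set1_ps(1.5);
/// let b = _mm512_set1_ps(2.0);
/// // Every lane becomes 3.0; round-to-nearest with exceptions suppressed.
/// let r = _mm512_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
/// ```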
7990#[inline]
7991#[target_feature(enable = "avx512f")]
7992#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
7993#[cfg_attr(test, assert_instr(vmulps, ROUNDING = 8))]
7994#[rustc_legacy_const_generics(2)]
7995pub fn _mm512_mul_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
7996 unsafe {
7997 static_assert_rounding!(ROUNDING);
7998 let a: f32x16 = a.as_f32x16();
7999 let b: f32x16 = b.as_f32x16();
8000 let r: f32x16 = vmulps(a, b, ROUNDING);
        transmute(r)
8002 }
8003}
8004
8005/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8006///
8007/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8008/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8009/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8010/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8011/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8012/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8013///
8014/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_round_ps&expand=3938)
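///
/// A minimal usage sketch of the writemask form (marked `ignore`: it assumes an
/// AVX-512 CPU and the unstable `stdarch_x86_avx512` feature); the values are
/// illustrative only:
///
/// ```ignore
/// # use core::arch::x86_64::*;
/// let src = _mm512_set1_ps(-1.0);
/// let a = _mm512_set1_ps(2.0);
/// let b = _mm512_set1_ps(3.0);
/// // Even lanes get 6.0; odd lanes keep -1.0 from `src`.
/// let r = _mm512_mask_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
///     src,
///     0b01010101_01010101,
///     a,
///     b,
/// );
/// ```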
8015#[inline]
8016#[target_feature(enable = "avx512f")]
8017#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8018#[cfg_attr(test, assert_instr(vmulps, ROUNDING = 8))]
8019#[rustc_legacy_const_generics(4)]
8020pub fn _mm512_mask_mul_round_ps<const ROUNDING: i32>(
8021 src: __m512,
8022 k: __mmask16,
8023 a: __m512,
8024 b: __m512,
8025) -> __m512 {
8026 unsafe {
8027 static_assert_rounding!(ROUNDING);
8028 let a: f32x16 = a.as_f32x16();
8029 let b: f32x16 = b.as_f32x16();
8030 let r: f32x16 = vmulps(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
8032 }
8033}
8034
8035/// Multiply packed single-precision (32-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8036///
8037/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8038/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8039/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8040/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8041/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8042/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8043///
8044/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_round_ps&expand=3939)
8045#[inline]
8046#[target_feature(enable = "avx512f")]
8047#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8048#[cfg_attr(test, assert_instr(vmulps, ROUNDING = 8))]
8049#[rustc_legacy_const_generics(3)]
8050pub fn _mm512_maskz_mul_round_ps<const ROUNDING: i32>(
8051 k: __mmask16,
8052 a: __m512,
8053 b: __m512,
8054) -> __m512 {
8055 unsafe {
8056 static_assert_rounding!(ROUNDING);
8057 let a: f32x16 = a.as_f32x16();
8058 let b: f32x16 = b.as_f32x16();
8059 let r: f32x16 = vmulps(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
8061 }
8062}
8063
8064/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst.\
8065///
8066/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8067/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8068/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8069/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8070/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8071/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8072///
8073/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mul_round_pd&expand=3937)
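///
/// A minimal usage sketch (marked `ignore`: it assumes an AVX-512 CPU and the
/// unstable `stdarch_x86_avx512` feature); the values are illustrative only:
///
/// ```ignore
/// # use core::arch::x86_64::*;
/// let a = _mm512_set1_pd(1.25);
/// let b = _mm512_set1_pd(4.0);
/// // Every lane becomes 5.0.
/// let r = _mm512_mul_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
/// ```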
8074#[inline]
8075#[target_feature(enable = "avx512f")]
8076#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8077#[cfg_attr(test, assert_instr(vmulpd, ROUNDING = 8))]
8078#[rustc_legacy_const_generics(2)]
8079pub fn _mm512_mul_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
8080 unsafe {
8081 static_assert_rounding!(ROUNDING);
8082 let a: f64x8 = a.as_f64x8();
8083 let b: f64x8 = b.as_f64x8();
8084 let r: f64x8 = vmulpd(a, b, ROUNDING);
        transmute(r)
8086 }
8087}
8088
8089/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8090///
8091/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8092/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8093/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8094/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8095/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8096/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8097///
8098/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_mul_round_pd&expand=3935)
8099#[inline]
8100#[target_feature(enable = "avx512f")]
8101#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8102#[cfg_attr(test, assert_instr(vmulpd, ROUNDING = 8))]
8103#[rustc_legacy_const_generics(4)]
8104pub fn _mm512_mask_mul_round_pd<const ROUNDING: i32>(
8105 src: __m512d,
8106 k: __mmask8,
8107 a: __m512d,
8108 b: __m512d,
8109) -> __m512d {
8110 unsafe {
8111 static_assert_rounding!(ROUNDING);
8112 let a: f64x8 = a.as_f64x8();
8113 let b: f64x8 = b.as_f64x8();
8114 let r: f64x8 = vmulpd(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
8116 }
8117}
8118
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8120///
8121/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8122/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8123/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8124/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8125/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8126/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8127///
8128/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_mul_round_pd&expand=3939)
8129#[inline]
8130#[target_feature(enable = "avx512f")]
8131#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8132#[cfg_attr(test, assert_instr(vmulpd, ROUNDING = 8))]
8133#[rustc_legacy_const_generics(3)]
8134pub fn _mm512_maskz_mul_round_pd<const ROUNDING: i32>(
8135 k: __mmask8,
8136 a: __m512d,
8137 b: __m512d,
8138) -> __m512d {
8139 unsafe {
8140 static_assert_rounding!(ROUNDING);
8141 let a: f64x8 = a.as_f64x8();
8142 let b: f64x8 = b.as_f64x8();
8143 let r: f64x8 = vmulpd(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
8145 }
8146}
8147
8148/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst.\
8149///
8150/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8151/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8152/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8153/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8154/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8155/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8156///
8157/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_round_ps&expand=2168)
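///
/// A minimal usage sketch (marked `ignore`: it assumes an AVX-512 CPU and the
/// unstable `stdarch_x86_avx512` feature); the values are illustrative only:
///
/// ```ignore
/// # use core::arch::x86_64::*;
/// let a = _mm512_set1_ps(1.0);
/// let b = _mm512_set1_ps(3.0);
/// // Every lane holds 1.0 / 3.0 with the quotient rounded toward negative infinity.
/// let r = _mm512_div_round_ps::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a, b);
/// ```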
8158#[inline]
8159#[target_feature(enable = "avx512f")]
8160#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8161#[cfg_attr(test, assert_instr(vdivps, ROUNDING = 8))]
8162#[rustc_legacy_const_generics(2)]
8163pub fn _mm512_div_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
8164 unsafe {
8165 static_assert_rounding!(ROUNDING);
8166 let a: f32x16 = a.as_f32x16();
8167 let b: f32x16 = b.as_f32x16();
8168 let r: f32x16 = vdivps(a, b, ROUNDING);
        transmute(r)
8170 }
8171}
8172
8173/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8174///
8175/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8176/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8177/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8178/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8179/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8180/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8181///
8182/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_round_ps&expand=2169)
8183#[inline]
8184#[target_feature(enable = "avx512f")]
8185#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8186#[cfg_attr(test, assert_instr(vdivps, ROUNDING = 8))]
8187#[rustc_legacy_const_generics(4)]
8188pub fn _mm512_mask_div_round_ps<const ROUNDING: i32>(
8189 src: __m512,
8190 k: __mmask16,
8191 a: __m512,
8192 b: __m512,
8193) -> __m512 {
8194 unsafe {
8195 static_assert_rounding!(ROUNDING);
8196 let a: f32x16 = a.as_f32x16();
8197 let b: f32x16 = b.as_f32x16();
8198 let r: f32x16 = vdivps(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
8200 }
8201}
8202
8203/// Divide packed single-precision (32-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8204///
8205/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8206/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8207/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8208/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8209/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8210/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8211///
8212/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_round_ps&expand=2170)
8213#[inline]
8214#[target_feature(enable = "avx512f")]
8215#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8216#[cfg_attr(test, assert_instr(vdivps, ROUNDING = 8))]
8217#[rustc_legacy_const_generics(3)]
8218pub fn _mm512_maskz_div_round_ps<const ROUNDING: i32>(
8219 k: __mmask16,
8220 a: __m512,
8221 b: __m512,
8222) -> __m512 {
8223 unsafe {
8224 static_assert_rounding!(ROUNDING);
8225 let a: f32x16 = a.as_f32x16();
8226 let b: f32x16 = b.as_f32x16();
8227 let r: f32x16 = vdivps(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
8229 }
8230}
8231
/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst.\
8233///
8234/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8235/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8236/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8237/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8238/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8239/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8240///
8241/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_div_round_pd&expand=2165)
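///
/// A minimal usage sketch (marked `ignore`: it assumes an AVX-512 CPU and the
/// unstable `stdarch_x86_avx512` feature); the values are illustrative only:
///
/// ```ignore
/// # use core::arch::x86_64::*;
/// let a = _mm512_set1_pd(7.0);
/// let b = _mm512_set1_pd(2.0);
/// // Every lane becomes 3.5; the rounding mode only matters for inexact quotients.
/// let r = _mm512_div_round_pd::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a, b);
/// ```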
8242#[inline]
8243#[target_feature(enable = "avx512f")]
8244#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8245#[cfg_attr(test, assert_instr(vdivpd, ROUNDING = 8))]
8246#[rustc_legacy_const_generics(2)]
8247pub fn _mm512_div_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
8248 unsafe {
8249 static_assert_rounding!(ROUNDING);
8250 let a: f64x8 = a.as_f64x8();
8251 let b: f64x8 = b.as_f64x8();
8252 let r: f64x8 = vdivpd(a, b, ROUNDING);
        transmute(r)
8254 }
8255}
8256
8257/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8258///
8259/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8260/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8261/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8262/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8263/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8264/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8265///
8266/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_div_round_pd&expand=2166)
8267#[inline]
8268#[target_feature(enable = "avx512f")]
8269#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8270#[cfg_attr(test, assert_instr(vdivpd, ROUNDING = 8))]
8271#[rustc_legacy_const_generics(4)]
8272pub fn _mm512_mask_div_round_pd<const ROUNDING: i32>(
8273 src: __m512d,
8274 k: __mmask8,
8275 a: __m512d,
8276 b: __m512d,
8277) -> __m512d {
8278 unsafe {
8279 static_assert_rounding!(ROUNDING);
8280 let a: f64x8 = a.as_f64x8();
8281 let b: f64x8 = b.as_f64x8();
8282 let r: f64x8 = vdivpd(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
8284 }
8285}
8286
8287/// Divide packed double-precision (64-bit) floating-point elements in a by packed elements in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8288///
8289/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8290/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8291/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8292/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8293/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8294/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8295///
8296/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_div_round_pd&expand=2167)
8297#[inline]
8298#[target_feature(enable = "avx512f")]
8299#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8300#[cfg_attr(test, assert_instr(vdivpd, ROUNDING = 8))]
8301#[rustc_legacy_const_generics(3)]
8302pub fn _mm512_maskz_div_round_pd<const ROUNDING: i32>(
8303 k: __mmask8,
8304 a: __m512d,
8305 b: __m512d,
8306) -> __m512d {
8307 unsafe {
8308 static_assert_rounding!(ROUNDING);
8309 let a: f64x8 = a.as_f64x8();
8310 let b: f64x8 = b.as_f64x8();
8311 let r: f64x8 = vdivpd(a, b, ROUNDING);
        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
8313 }
8314}
8315
8316/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst.\
8317///
8318/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8319/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8320/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8321/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8322/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8323/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8324///
8325/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_round_ps&expand=5377)
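///
/// A minimal usage sketch (marked `ignore`: it assumes an AVX-512 CPU and the
/// unstable `stdarch_x86_avx512` feature); the values are illustrative only:
///
/// ```ignore
/// # use core::arch::x86_64::*;
/// let a = _mm512_set1_ps(2.0);
/// // Every lane holds the square root of 2.0, rounded to the nearest `f32`.
/// let r = _mm512_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
/// ```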
8326#[inline]
8327#[target_feature(enable = "avx512f")]
8328#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8329#[cfg_attr(test, assert_instr(vsqrtps, ROUNDING = 8))]
8330#[rustc_legacy_const_generics(1)]
8331pub fn _mm512_sqrt_round_ps<const ROUNDING: i32>(a: __m512) -> __m512 {
8332 unsafe {
8333 static_assert_rounding!(ROUNDING);
8334 let a: f32x16 = a.as_f32x16();
8335 let r: f32x16 = vsqrtps(a, ROUNDING);
        transmute(r)
8337 }
8338}
8339
8340/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8341///
8342/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8343/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8344/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8345/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8346/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8347/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8348///
8349/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_round_ps&expand=5375)
8350#[inline]
8351#[target_feature(enable = "avx512f")]
8352#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8353#[cfg_attr(test, assert_instr(vsqrtps, ROUNDING = 8))]
8354#[rustc_legacy_const_generics(3)]
8355pub fn _mm512_mask_sqrt_round_ps<const ROUNDING: i32>(
8356 src: __m512,
8357 k: __mmask16,
8358 a: __m512,
8359) -> __m512 {
8360 unsafe {
8361 static_assert_rounding!(ROUNDING);
8362 let a: f32x16 = a.as_f32x16();
8363 let r: f32x16 = vsqrtps(a, ROUNDING);
        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
8365 }
8366}
8367
8368/// Compute the square root of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8369///
8370/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8371/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8372/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8373/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8374/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8375/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8376///
8377/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_round_ps&expand=5376)
8378#[inline]
8379#[target_feature(enable = "avx512f")]
8380#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8381#[cfg_attr(test, assert_instr(vsqrtps, ROUNDING = 8))]
8382#[rustc_legacy_const_generics(2)]
8383pub fn _mm512_maskz_sqrt_round_ps<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m512 {
8384 unsafe {
8385 static_assert_rounding!(ROUNDING);
8386 let a: f32x16 = a.as_f32x16();
8387 let r: f32x16 = vsqrtps(a, ROUNDING);
        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
8389 }
8390}
8391
8392/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst.\
8393///
8394/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8395/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8396/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8397/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8398/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8399/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8400///
8401/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sqrt_round_pd&expand=5374)
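///
/// A minimal usage sketch (marked `ignore`: it assumes an AVX-512 CPU and the
/// unstable `stdarch_x86_avx512` feature); the values are illustrative only:
///
/// ```ignore
/// # use core::arch::x86_64::*;
/// let a = _mm512_set1_pd(9.0);
/// // Uses the rounding mode currently selected in MXCSR.RC; every lane becomes 3.0.
/// let r = _mm512_sqrt_round_pd::<_MM_FROUND_CUR_DIRECTION>(a);
/// ```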
8402#[inline]
8403#[target_feature(enable = "avx512f")]
8404#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8405#[cfg_attr(test, assert_instr(vsqrtpd, ROUNDING = 8))]
8406#[rustc_legacy_const_generics(1)]
8407pub fn _mm512_sqrt_round_pd<const ROUNDING: i32>(a: __m512d) -> __m512d {
8408 unsafe {
8409 static_assert_rounding!(ROUNDING);
8410 let a: f64x8 = a.as_f64x8();
8411 let r: f64x8 = vsqrtpd(a, ROUNDING);
        transmute(r)
8413 }
8414}
8415
8416/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
8417///
8418/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8419/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8420/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8421/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8422/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8423/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8424///
8425/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sqrt_round_pd&expand=5372)
8426#[inline]
8427#[target_feature(enable = "avx512f")]
8428#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8429#[cfg_attr(test, assert_instr(vsqrtpd, ROUNDING = 8))]
8430#[rustc_legacy_const_generics(3)]
8431pub fn _mm512_mask_sqrt_round_pd<const ROUNDING: i32>(
8432 src: __m512d,
8433 k: __mmask8,
8434 a: __m512d,
8435) -> __m512d {
8436 unsafe {
8437 static_assert_rounding!(ROUNDING);
8438 let a: f64x8 = a.as_f64x8();
8439 let r: f64x8 = vsqrtpd(a, ROUNDING);
        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
8441 }
8442}
8443
8444/// Compute the square root of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8445///
8446/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8447/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8448/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8449/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8450/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8451/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8452///
8453/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sqrt_round_pd&expand=5373)
8454#[inline]
8455#[target_feature(enable = "avx512f")]
8456#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8457#[cfg_attr(test, assert_instr(vsqrtpd, ROUNDING = 8))]
8458#[rustc_legacy_const_generics(2)]
8459pub fn _mm512_maskz_sqrt_round_pd<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m512d {
8460 unsafe {
8461 static_assert_rounding!(ROUNDING);
8462 let a: f64x8 = a.as_f64x8();
8463 let r: f64x8 = vsqrtpd(a, ROUNDING);
        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
8465 }
8466}
8467
8468/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.\
8469///
8470/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8471/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8472/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8473/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8474/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8475/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8476///
8477/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_round_ps&expand=2565)
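///
/// A minimal usage sketch (marked `ignore`: it assumes an AVX-512 CPU and the
/// unstable `stdarch_x86_avx512` feature); the values are illustrative only:
///
/// ```ignore
/// # use core::arch::x86_64::*;
/// let a = _mm512_set1_ps(2.0);
/// let b = _mm512_set1_ps(3.0);
/// let c = _mm512_set1_ps(1.0);
/// // Every lane becomes 2.0 * 3.0 + 1.0 = 7.0, with a single rounding at the end.
/// let r = _mm512_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
/// ```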
8478#[inline]
8479#[target_feature(enable = "avx512f")]
8480#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8481#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
8482#[rustc_legacy_const_generics(3)]
8483pub fn _mm512_fmadd_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
8484 unsafe {
8485 static_assert_rounding!(ROUNDING);
8486 vfmadd132psround(a, b, c, ROUNDING)
8487 }
8488}
8489
8490/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8491///
8492/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8493/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8494/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8495/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8496/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8497/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8498///
8499/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_round_ps&expand=2566)
8500#[inline]
8501#[target_feature(enable = "avx512f")]
8502#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8503#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
8504#[rustc_legacy_const_generics(4)]
8505pub fn _mm512_mask_fmadd_round_ps<const ROUNDING: i32>(
8506 a: __m512,
8507 k: __mmask16,
8508 b: __m512,
8509 c: __m512,
8510) -> __m512 {
8511 unsafe {
8512 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmadd132psround(a, b, c, ROUNDING), a)
8514 }
8515}
8516
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8518///
8519/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8520/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8521/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8522/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8523/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8524/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8525///
8526/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_round_ps&expand=2568)
8527#[inline]
8528#[target_feature(enable = "avx512f")]
8529#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8530#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
8531#[rustc_legacy_const_generics(4)]
8532pub fn _mm512_maskz_fmadd_round_ps<const ROUNDING: i32>(
8533 k: __mmask16,
8534 a: __m512,
8535 b: __m512,
8536 c: __m512,
8537) -> __m512 {
8538 unsafe {
8539 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmadd132psround(a, b, c, ROUNDING), _mm512_setzero_ps())
8541 }
8542}
8543
8544/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
8545///
8546/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8547/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8548/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8549/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8550/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8551/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8552///
8553/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_round_ps&expand=2567)
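///
/// A minimal usage sketch of the mask3 form (marked `ignore`: it assumes an
/// AVX-512 CPU and the unstable `stdarch_x86_avx512` feature); the values are
/// illustrative only:
///
/// ```ignore
/// # use core::arch::x86_64::*;
/// let a = _mm512_set1_ps(2.0);
/// let b = _mm512_set1_ps(3.0);
/// let c = _mm512_set1_ps(1.0);
/// // Lanes 0-3 get 7.0; lanes 4-15 keep the values from `c` (1.0).
/// let r = _mm512_mask3_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
///     a,
///     b,
///     c,
///     0b00000000_00001111,
/// );
/// ```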
8554#[inline]
8555#[target_feature(enable = "avx512f")]
8556#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8557#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132ps or vfmadd213ps or vfmadd231ps
8558#[rustc_legacy_const_generics(4)]
8559pub fn _mm512_mask3_fmadd_round_ps<const ROUNDING: i32>(
8560 a: __m512,
8561 b: __m512,
8562 c: __m512,
8563 k: __mmask16,
8564) -> __m512 {
8565 unsafe {
8566 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmadd132psround(a, b, c, ROUNDING), c)
8568 }
8569}
8570
8571/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst.\
8572///
8573/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8574/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8575/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8576/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8577/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8578/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8579///
8580/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmadd_round_pd&expand=2561)
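///
/// A minimal usage sketch (marked `ignore`: it assumes an AVX-512 CPU and the
/// unstable `stdarch_x86_avx512` feature); the values are illustrative only:
///
/// ```ignore
/// # use core::arch::x86_64::*;
/// let a = _mm512_set1_pd(2.0);
/// let b = _mm512_set1_pd(3.0);
/// let c = _mm512_set1_pd(1.0);
/// // Every lane becomes 7.0.
/// let r = _mm512_fmadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
/// ```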
8581#[inline]
8582#[target_feature(enable = "avx512f")]
8583#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8584#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
8585#[rustc_legacy_const_generics(3)]
8586pub fn _mm512_fmadd_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
8587 unsafe {
8588 static_assert_rounding!(ROUNDING);
8589 vfmadd132pdround(a, b, c, ROUNDING)
8590 }
8591}
8592
8593/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8594///
8595/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8596/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8597/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8598/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8599/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8600/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8601///
8602/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmadd_round_pd&expand=2562)
8603#[inline]
8604#[target_feature(enable = "avx512f")]
8605#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8606#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
8607#[rustc_legacy_const_generics(4)]
8608pub fn _mm512_mask_fmadd_round_pd<const ROUNDING: i32>(
8609 a: __m512d,
8610 k: __mmask8,
8611 b: __m512d,
8612 c: __m512d,
8613) -> __m512d {
8614 unsafe {
8615 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmadd132pdround(a, b, c, ROUNDING), a)
8617 }
8618}
8619
8620/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8621///
8622/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8623/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8624/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8625/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8626/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8627/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8628///
8629/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmadd_round_pd&expand=2564)
8630#[inline]
8631#[target_feature(enable = "avx512f")]
8632#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8633#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
8634#[rustc_legacy_const_generics(4)]
8635pub fn _mm512_maskz_fmadd_round_pd<const ROUNDING: i32>(
8636 k: __mmask8,
8637 a: __m512d,
8638 b: __m512d,
8639 c: __m512d,
8640) -> __m512d {
8641 unsafe {
8642 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmadd132pdround(a, b, c, ROUNDING), _mm512_setzero_pd())
8644 }
8645}
8646
8647/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
8648///
8649/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8650/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8651/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8652/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8653/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8654/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8655///
8656/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmadd_round_pd&expand=2563)
8657#[inline]
8658#[target_feature(enable = "avx512f")]
8659#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8660#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmadd132pd or vfmadd213pd or vfmadd231pd
8661#[rustc_legacy_const_generics(4)]
8662pub fn _mm512_mask3_fmadd_round_pd<const ROUNDING: i32>(
8663 a: __m512d,
8664 b: __m512d,
8665 c: __m512d,
8666 k: __mmask8,
8667) -> __m512d {
8668 unsafe {
8669 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmadd132pdround(a, b, c, ROUNDING), c)
8671 }
8672}
8673
8674/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.\
8675///
8676/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8677/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8678/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8679/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8680/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8681/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8682///
8683/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_round_ps&expand=2651)
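///
/// A minimal usage sketch (marked `ignore`: it assumes an AVX-512 CPU and the
/// unstable `stdarch_x86_avx512` feature); the values are illustrative only:
///
/// ```ignore
/// # use core::arch::x86_64::*;
/// let a = _mm512_set1_ps(2.0);
/// let b = _mm512_set1_ps(3.0);
/// let c = _mm512_set1_ps(1.0);
/// // Every lane becomes 2.0 * 3.0 - 1.0 = 5.0.
/// let r = _mm512_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
/// ```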
8684#[inline]
8685#[target_feature(enable = "avx512f")]
8686#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8687#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
8688#[rustc_legacy_const_generics(3)]
8689pub fn _mm512_fmsub_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
8690 unsafe {
8691 static_assert_rounding!(ROUNDING);
        vfmadd132psround(a, b, simd_neg(c), ROUNDING)
8693 }
8694}
8695
8696/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8697///
8698/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8699/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8700/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8701/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8702/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8703/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8704///
8705/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_round_ps&expand=2652)
8706#[inline]
8707#[target_feature(enable = "avx512f")]
8708#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8709#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
8710#[rustc_legacy_const_generics(4)]
8711pub fn _mm512_mask_fmsub_round_ps<const ROUNDING: i32>(
8712 a: __m512,
8713 k: __mmask16,
8714 b: __m512,
8715 c: __m512,
8716) -> __m512 {
8717 unsafe {
8718 static_assert_rounding!(ROUNDING);
        let r: __m512 = vfmadd132psround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, a)
8721 }
8722}
8723
8724/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8725///
8726/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8727/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8728/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8729/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8730/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8731/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8732///
8733/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_round_ps&expand=2654)
8734#[inline]
8735#[target_feature(enable = "avx512f")]
8736#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8737#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
8738#[rustc_legacy_const_generics(4)]
8739pub fn _mm512_maskz_fmsub_round_ps<const ROUNDING: i32>(
8740 k: __mmask16,
8741 a: __m512,
8742 b: __m512,
8743 c: __m512,
8744) -> __m512 {
8745 unsafe {
8746 static_assert_rounding!(ROUNDING);
        let r: __m512 = vfmadd132psround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, _mm512_setzero_ps())
8749 }
8750}
8751
8752/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
8753///
8754/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8755/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8756/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8757/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8758/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8759/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8760///
8761/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_round_ps&expand=2653)
8762#[inline]
8763#[target_feature(enable = "avx512f")]
8764#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8765#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
8766#[rustc_legacy_const_generics(4)]
8767pub fn _mm512_mask3_fmsub_round_ps<const ROUNDING: i32>(
8768 a: __m512,
8769 b: __m512,
8770 c: __m512,
8771 k: __mmask16,
8772) -> __m512 {
8773 unsafe {
8774 static_assert_rounding!(ROUNDING);
        let r: __m512 = vfmadd132psround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, c)
8777 }
8778}
8779
8780/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst.\
8781///
8782/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8783/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8784/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8785/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8786/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8787/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8788///
8789/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsub_round_pd&expand=2647)
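///
/// A minimal usage sketch (marked `ignore`: it assumes an AVX-512 CPU and the
/// unstable `stdarch_x86_avx512` feature); the values are illustrative only:
///
/// ```ignore
/// # use core::arch::x86_64::*;
/// let a = _mm512_set1_pd(2.0);
/// let b = _mm512_set1_pd(3.0);
/// let c = _mm512_set1_pd(1.0);
/// // Every lane becomes 5.0.
/// let r = _mm512_fmsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
/// ```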
8790#[inline]
8791#[target_feature(enable = "avx512f")]
8792#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8793#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
8794#[rustc_legacy_const_generics(3)]
8795pub fn _mm512_fmsub_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
8796 unsafe {
8797 static_assert_rounding!(ROUNDING);
        vfmadd132pdround(a, b, simd_neg(c), ROUNDING)
8799 }
8800}
8801
8802/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8803///
8804/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8805/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8806/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8807/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8808/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8809/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8810///
8811/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsub_round_pd&expand=2648)
8812#[inline]
8813#[target_feature(enable = "avx512f")]
8814#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8815#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
8816#[rustc_legacy_const_generics(4)]
8817pub fn _mm512_mask_fmsub_round_pd<const ROUNDING: i32>(
8818 a: __m512d,
8819 k: __mmask8,
8820 b: __m512d,
8821 c: __m512d,
8822) -> __m512d {
8823 unsafe {
8824 static_assert_rounding!(ROUNDING);
        let r: __m512d = vfmadd132pdround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, a)
8827 }
8828}
8829
8830/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8831///
8832/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8833/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8834/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8835/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8836/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8837/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8838///
8839/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsub_round_pd&expand=2650)
8840#[inline]
8841#[target_feature(enable = "avx512f")]
8842#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8843#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
8844#[rustc_legacy_const_generics(4)]
8845pub fn _mm512_maskz_fmsub_round_pd<const ROUNDING: i32>(
8846 k: __mmask8,
8847 a: __m512d,
8848 b: __m512d,
8849 c: __m512d,
8850) -> __m512d {
8851 unsafe {
8852 static_assert_rounding!(ROUNDING);
        let r: __m512d = vfmadd132pdround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, _mm512_setzero_pd())
8855 }
8856}
8857
8858/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
8859///
8860/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8861/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8862/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8863/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8864/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8865/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8866///
8867/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsub_round_pd&expand=2649)
8868#[inline]
8869#[target_feature(enable = "avx512f")]
8870#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8871#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
8872#[rustc_legacy_const_generics(4)]
8873pub fn _mm512_mask3_fmsub_round_pd<const ROUNDING: i32>(
8874 a: __m512d,
8875 b: __m512d,
8876 c: __m512d,
8877 k: __mmask8,
8878) -> __m512d {
8879 unsafe {
8880 static_assert_rounding!(ROUNDING);
        let r: __m512d = vfmadd132pdround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, c)
8883 }
8884}
8885
8886/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.\
8887///
8888/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8889/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8890/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8891/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8892/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8893/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8894///
8895/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_round_ps&expand=2619)
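///
/// A minimal usage sketch (marked `ignore`: it assumes an AVX-512 CPU and the
/// unstable `stdarch_x86_avx512` feature); the values are illustrative only:
///
/// ```ignore
/// # use core::arch::x86_64::*;
/// let a = _mm512_set1_ps(2.0);
/// let b = _mm512_set1_ps(3.0);
/// let c = _mm512_set1_ps(1.0);
/// // Even-indexed lanes get 6.0 - 1.0 = 5.0; odd-indexed lanes get 6.0 + 1.0 = 7.0.
/// let r = _mm512_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
/// ```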
8896#[inline]
8897#[target_feature(enable = "avx512f")]
8898#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8899#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
8900#[rustc_legacy_const_generics(3)]
8901pub fn _mm512_fmaddsub_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
8902 unsafe {
8903 static_assert_rounding!(ROUNDING);
8904 vfmaddsubpsround(a, b, c, ROUNDING)
8905 }
8906}
8907
8908/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
8909///
8910/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8911/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8912/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8913/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8914/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8915/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8916///
8917/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_round_ps&expand=2620)
8918#[inline]
8919#[target_feature(enable = "avx512f")]
8920#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8921#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
8922#[rustc_legacy_const_generics(4)]
8923pub fn _mm512_mask_fmaddsub_round_ps<const ROUNDING: i32>(
8924 a: __m512,
8925 k: __mmask16,
8926 b: __m512,
8927 c: __m512,
8928) -> __m512 {
8929 unsafe {
8930 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmaddsubpsround(a, b, c, ROUNDING), a)
8932 }
8933}
8934
8935/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
8936///
8937/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8938/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8939/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8940/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8941/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8942/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8943///
8944/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_round_ps&expand=2622)
8945#[inline]
8946#[target_feature(enable = "avx512f")]
8947#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8948#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
8949#[rustc_legacy_const_generics(4)]
8950pub fn _mm512_maskz_fmaddsub_round_ps<const ROUNDING: i32>(
8951 k: __mmask16,
8952 a: __m512,
8953 b: __m512,
8954 c: __m512,
8955) -> __m512 {
8956 unsafe {
8957 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmaddsubpsround(a, b, c, ROUNDING), _mm512_setzero_ps())
8959 }
8960}
8961
8962/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
8963///
8964/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8965/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8966/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8967/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8968/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8969/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8970///
8971/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_round_ps&expand=2621)
8972#[inline]
8973#[target_feature(enable = "avx512f")]
8974#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
8975#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132ps or vfmaddsub213ps or vfmaddsub231ps
8976#[rustc_legacy_const_generics(4)]
8977pub fn _mm512_mask3_fmaddsub_round_ps<const ROUNDING: i32>(
8978 a: __m512,
8979 b: __m512,
8980 c: __m512,
8981 k: __mmask16,
8982) -> __m512 {
8983 unsafe {
8984 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmaddsubpsround(a, b, c, ROUNDING), c)
8986 }
8987}
8988
8989/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst.\
8990///
8991/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
8992/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
8993/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
8994/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
8995/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
8996/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
8997///
8998/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmaddsub_round_pd&expand=2615)
8999#[inline]
9000#[target_feature(enable = "avx512f")]
9001#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9002#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
9003#[rustc_legacy_const_generics(3)]
9004pub fn _mm512_fmaddsub_round_pd<const ROUNDING: i32>(
9005 a: __m512d,
9006 b: __m512d,
9007 c: __m512d,
9008) -> __m512d {
9009 unsafe {
9010 static_assert_rounding!(ROUNDING);
9011 vfmaddsubpdround(a, b, c, ROUNDING)
9012 }
9013}
9014
9015/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9016///
9017/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9018/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9019/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9020/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9021/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9022/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9023///
9024/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmaddsub_round_pd&expand=2616)
9025#[inline]
9026#[target_feature(enable = "avx512f")]
9027#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9028#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
9029#[rustc_legacy_const_generics(4)]
9030pub fn _mm512_mask_fmaddsub_round_pd<const ROUNDING: i32>(
9031 a: __m512d,
9032 k: __mmask8,
9033 b: __m512d,
9034 c: __m512d,
9035) -> __m512d {
9036 unsafe {
9037 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmaddsubpdround(a, b, c, ROUNDING), a)
9039 }
9040}
9041
9042/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9043///
9044/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9045/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9046/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9047/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9048/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9049/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9050///
9051/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmaddsub_round_pd&expand=2618)
9052#[inline]
9053#[target_feature(enable = "avx512f")]
9054#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9055#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
9056#[rustc_legacy_const_generics(4)]
9057pub fn _mm512_maskz_fmaddsub_round_pd<const ROUNDING: i32>(
9058 k: __mmask8,
9059 a: __m512d,
9060 b: __m512d,
9061 c: __m512d,
9062) -> __m512d {
9063 unsafe {
9064 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmaddsubpdround(a, b, c, ROUNDING), _mm512_setzero_pd())
9066 }
9067}
9068
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively add and subtract packed elements in c to/from the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9070///
9071/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9072/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9073/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9074/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9075/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9076/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9077///
9078/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmaddsub_round_pd&expand=2617)
9079#[inline]
9080#[target_feature(enable = "avx512f")]
9081#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9082#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmaddsub132pd or vfmaddsub213pd or vfmaddsub231pd
9083#[rustc_legacy_const_generics(4)]
9084pub fn _mm512_mask3_fmaddsub_round_pd<const ROUNDING: i32>(
9085 a: __m512d,
9086 b: __m512d,
9087 c: __m512d,
9088 k: __mmask8,
9089) -> __m512d {
9090 unsafe {
9091 static_assert_rounding!(ROUNDING);
        simd_select_bitmask(k, vfmaddsubpdround(a, b, c, ROUNDING), c)
9093 }
9094}
9095
9096/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.\
9097///
9098/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9099/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9100/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9101/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9102/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9103/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9104///
9105/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_round_ps&expand=2699)
9106#[inline]
9107#[target_feature(enable = "avx512f")]
9108#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9109#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
9110#[rustc_legacy_const_generics(3)]
9111pub fn _mm512_fmsubadd_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
9112 unsafe {
9113 static_assert_rounding!(ROUNDING);
        vfmaddsubpsround(a, b, simd_neg(c), ROUNDING)
9115 }
9116}
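// A minimal scalar sketch (hypothetical helper, not part of this module's API)
// of the subadd pattern computed by `_mm512_fmsubadd_round_ps`, the mirror
// image of fmaddsub: even-indexed lanes compute `a * b + c`, odd-indexed lanes
// compute `a * b - c`. ROUNDING and the fused single rounding are ignored.
#[cfg(test)]
fn fmsubadd_reference(a: &[f32; 16], b: &[f32; 16], c: &[f32; 16]) -> [f32; 16] {
    let mut dst = [0.0f32; 16];
    for i in 0..16 {
        let prod = a[i] * b[i];
        dst[i] = if i % 2 == 0 { prod + c[i] } else { prod - c[i] };
    }
    dst
}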
9117
9118/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9119///
9120/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9121/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9122/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9123/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9124/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9125/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9126///
9127/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_round_ps&expand=2700)
9128#[inline]
9129#[target_feature(enable = "avx512f")]
9130#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9131#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
9132#[rustc_legacy_const_generics(4)]
9133pub fn _mm512_mask_fmsubadd_round_ps<const ROUNDING: i32>(
9134 a: __m512,
9135 k: __mmask16,
9136 b: __m512,
9137 c: __m512,
9138) -> __m512 {
9139 unsafe {
9140 static_assert_rounding!(ROUNDING);
        let r: __m512 = vfmaddsubpsround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, a)
9143 }
9144}
9145
9146/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9147///
9148/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9149/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9150/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9151/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9152/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9153/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9154///
9155/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_round_ps&expand=2702)
9156#[inline]
9157#[target_feature(enable = "avx512f")]
9158#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9159#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
9160#[rustc_legacy_const_generics(4)]
9161pub fn _mm512_maskz_fmsubadd_round_ps<const ROUNDING: i32>(
9162 k: __mmask16,
9163 a: __m512,
9164 b: __m512,
9165 c: __m512,
9166) -> __m512 {
9167 unsafe {
9168 static_assert_rounding!(ROUNDING);
        let r: __m512 = vfmaddsubpsround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, _mm512_setzero_ps())
9171 }
9172}
9173
9174/// Multiply packed single-precision (32-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9175///
9176/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9177/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9178/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9179/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9180/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9181/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9182///
9183/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_round_ps&expand=2701)
9184#[inline]
9185#[target_feature(enable = "avx512f")]
9186#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9187#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
9188#[rustc_legacy_const_generics(4)]
9189pub fn _mm512_mask3_fmsubadd_round_ps<const ROUNDING: i32>(
9190 a: __m512,
9191 b: __m512,
9192 c: __m512,
9193 k: __mmask16,
9194) -> __m512 {
9195 unsafe {
9196 static_assert_rounding!(ROUNDING);
        let r: __m512 = vfmaddsubpsround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, c)
9199 }
9200}
9201
9202/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst.\
9203///
9204/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9205/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9206/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9207/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9208/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9209/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9210///
9211/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fmsubadd_round_pd&expand=2695)
9212#[inline]
9213#[target_feature(enable = "avx512f")]
9214#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9215#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
9216#[rustc_legacy_const_generics(3)]
9217pub fn _mm512_fmsubadd_round_pd<const ROUNDING: i32>(
9218 a: __m512d,
9219 b: __m512d,
9220 c: __m512d,
9221) -> __m512d {
9222 unsafe {
9223 static_assert_rounding!(ROUNDING);
        vfmaddsubpdround(a, b, simd_neg(c), ROUNDING)
9225 }
9226}
9227
9228/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9229///
9230/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9231/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9232/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9233/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9234/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9235/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9236///
9237/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fmsubadd_round_pd&expand=2696)
9238#[inline]
9239#[target_feature(enable = "avx512f")]
9240#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9241#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
9242#[rustc_legacy_const_generics(4)]
9243pub fn _mm512_mask_fmsubadd_round_pd<const ROUNDING: i32>(
9244 a: __m512d,
9245 k: __mmask8,
9246 b: __m512d,
9247 c: __m512d,
9248) -> __m512d {
9249 unsafe {
9250 static_assert_rounding!(ROUNDING);
        let r: __m512d = vfmaddsubpdround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, a)
9253 }
9254}
9255
/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9257///
9258/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9259/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9260/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9261/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9262/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9263/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9264///
9265/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fmsubadd_round_pd&expand=2698)
9266#[inline]
9267#[target_feature(enable = "avx512f")]
9268#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9269#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
9270#[rustc_legacy_const_generics(4)]
9271pub fn _mm512_maskz_fmsubadd_round_pd<const ROUNDING: i32>(
9272 k: __mmask8,
9273 a: __m512d,
9274 b: __m512d,
9275 c: __m512d,
9276) -> __m512d {
9277 unsafe {
9278 static_assert_rounding!(ROUNDING);
        let r: __m512d = vfmaddsubpdround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, _mm512_setzero_pd())
9281 }
9282}
9283
9284/// Multiply packed double-precision (64-bit) floating-point elements in a and b, alternatively subtract and add packed elements in c from/to the intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9285///
9286/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9287/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9288/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9289/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9290/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9291/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9292///
9293/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fmsubadd_round_pd&expand=2697)
9294#[inline]
9295#[target_feature(enable = "avx512f")]
9296#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9297#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
9298#[rustc_legacy_const_generics(4)]
9299pub fn _mm512_mask3_fmsubadd_round_pd<const ROUNDING: i32>(
9300 a: __m512d,
9301 b: __m512d,
9302 c: __m512d,
9303 k: __mmask8,
9304) -> __m512d {
9305 unsafe {
9306 static_assert_rounding!(ROUNDING);
        let r: __m512d = vfmaddsubpdround(a, b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, c)
9309 }
9310}
9311
9312/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.\
9313///
9314/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9315/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9316/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9317/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9318/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9319/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9320///
9321/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_round_ps&expand=2731)
9322#[inline]
9323#[target_feature(enable = "avx512f")]
9324#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9325#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
9326#[rustc_legacy_const_generics(3)]
9327pub fn _mm512_fnmadd_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
9328 unsafe {
9329 static_assert_rounding!(ROUNDING);
        vfmadd132psround(simd_neg(a), b, c, ROUNDING)
9331 }
9332}
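// Per-lane semantics of `_mm512_fnmadd_round_ps` as a minimal scalar sketch
// (hypothetical helper, ROUNDING ignored): `-(a * b) + c`. Negating `a` before
// the fused multiply-add, as the body above does, yields the same value.
#[cfg(test)]
fn fnmadd_reference(a: f32, b: f32, c: f32) -> f32 {
    -(a * b) + c
}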
9333
9334/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9335///
9336/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9337/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9338/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9339/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9340/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9341/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9342///
9343/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_round_ps&expand=2732)
9344#[inline]
9345#[target_feature(enable = "avx512f")]
9346#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9347#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
9348#[rustc_legacy_const_generics(4)]
9349pub fn _mm512_mask_fnmadd_round_ps<const ROUNDING: i32>(
9350 a: __m512,
9351 k: __mmask16,
9352 b: __m512,
9353 c: __m512,
9354) -> __m512 {
9355 unsafe {
9356 static_assert_rounding!(ROUNDING);
        let r: __m512 = vfmadd132psround(simd_neg(a), b, c, ROUNDING);
        simd_select_bitmask(k, r, a)
9359 }
9360}
9361
9362/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9363///
9364/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9365/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9366/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9367/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9368/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9369/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9370///
9371/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_round_ps&expand=2734)
9372#[inline]
9373#[target_feature(enable = "avx512f")]
9374#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9375#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
9376#[rustc_legacy_const_generics(4)]
9377pub fn _mm512_maskz_fnmadd_round_ps<const ROUNDING: i32>(
9378 k: __mmask16,
9379 a: __m512,
9380 b: __m512,
9381 c: __m512,
9382) -> __m512 {
9383 unsafe {
9384 static_assert_rounding!(ROUNDING);
        let r: __m512 = vfmadd132psround(simd_neg(a), b, c, ROUNDING);
        simd_select_bitmask(k, r, _mm512_setzero_ps())
9387 }
9388}
9389
9390/// Multiply packed single-precision (32-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9391///
9392/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9393/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9394/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9395/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9396/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9397/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9398///
9399/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_round_ps&expand=2733)
9400#[inline]
9401#[target_feature(enable = "avx512f")]
9402#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9403#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
9404#[rustc_legacy_const_generics(4)]
9405pub fn _mm512_mask3_fnmadd_round_ps<const ROUNDING: i32>(
9406 a: __m512,
9407 b: __m512,
9408 c: __m512,
9409 k: __mmask16,
9410) -> __m512 {
9411 unsafe {
9412 static_assert_rounding!(ROUNDING);
        let r: __m512 = vfmadd132psround(simd_neg(a), b, c, ROUNDING);
        simd_select_bitmask(k, r, c)
9415 }
9416}
9417
9418/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst.\
9419///
9420/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9421/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9422/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9423/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9424/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9425/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9426///
9427/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmadd_round_pd&expand=2711)
9428#[inline]
9429#[target_feature(enable = "avx512f")]
9430#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9431#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
9432#[rustc_legacy_const_generics(3)]
9433pub fn _mm512_fnmadd_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
9434 unsafe {
9435 static_assert_rounding!(ROUNDING);
        vfmadd132pdround(simd_neg(a), b, c, ROUNDING)
9437 }
9438}
9439
9440/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9441///
9442/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9443/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9444/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9445/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9446/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9447/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9448///
9449/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmadd_round_pd&expand=2728)
9450#[inline]
9451#[target_feature(enable = "avx512f")]
9452#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9453#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
9454#[rustc_legacy_const_generics(4)]
9455pub fn _mm512_mask_fnmadd_round_pd<const ROUNDING: i32>(
9456 a: __m512d,
9457 k: __mmask8,
9458 b: __m512d,
9459 c: __m512d,
9460) -> __m512d {
9461 unsafe {
9462 static_assert_rounding!(ROUNDING);
        let r: __m512d = vfmadd132pdround(simd_neg(a), b, c, ROUNDING);
        simd_select_bitmask(k, r, a)
9465 }
9466}
9467
9468/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9469///
9470/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9471/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9472/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9473/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9474/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9475/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9476///
9477/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmadd_round_pd&expand=2730)
9478#[inline]
9479#[target_feature(enable = "avx512f")]
9480#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9481#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
9482#[rustc_legacy_const_generics(4)]
9483pub fn _mm512_maskz_fnmadd_round_pd<const ROUNDING: i32>(
9484 k: __mmask8,
9485 a: __m512d,
9486 b: __m512d,
9487 c: __m512d,
9488) -> __m512d {
9489 unsafe {
9490 static_assert_rounding!(ROUNDING);
        let r: __m512d = vfmadd132pdround(simd_neg(a), b, c, ROUNDING);
        simd_select_bitmask(k, r, _mm512_setzero_pd())
9493 }
9494}
9495
9496/// Multiply packed double-precision (64-bit) floating-point elements in a and b, add the negated intermediate result to packed elements in c, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9497///
9498/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9499/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9500/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9501/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9502/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9503/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9504///
9505/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmadd_round_pd&expand=2729)
9506#[inline]
9507#[target_feature(enable = "avx512f")]
9508#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9509#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
9510#[rustc_legacy_const_generics(4)]
9511pub fn _mm512_mask3_fnmadd_round_pd<const ROUNDING: i32>(
9512 a: __m512d,
9513 b: __m512d,
9514 c: __m512d,
9515 k: __mmask8,
9516) -> __m512d {
9517 unsafe {
9518 static_assert_rounding!(ROUNDING);
        let r: __m512d = vfmadd132pdround(simd_neg(a), b, c, ROUNDING);
        simd_select_bitmask(k, r, c)
9521 }
9522}
9523
9524/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.\
9525///
9526/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9527/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9528/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9529/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9530/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9531/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9532///
9533/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_round_ps&expand=2779)
9534#[inline]
9535#[target_feature(enable = "avx512f")]
9536#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9537#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
9538#[rustc_legacy_const_generics(3)]
9539pub fn _mm512_fnmsub_round_ps<const ROUNDING: i32>(a: __m512, b: __m512, c: __m512) -> __m512 {
9540 unsafe {
9541 static_assert_rounding!(ROUNDING);
        vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING)
9543 }
9544}
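// Per-lane semantics of `_mm512_fnmsub_round_ps` as a minimal scalar sketch
// (hypothetical helper, ROUNDING ignored): `-(a * b) - c`. The body above gets
// the same result by negating both `a` and `c` before the fused multiply-add.
#[cfg(test)]
fn fnmsub_reference(a: f32, b: f32, c: f32) -> f32 {
    -(a * b) - c
}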
9545
9546/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9547///
9548/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9549/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9550/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9551/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9552/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9553/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9554///
9555/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_round_ps&expand=2780)
9556#[inline]
9557#[target_feature(enable = "avx512f")]
9558#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9559#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
9560#[rustc_legacy_const_generics(4)]
9561pub fn _mm512_mask_fnmsub_round_ps<const ROUNDING: i32>(
9562 a: __m512,
9563 k: __mmask16,
9564 b: __m512,
9565 c: __m512,
9566) -> __m512 {
9567 unsafe {
9568 static_assert_rounding!(ROUNDING);
        let r: __m512 = vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, a)
9571 }
9572}
9573
9574/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9575///
9576/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9577/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9578/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9579/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9580/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9581/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9582///
9583/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_round_ps&expand=2782)
9584#[inline]
9585#[target_feature(enable = "avx512f")]
9586#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9587#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
9588#[rustc_legacy_const_generics(4)]
9589pub fn _mm512_maskz_fnmsub_round_ps<const ROUNDING: i32>(
9590 k: __mmask16,
9591 a: __m512,
9592 b: __m512,
9593 c: __m512,
9594) -> __m512 {
9595 unsafe {
9596 static_assert_rounding!(ROUNDING);
        let r: __m512 = vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, _mm512_setzero_ps())
9599 }
9600}
9601
9602/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9603///
9604/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9605/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9606/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9607/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9608/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9609/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9610///
9611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_round_ps&expand=2781)
9612#[inline]
9613#[target_feature(enable = "avx512f")]
9614#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9615#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
9616#[rustc_legacy_const_generics(4)]
9617pub fn _mm512_mask3_fnmsub_round_ps<const ROUNDING: i32>(
9618 a: __m512,
9619 b: __m512,
9620 c: __m512,
9621 k: __mmask16,
9622) -> __m512 {
9623 unsafe {
9624 static_assert_rounding!(ROUNDING);
        let r: __m512 = vfmadd132psround(simd_neg(a), b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, c)
9627 }
9628}
9629
9630/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst.\
9631///
9632/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9633/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9634/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9635/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9636/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9637/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9638///
9639/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fnmsub_round_pd&expand=2775)
9640#[inline]
9641#[target_feature(enable = "avx512f")]
9642#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9643#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
9644#[rustc_legacy_const_generics(3)]
9645pub fn _mm512_fnmsub_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
9646 unsafe {
9647 static_assert_rounding!(ROUNDING);
        vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING)
9649 }
9650}
9651
9652/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).\
9653///
9654/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9655/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9656/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9657/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9658/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9659/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9660///
9661/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fnmsub_round_pd&expand=2776)
9662#[inline]
9663#[target_feature(enable = "avx512f")]
9664#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9665#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
9666#[rustc_legacy_const_generics(4)]
9667pub fn _mm512_mask_fnmsub_round_pd<const ROUNDING: i32>(
9668 a: __m512d,
9669 k: __mmask8,
9670 b: __m512d,
9671 c: __m512d,
9672) -> __m512d {
9673 unsafe {
9674 static_assert_rounding!(ROUNDING);
        let r: __m512d = vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, a)
9677 }
9678}
9679
9680/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9681///
9682/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9683/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9684/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9685/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9686/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9687/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9688///
9689/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fnmsub_round_pd&expand=2778)
9690#[inline]
9691#[target_feature(enable = "avx512f")]
9692#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9693#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
9694#[rustc_legacy_const_generics(4)]
9695pub fn _mm512_maskz_fnmsub_round_pd<const ROUNDING: i32>(
9696 k: __mmask8,
9697 a: __m512d,
9698 b: __m512d,
9699 c: __m512d,
9700) -> __m512d {
9701 unsafe {
9702 static_assert_rounding!(ROUNDING);
        let r: __m512d = vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, _mm512_setzero_pd())
9705 }
9706}
9707
9708/// Multiply packed double-precision (64-bit) floating-point elements in a and b, subtract packed elements in c from the negated intermediate result, and store the results in dst using writemask k (elements are copied from c when the corresponding mask bit is not set).\
9709///
9710/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
9711/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
9712/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
9713/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
9714/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
9715/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
9716///
9717/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask3_fnmsub_round_pd&expand=2777)
9718#[inline]
9719#[target_feature(enable = "avx512f")]
9720#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9721#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
9722#[rustc_legacy_const_generics(4)]
9723pub fn _mm512_mask3_fnmsub_round_pd<const ROUNDING: i32>(
9724 a: __m512d,
9725 b: __m512d,
9726 c: __m512d,
9727 k: __mmask8,
9728) -> __m512d {
9729 unsafe {
9730 static_assert_rounding!(ROUNDING);
        let r: __m512d = vfmadd132pdround(simd_neg(a), b, simd_neg(c), ROUNDING);
        simd_select_bitmask(k, r, c)
9733 }
9734}
9735
9736/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst.\
9737/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9738///
9739/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_round_ps&expand=3662)
9740#[inline]
9741#[target_feature(enable = "avx512f")]
9742#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9743#[cfg_attr(test, assert_instr(vmaxps, SAE = 8))]
9744#[rustc_legacy_const_generics(2)]
9745pub fn _mm512_max_round_ps<const SAE: i32>(a: __m512, b: __m512) -> __m512 {
9746 unsafe {
9747 static_assert_sae!(SAE);
9748 let a: f32x16 = a.as_f32x16();
9749 let b: f32x16 = b.as_f32x16();
9750 let r: f32x16 = vmaxps(a, b, SAE);
        transmute(r)
9752 }
9753}
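// Minimal usage sketch (hypothetical test helper, assuming an AVX-512F capable
// CPU): the SAE control is a const generic and must be either
// `_MM_FROUND_CUR_DIRECTION` or `_MM_FROUND_NO_EXC`; the latter suppresses
// floating-point exceptions during the comparison.
#[cfg(test)]
#[target_feature(enable = "avx512f")]
unsafe fn max_round_ps_usage(a: __m512, b: __m512) -> __m512 {
    _mm512_max_round_ps::<_MM_FROUND_NO_EXC>(a, b)
}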
9754
9755/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
9756/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9757///
9758/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_round_ps&expand=3660)
9759#[inline]
9760#[target_feature(enable = "avx512f")]
9761#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9762#[cfg_attr(test, assert_instr(vmaxps, SAE = 8))]
9763#[rustc_legacy_const_generics(4)]
9764pub fn _mm512_mask_max_round_ps<const SAE: i32>(
9765 src: __m512,
9766 k: __mmask16,
9767 a: __m512,
9768 b: __m512,
9769) -> __m512 {
9770 unsafe {
9771 static_assert_sae!(SAE);
9772 let a: f32x16 = a.as_f32x16();
9773 let b: f32x16 = b.as_f32x16();
9774 let r: f32x16 = vmaxps(a, b, SAE);
        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
9776 }
9777}
9778
9779/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9780/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9781///
9782/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_round_ps&expand=3661)
9783#[inline]
9784#[target_feature(enable = "avx512f")]
9785#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9786#[cfg_attr(test, assert_instr(vmaxps, SAE = 8))]
9787#[rustc_legacy_const_generics(3)]
9788pub fn _mm512_maskz_max_round_ps<const SAE: i32>(k: __mmask16, a: __m512, b: __m512) -> __m512 {
9789 unsafe {
9790 static_assert_sae!(SAE);
9791 let a: f32x16 = a.as_f32x16();
9792 let b: f32x16 = b.as_f32x16();
9793 let r: f32x16 = vmaxps(a, b, SAE);
        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
9795 }
9796}
9797
9798/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst.\
9799/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9800///
9801/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_max_round_pd&expand=3659)
9802#[inline]
9803#[target_feature(enable = "avx512f")]
9804#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9805#[cfg_attr(test, assert_instr(vmaxpd, SAE = 8))]
9806#[rustc_legacy_const_generics(2)]
9807pub fn _mm512_max_round_pd<const SAE: i32>(a: __m512d, b: __m512d) -> __m512d {
9808 unsafe {
9809 static_assert_sae!(SAE);
9810 let a: f64x8 = a.as_f64x8();
9811 let b: f64x8 = b.as_f64x8();
9812 let r: f64x8 = vmaxpd(a, b, SAE);
        transmute(r)
9814 }
9815}
9816
9817/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
9818/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9819///
9820/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_max_round_pd&expand=3657)
9821#[inline]
9822#[target_feature(enable = "avx512f")]
9823#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9824#[cfg_attr(test, assert_instr(vmaxpd, SAE = 8))]
9825#[rustc_legacy_const_generics(4)]
9826pub fn _mm512_mask_max_round_pd<const SAE: i32>(
9827 src: __m512d,
9828 k: __mmask8,
9829 a: __m512d,
9830 b: __m512d,
9831) -> __m512d {
9832 unsafe {
9833 static_assert_sae!(SAE);
9834 let a: f64x8 = a.as_f64x8();
9835 let b: f64x8 = b.as_f64x8();
9836 let r: f64x8 = vmaxpd(a, b, SAE);
        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
9838 }
9839}
9840
9841/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed maximum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9842/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9843///
9844/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_max_round_pd&expand=3658)
9845#[inline]
9846#[target_feature(enable = "avx512f")]
9847#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9848#[cfg_attr(test, assert_instr(vmaxpd, SAE = 8))]
9849#[rustc_legacy_const_generics(3)]
9850pub fn _mm512_maskz_max_round_pd<const SAE: i32>(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
9851 unsafe {
9852 static_assert_sae!(SAE);
9853 let a: f64x8 = a.as_f64x8();
9854 let b: f64x8 = b.as_f64x8();
9855 let r: f64x8 = vmaxpd(a, b, SAE);
        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
9857 }
9858}
9859
9860/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst.\
9861/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9862///
9863/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_round_ps&expand=3776)
9864#[inline]
9865#[target_feature(enable = "avx512f")]
9866#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9867#[cfg_attr(test, assert_instr(vminps, SAE = 8))]
9868#[rustc_legacy_const_generics(2)]
9869pub fn _mm512_min_round_ps<const SAE: i32>(a: __m512, b: __m512) -> __m512 {
9870 unsafe {
9871 static_assert_sae!(SAE);
9872 let a: f32x16 = a.as_f32x16();
9873 let b: f32x16 = b.as_f32x16();
9874 let r: f32x16 = vminps(a, b, SAE);
        transmute(r)
9876 }
9877}
9878
9879/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
9880/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9881///
9882/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_round_ps&expand=3774)
9883#[inline]
9884#[target_feature(enable = "avx512f")]
9885#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9886#[cfg_attr(test, assert_instr(vminps, SAE = 8))]
9887#[rustc_legacy_const_generics(4)]
9888pub fn _mm512_mask_min_round_ps<const SAE: i32>(
9889 src: __m512,
9890 k: __mmask16,
9891 a: __m512,
9892 b: __m512,
9893) -> __m512 {
9894 unsafe {
9895 static_assert_sae!(SAE);
9896 let a: f32x16 = a.as_f32x16();
9897 let b: f32x16 = b.as_f32x16();
9898 let r: f32x16 = vminps(a, b, SAE);
        transmute(simd_select_bitmask(k, r, src.as_f32x16()))
9900 }
9901}
9902
9903/// Compare packed single-precision (32-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9904/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9905///
9906/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_round_ps&expand=3775)
9907#[inline]
9908#[target_feature(enable = "avx512f")]
9909#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9910#[cfg_attr(test, assert_instr(vminps, SAE = 8))]
9911#[rustc_legacy_const_generics(3)]
9912pub fn _mm512_maskz_min_round_ps<const SAE: i32>(k: __mmask16, a: __m512, b: __m512) -> __m512 {
9913 unsafe {
9914 static_assert_sae!(SAE);
9915 let a: f32x16 = a.as_f32x16();
9916 let b: f32x16 = b.as_f32x16();
9917 let r: f32x16 = vminps(a, b, SAE);
        transmute(simd_select_bitmask(k, r, f32x16::ZERO))
9919 }
9920}
9921
9922/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst.\
9923/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9924///
9925/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_min_round_pd&expand=3773)
9926#[inline]
9927#[target_feature(enable = "avx512f")]
9928#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9929#[cfg_attr(test, assert_instr(vminpd, SAE = 8))]
9930#[rustc_legacy_const_generics(2)]
9931pub fn _mm512_min_round_pd<const SAE: i32>(a: __m512d, b: __m512d) -> __m512d {
9932 unsafe {
9933 static_assert_sae!(SAE);
9934 let a: f64x8 = a.as_f64x8();
9935 let b: f64x8 = b.as_f64x8();
9936 let r: f64x8 = vminpd(a, b, SAE);
        transmute(r)
9938 }
9939}
9940
9941/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
9942/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9943///
9944/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_min_round_pd&expand=3771)
9945#[inline]
9946#[target_feature(enable = "avx512f")]
9947#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9948#[cfg_attr(test, assert_instr(vminpd, SAE = 8))]
9949#[rustc_legacy_const_generics(4)]
9950pub fn _mm512_mask_min_round_pd<const SAE: i32>(
9951 src: __m512d,
9952 k: __mmask8,
9953 a: __m512d,
9954 b: __m512d,
9955) -> __m512d {
9956 unsafe {
9957 static_assert_sae!(SAE);
9958 let a: f64x8 = a.as_f64x8();
9959 let b: f64x8 = b.as_f64x8();
9960 let r: f64x8 = vminpd(a, b, SAE);
        transmute(simd_select_bitmask(k, r, src.as_f64x8()))
9962 }
9963}
9964
9965/// Compare packed double-precision (64-bit) floating-point elements in a and b, and store packed minimum values in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
9966/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9967///
9968/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_min_round_pd&expand=3772)
9969#[inline]
9970#[target_feature(enable = "avx512f")]
9971#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9972#[cfg_attr(test, assert_instr(vminpd, SAE = 8))]
9973#[rustc_legacy_const_generics(3)]
9974pub fn _mm512_maskz_min_round_pd<const SAE: i32>(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
9975 unsafe {
9976 static_assert_sae!(SAE);
9977 let a: f64x8 = a.as_f64x8();
9978 let b: f64x8 = b.as_f64x8();
9979 let r: f64x8 = vminpd(a, b, SAE);
        transmute(simd_select_bitmask(k, r, f64x8::ZERO))
9981 }
9982}
9983
9984/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.\
9985/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
9986///
9987/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_round_ps&expand=2850)
9988#[inline]
9989#[target_feature(enable = "avx512f")]
9990#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
9991#[cfg_attr(test, assert_instr(vgetexpps, SAE = 8))]
9992#[rustc_legacy_const_generics(1)]
9993pub fn _mm512_getexp_round_ps<const SAE: i32>(a: __m512) -> __m512 {
9994 unsafe {
9995 static_assert_sae!(SAE);
9996 let a: f32x16 = a.as_f32x16();
        let r: f32x16 = vgetexpps(a, f32x16::ZERO, 0b11111111_11111111, SAE);
        transmute(r)
9999 }
10000}
10001
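// Usage sketch (illustrative comment; assumes an `avx512f`-enabled caller):
// getexp extracts the unbiased exponent as a float, i.e. floor(log2(|x|)) per lane.
//
//     let a = _mm512_set1_ps(8.0);
//     let e = _mm512_getexp_round_ps::<_MM_FROUND_NO_EXC>(a); // all lanes 3.0
//
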
10002/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
10003/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10004///
10005/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_round_ps&expand=2851)
10006#[inline]
10007#[target_feature(enable = "avx512f")]
10008#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10009#[cfg_attr(test, assert_instr(vgetexpps, SAE = 8))]
10010#[rustc_legacy_const_generics(3)]
10011pub fn _mm512_mask_getexp_round_ps<const SAE: i32>(src: __m512, k: __mmask16, a: __m512) -> __m512 {
10012 unsafe {
10013 static_assert_sae!(SAE);
10014 let a: f32x16 = a.as_f32x16();
10015 let src: f32x16 = src.as_f32x16();
        let r: f32x16 = vgetexpps(a, src, k, SAE);
        transmute(r)
10018 }
10019}
10020
10021/// Convert the exponent of each packed single-precision (32-bit) floating-point element in a to a single-precision (32-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
10022/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10023///
10024/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_round_ps&expand=2852)
10025#[inline]
10026#[target_feature(enable = "avx512f")]
10027#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10028#[cfg_attr(test, assert_instr(vgetexpps, SAE = 8))]
10029#[rustc_legacy_const_generics(2)]
10030pub fn _mm512_maskz_getexp_round_ps<const SAE: i32>(k: __mmask16, a: __m512) -> __m512 {
10031 unsafe {
10032 static_assert_sae!(SAE);
10033 let a: f32x16 = a.as_f32x16();
        let r: f32x16 = vgetexpps(a, f32x16::ZERO, k, SAE);
        transmute(r)
10036 }
10037}
10038
10039/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst. This intrinsic essentially calculates floor(log2(x)) for each element.\
10040/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10041///
10042/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getexp_round_pd&expand=2847)
10043#[inline]
10044#[target_feature(enable = "avx512f")]
10045#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10046#[cfg_attr(test, assert_instr(vgetexppd, SAE = 8))]
10047#[rustc_legacy_const_generics(1)]
10048pub fn _mm512_getexp_round_pd<const SAE: i32>(a: __m512d) -> __m512d {
10049 unsafe {
10050 static_assert_sae!(SAE);
10051 let a: f64x8 = a.as_f64x8();
        let r: f64x8 = vgetexppd(a, f64x8::ZERO, 0b11111111, SAE);
        transmute(r)
10054 }
10055}
10056
10057/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
10058/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10059///
10060/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getexp_round_pd&expand=2848)
10061#[inline]
10062#[target_feature(enable = "avx512f")]
10063#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10064#[cfg_attr(test, assert_instr(vgetexppd, SAE = 8))]
10065#[rustc_legacy_const_generics(3)]
10066pub fn _mm512_mask_getexp_round_pd<const SAE: i32>(
10067 src: __m512d,
10068 k: __mmask8,
10069 a: __m512d,
10070) -> __m512d {
10071 unsafe {
10072 static_assert_sae!(SAE);
10073 let a: f64x8 = a.as_f64x8();
10074 let src: f64x8 = src.as_f64x8();
        let r: f64x8 = vgetexppd(a, src, k, SAE);
        transmute(r)
10077 }
10078}
10079
10080/// Convert the exponent of each packed double-precision (64-bit) floating-point element in a to a double-precision (64-bit) floating-point number representing the integer exponent, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates floor(log2(x)) for each element.\
10081/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10082///
10083/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getexp_round_pd&expand=2849)
10084#[inline]
10085#[target_feature(enable = "avx512f")]
10086#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10087#[cfg_attr(test, assert_instr(vgetexppd, SAE = 8))]
10088#[rustc_legacy_const_generics(2)]
10089pub fn _mm512_maskz_getexp_round_pd<const SAE: i32>(k: __mmask8, a: __m512d) -> __m512d {
10090 unsafe {
10091 static_assert_sae!(SAE);
10092 let a: f64x8 = a.as_f64x8();
        let r: f64x8 = vgetexppd(a, f64x8::ZERO, k, SAE);
        transmute(r)
10095 }
10096}
10097
10098/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
10099/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10100/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10101/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10102/// * [`_MM_FROUND_TO_POS_INF`] : round up
10103/// * [`_MM_FROUND_TO_ZERO`] : truncate
10104/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10105///
10106/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10107/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_round_ps&expand=4790)
10108#[inline]
10109#[target_feature(enable = "avx512f")]
10110#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10111#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0, SAE = 8))]
10112#[rustc_legacy_const_generics(1, 2)]
10113pub fn _mm512_roundscale_round_ps<const IMM8: i32, const SAE: i32>(a: __m512) -> __m512 {
10114 unsafe {
10115 static_assert_uimm_bits!(IMM8, 8);
10116 static_assert_mantissas_sae!(SAE);
10117 let a: f32x16 = a.as_f32x16();
        let r: f32x16 = vrndscaleps(a, IMM8, f32x16::ZERO, 0b11111111_11111111, SAE);
        transmute(r)
10120 }
10121}
10122
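// Usage sketch (illustrative comment; assumes an `avx512f`-enabled caller):
// with IMM8 = 0 (keep zero fraction bits, round-to-nearest) each lane is
// rounded to the nearest integer; SAE = _MM_FROUND_NO_EXC suppresses exceptions.
//
//     let a = _mm512_set1_ps(1.25);
//     let r = _mm512_roundscale_round_ps::<0, _MM_FROUND_NO_EXC>(a); // all lanes 1.0
//
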
10123/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
10124/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10125/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10126/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10127/// * [`_MM_FROUND_TO_POS_INF`] : round up
10128/// * [`_MM_FROUND_TO_ZERO`] : truncate
10129/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10130///
10131/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_round_ps&expand=4788)
10133#[inline]
10134#[target_feature(enable = "avx512f")]
10135#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10136#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0, SAE = 8))]
10137#[rustc_legacy_const_generics(3, 4)]
10138pub fn _mm512_mask_roundscale_round_ps<const IMM8: i32, const SAE: i32>(
10139 src: __m512,
10140 k: __mmask16,
10141 a: __m512,
10142) -> __m512 {
10143 unsafe {
10144 static_assert_uimm_bits!(IMM8, 8);
10145 static_assert_mantissas_sae!(SAE);
10146 let a: f32x16 = a.as_f32x16();
10147 let src: f32x16 = src.as_f32x16();
        let r: f32x16 = vrndscaleps(a, IMM8, src, k, SAE);
        transmute(r)
10150 }
10151}
10152
10153/// Round packed single-precision (32-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
10154/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10155/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10156/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10157/// * [`_MM_FROUND_TO_POS_INF`] : round up
10158/// * [`_MM_FROUND_TO_ZERO`] : truncate
10159/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10160///
10161/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10162/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_round_ps&expand=4789)
10163#[inline]
10164#[target_feature(enable = "avx512f")]
10165#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10166#[cfg_attr(test, assert_instr(vrndscaleps, IMM8 = 0, SAE = 8))]
10167#[rustc_legacy_const_generics(2, 3)]
10168pub fn _mm512_maskz_roundscale_round_ps<const IMM8: i32, const SAE: i32>(
10169 k: __mmask16,
10170 a: __m512,
10171) -> __m512 {
10172 unsafe {
10173 static_assert_uimm_bits!(IMM8, 8);
10174 static_assert_mantissas_sae!(SAE);
10175 let a: f32x16 = a.as_f32x16();
        let r: f32x16 = vrndscaleps(a, IMM8, f32x16::ZERO, k, SAE);
        transmute(r)
10178 }
10179}
10180
10181/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst.\
10182/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10183/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10184/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10185/// * [`_MM_FROUND_TO_POS_INF`] : round up
10186/// * [`_MM_FROUND_TO_ZERO`] : truncate
10187/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10188///
10189/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10190/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_roundscale_round_pd&expand=4787)
10191#[inline]
10192#[target_feature(enable = "avx512f")]
10193#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10194#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0, SAE = 8))]
10195#[rustc_legacy_const_generics(1, 2)]
10196pub fn _mm512_roundscale_round_pd<const IMM8: i32, const SAE: i32>(a: __m512d) -> __m512d {
10197 unsafe {
10198 static_assert_uimm_bits!(IMM8, 8);
10199 static_assert_mantissas_sae!(SAE);
10200 let a: f64x8 = a.as_f64x8();
        let r: f64x8 = vrndscalepd(a, IMM8, f64x8::ZERO, 0b11111111, SAE);
        transmute(r)
10203 }
10204}
10205
10206/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
10207/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10208/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10209/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10210/// * [`_MM_FROUND_TO_POS_INF`] : round up
10211/// * [`_MM_FROUND_TO_ZERO`] : truncate
10212/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10213///
10214/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10215/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_roundscale_round_pd&expand=4785)
10216#[inline]
10217#[target_feature(enable = "avx512f")]
10218#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10219#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0, SAE = 8))]
10220#[rustc_legacy_const_generics(3, 4)]
10221pub fn _mm512_mask_roundscale_round_pd<const IMM8: i32, const SAE: i32>(
10222 src: __m512d,
10223 k: __mmask8,
10224 a: __m512d,
10225) -> __m512d {
10226 unsafe {
10227 static_assert_uimm_bits!(IMM8, 8);
10228 static_assert_mantissas_sae!(SAE);
10229 let a: f64x8 = a.as_f64x8();
10230 let src: f64x8 = src.as_f64x8();
        let r: f64x8 = vrndscalepd(a, IMM8, src, k, SAE);
        transmute(r)
10233 }
10234}
10235
10236/// Round packed double-precision (64-bit) floating-point elements in a to the number of fraction bits specified by imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
10237/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
10238/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
10239/// * [`_MM_FROUND_TO_NEG_INF`] : round down
10240/// * [`_MM_FROUND_TO_POS_INF`] : round up
10241/// * [`_MM_FROUND_TO_ZERO`] : truncate
10242/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10243///
10244/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10245/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_roundscale_round_pd&expand=4786)
10246#[inline]
10247#[target_feature(enable = "avx512f")]
10248#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10249#[cfg_attr(test, assert_instr(vrndscalepd, IMM8 = 0, SAE = 8))]
10250#[rustc_legacy_const_generics(2, 3)]
10251pub fn _mm512_maskz_roundscale_round_pd<const IMM8: i32, const SAE: i32>(
10252 k: __mmask8,
10253 a: __m512d,
10254) -> __m512d {
10255 unsafe {
10256 static_assert_uimm_bits!(IMM8, 8);
10257 static_assert_mantissas_sae!(SAE);
10258 let a: f64x8 = a.as_f64x8();
        let r: f64x8 = vrndscalepd(a, IMM8, f64x8::ZERO, k, SAE);
        transmute(r)
10261 }
10262}
10263
10264/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst.\
10265///
10266/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10267/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10268/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10269/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10270/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10271/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10272///
10273/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_round_ps&expand=4889)
10274#[inline]
10275#[target_feature(enable = "avx512f")]
10276#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10277#[cfg_attr(test, assert_instr(vscalefps, ROUNDING = 8))]
10278#[rustc_legacy_const_generics(2)]
10279pub fn _mm512_scalef_round_ps<const ROUNDING: i32>(a: __m512, b: __m512) -> __m512 {
10280 unsafe {
10281 static_assert_rounding!(ROUNDING);
10282 let a: f32x16 = a.as_f32x16();
10283 let b: f32x16 = b.as_f32x16();
        let r: f32x16 = vscalefps(a, b, f32x16::ZERO, 0b11111111_11111111, ROUNDING);
        transmute(r)
10286 }
10287}
10288
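// Usage sketch (illustrative comment; assumes an `avx512f`-enabled caller):
// scalef computes a * 2^floor(b) per lane, so 3.0 scaled by 2.0 gives 12.0.
//
//     let a = _mm512_set1_ps(3.0);
//     let b = _mm512_set1_ps(2.0);
//     let r = _mm512_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
//
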
10289/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
10290///
10291/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10292/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10293/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10294/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10295/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10296/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10297///
10298/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_round_ps&expand=4887)
10299#[inline]
10300#[target_feature(enable = "avx512f")]
10301#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10302#[cfg_attr(test, assert_instr(vscalefps, ROUNDING = 8))]
10303#[rustc_legacy_const_generics(4)]
10304pub fn _mm512_mask_scalef_round_ps<const ROUNDING: i32>(
10305 src: __m512,
10306 k: __mmask16,
10307 a: __m512,
10308 b: __m512,
10309) -> __m512 {
10310 unsafe {
10311 static_assert_rounding!(ROUNDING);
10312 let a: f32x16 = a.as_f32x16();
10313 let b: f32x16 = b.as_f32x16();
10314 let src: f32x16 = src.as_f32x16();
        let r: f32x16 = vscalefps(a, b, src, k, ROUNDING);
        transmute(r)
10317 }
10318}
10319
10320/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
10321///
10322/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10323/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10324/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10325/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10326/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10327/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10328///
10329/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_round_ps&expand=4888)
10330#[inline]
10331#[target_feature(enable = "avx512f")]
10332#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10333#[cfg_attr(test, assert_instr(vscalefps, ROUNDING = 8))]
10334#[rustc_legacy_const_generics(3)]
10335pub fn _mm512_maskz_scalef_round_ps<const ROUNDING: i32>(
10336 k: __mmask16,
10337 a: __m512,
10338 b: __m512,
10339) -> __m512 {
10340 unsafe {
10341 static_assert_rounding!(ROUNDING);
10342 let a: f32x16 = a.as_f32x16();
10343 let b: f32x16 = b.as_f32x16();
        let r: f32x16 = vscalefps(a, b, f32x16::ZERO, k, ROUNDING);
        transmute(r)
10346 }
10347}
10348
10349/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst.\
10350///
10351/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10352/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10353/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10354/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10355/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10356/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10357///
10358/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_scalef_round_pd&expand=4886)
10359#[inline]
10360#[target_feature(enable = "avx512f")]
10361#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10362#[cfg_attr(test, assert_instr(vscalefpd, ROUNDING = 8))]
10363#[rustc_legacy_const_generics(2)]
10364pub fn _mm512_scalef_round_pd<const ROUNDING: i32>(a: __m512d, b: __m512d) -> __m512d {
10365 unsafe {
10366 static_assert_rounding!(ROUNDING);
10367 let a: f64x8 = a.as_f64x8();
10368 let b: f64x8 = b.as_f64x8();
        let r: f64x8 = vscalefpd(a, b, f64x8::ZERO, 0b11111111, ROUNDING);
        transmute(r)
10371 }
10372}
10373
10374/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
10375///
10376/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10377/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10378/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10379/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10380/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10381/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10382///
10383/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_scalef_round_pd&expand=4884)
10384#[inline]
10385#[target_feature(enable = "avx512f")]
10386#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10387#[cfg_attr(test, assert_instr(vscalefpd, ROUNDING = 8))]
10388#[rustc_legacy_const_generics(4)]
10389pub fn _mm512_mask_scalef_round_pd<const ROUNDING: i32>(
10390 src: __m512d,
10391 k: __mmask8,
10392 a: __m512d,
10393 b: __m512d,
10394) -> __m512d {
10395 unsafe {
10396 static_assert_rounding!(ROUNDING);
10397 let a: f64x8 = a.as_f64x8();
10398 let b: f64x8 = b.as_f64x8();
10399 let src: f64x8 = src.as_f64x8();
        let r: f64x8 = vscalefpd(a, b, src, k, ROUNDING);
        transmute(r)
10402 }
10403}
10404
10405/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
10406///
10407/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
10408/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
10409/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
10410/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
10411/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
10412/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
10413///
10414/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_scalef_round_pd&expand=4885)
10415#[inline]
10416#[target_feature(enable = "avx512f")]
10417#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10418#[cfg_attr(test, assert_instr(vscalefpd, ROUNDING = 8))]
10419#[rustc_legacy_const_generics(3)]
10420pub fn _mm512_maskz_scalef_round_pd<const ROUNDING: i32>(
10421 k: __mmask8,
10422 a: __m512d,
10423 b: __m512d,
10424) -> __m512d {
10425 unsafe {
10426 static_assert_rounding!(ROUNDING);
10427 let a: f64x8 = a.as_f64x8();
10428 let b: f64x8 = b.as_f64x8();
        let r: f64x8 = vscalefpd(a, b, f64x8::ZERO, k, ROUNDING);
        transmute(r)
10431 }
10432}
10433
10434/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.\
10435///
10436/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10437/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_round_ps&expand=2505)
10438#[inline]
10439#[target_feature(enable = "avx512f")]
10440#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10441#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0, SAE = 8))]
10442#[rustc_legacy_const_generics(3, 4)]
10443pub fn _mm512_fixupimm_round_ps<const IMM8: i32, const SAE: i32>(
10444 a: __m512,
10445 b: __m512,
10446 c: __m512i,
10447) -> __m512 {
10448 unsafe {
10449 static_assert_uimm_bits!(IMM8, 8);
10450 static_assert_mantissas_sae!(SAE);
10451 let a: f32x16 = a.as_f32x16();
10452 let b: f32x16 = b.as_f32x16();
10453 let c: i32x16 = c.as_i32x16();
        let r: f32x16 = vfixupimmps(a, b, c, IMM8, 0b11111111_11111111, SAE);
        transmute(r)
10456 }
10457}
10458
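// Usage sketch (illustrative comment; assumes an `avx512f`-enabled caller):
// each 32-bit lane of `c` holds a small lookup table describing how special
// values (NaN, zeros, infinities, ...) detected in the inputs are replaced,
// and IMM8 selects which cases report exceptions. The call shape is:
//
//     let r = _mm512_fixupimm_round_ps::<0, _MM_FROUND_NO_EXC>(a, b, c);
//
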
10459/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\
10460///
10461/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10462/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_round_ps&expand=2506)
10463#[inline]
10464#[target_feature(enable = "avx512f")]
10465#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10466#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0, SAE = 8))]
10467#[rustc_legacy_const_generics(4, 5)]
10468pub fn _mm512_mask_fixupimm_round_ps<const IMM8: i32, const SAE: i32>(
10469 a: __m512,
10470 k: __mmask16,
10471 b: __m512,
10472 c: __m512i,
10473) -> __m512 {
10474 unsafe {
10475 static_assert_uimm_bits!(IMM8, 8);
10476 static_assert_mantissas_sae!(SAE);
10477 let a: f32x16 = a.as_f32x16();
10478 let b: f32x16 = b.as_f32x16();
10479 let c: i32x16 = c.as_i32x16();
        let r: f32x16 = vfixupimmps(a, b, c, IMM8, k, SAE);
        transmute(r)
10482 }
10483}
10484
10485/// Fix up packed single-precision (32-bit) floating-point elements in a and b using packed 32-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\
10486///
10487/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10488/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_round_ps&expand=2507)
10489#[inline]
10490#[target_feature(enable = "avx512f")]
10491#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10492#[cfg_attr(test, assert_instr(vfixupimmps, IMM8 = 0, SAE = 8))]
10493#[rustc_legacy_const_generics(4, 5)]
10494pub fn _mm512_maskz_fixupimm_round_ps<const IMM8: i32, const SAE: i32>(
10495 k: __mmask16,
10496 a: __m512,
10497 b: __m512,
10498 c: __m512i,
10499) -> __m512 {
10500 unsafe {
10501 static_assert_uimm_bits!(IMM8, 8);
10502 static_assert_mantissas_sae!(SAE);
10503 let a: f32x16 = a.as_f32x16();
10504 let b: f32x16 = b.as_f32x16();
10505 let c: i32x16 = c.as_i32x16();
        let r: f32x16 = vfixupimmpsz(a, b, c, IMM8, k, SAE);
        transmute(r)
10508 }
10509}
10510
10511/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst. imm8 is used to set the required flags reporting.\
10512///
10513/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10514/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_fixupimm_round_pd&expand=2502)
10515#[inline]
10516#[target_feature(enable = "avx512f")]
10517#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10518#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0, SAE = 8))]
10519#[rustc_legacy_const_generics(3, 4)]
10520pub fn _mm512_fixupimm_round_pd<const IMM8: i32, const SAE: i32>(
10521 a: __m512d,
10522 b: __m512d,
10523 c: __m512i,
10524) -> __m512d {
10525 unsafe {
10526 static_assert_uimm_bits!(IMM8, 8);
10527 static_assert_mantissas_sae!(SAE);
10528 let a: f64x8 = a.as_f64x8();
10529 let b: f64x8 = b.as_f64x8();
10530 let c: i64x8 = c.as_i64x8();
        let r: f64x8 = vfixupimmpd(a, b, c, IMM8, 0b11111111, SAE);
        transmute(r)
10533 }
10534}
10535
10536/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\
10537///
10538/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10539/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_fixupimm_round_pd&expand=2503)
10540#[inline]
10541#[target_feature(enable = "avx512f")]
10542#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10543#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0, SAE = 8))]
10544#[rustc_legacy_const_generics(4, 5)]
10545pub fn _mm512_mask_fixupimm_round_pd<const IMM8: i32, const SAE: i32>(
10546 a: __m512d,
10547 k: __mmask8,
10548 b: __m512d,
10549 c: __m512i,
10550) -> __m512d {
10551 unsafe {
10552 static_assert_uimm_bits!(IMM8, 8);
10553 static_assert_mantissas_sae!(SAE);
10554 let a: f64x8 = a.as_f64x8();
10555 let b: f64x8 = b.as_f64x8();
10556 let c: i64x8 = c.as_i64x8();
        let r: f64x8 = vfixupimmpd(a, b, c, IMM8, k, SAE);
        transmute(r)
10559 }
10560}
10561
10562/// Fix up packed double-precision (64-bit) floating-point elements in a and b using packed 64-bit integers in c, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). imm8 is used to set the required flags reporting.\
10563///
10564/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10565/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_fixupimm_round_pd&expand=2504)
10566#[inline]
10567#[target_feature(enable = "avx512f")]
10568#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10569#[cfg_attr(test, assert_instr(vfixupimmpd, IMM8 = 0, SAE = 8))]
10570#[rustc_legacy_const_generics(4, 5)]
10571pub fn _mm512_maskz_fixupimm_round_pd<const IMM8: i32, const SAE: i32>(
10572 k: __mmask8,
10573 a: __m512d,
10574 b: __m512d,
10575 c: __m512i,
10576) -> __m512d {
10577 unsafe {
10578 static_assert_uimm_bits!(IMM8, 8);
10579 static_assert_mantissas_sae!(SAE);
10580 let a: f64x8 = a.as_f64x8();
10581 let b: f64x8 = b.as_f64x8();
10582 let c: i64x8 = c.as_i64x8();
        let r: f64x8 = vfixupimmpdz(a, b, c, IMM8, k, SAE);
        transmute(r)
10585 }
10586}
10587
10588/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10589/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10590/// _MM_MANT_NORM_1_2 // interval [1, 2)\
10591/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
10592/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
10593/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10594/// The sign is determined by sc which can take the following values:\
10595/// _MM_MANT_SIGN_src // sign = sign(src)\
10596/// _MM_MANT_SIGN_zero // sign = 0\
10597/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
10598/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10599///
10600/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_round_ps&expand=2886)
10601#[inline]
10602#[target_feature(enable = "avx512f")]
10603#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10604#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0, SAE = 4))]
10605#[rustc_legacy_const_generics(1, 2, 3)]
10606pub fn _mm512_getmant_round_ps<
10607 const NORM: _MM_MANTISSA_NORM_ENUM,
10608 const SIGN: _MM_MANTISSA_SIGN_ENUM,
10609 const SAE: i32,
10610>(
10611 a: __m512,
10612) -> __m512 {
10613 unsafe {
10614 static_assert_uimm_bits!(NORM, 4);
10615 static_assert_uimm_bits!(SIGN, 2);
10616 static_assert_mantissas_sae!(SAE);
10617 let a: f32x16 = a.as_f32x16();
        let r: f32x16 = vgetmantps(a, SIGN << 2 | NORM, f32x16::ZERO, 0b11111111_11111111, SAE);
        transmute(r)
10620 }
10621}
10622
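// Usage sketch (illustrative comment; assumes an `avx512f`-enabled caller and
// the mantissa constants named in the doc above): normalizing to [1, 2) with
// the source sign, 12.0 = 1.5 * 2^3 yields 1.5 in every lane.
//
//     let a = _mm512_set1_ps(12.0);
//     let m = _mm512_getmant_round_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_src, _MM_FROUND_CUR_DIRECTION>(a);
//
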
10623/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10624/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10625/// _MM_MANT_NORM_1_2 // interval [1, 2)\
10626/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
10627/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
10628/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10629/// The sign is determined by sc which can take the following values:\
10630/// _MM_MANT_SIGN_src // sign = sign(src)\
10631/// _MM_MANT_SIGN_zero // sign = 0\
10632/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
10633/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10634///
10635/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_round_ps&expand=2887)
10636#[inline]
10637#[target_feature(enable = "avx512f")]
10638#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10639#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0, SAE = 4))]
10640#[rustc_legacy_const_generics(3, 4, 5)]
10641pub fn _mm512_mask_getmant_round_ps<
10642 const NORM: _MM_MANTISSA_NORM_ENUM,
10643 const SIGN: _MM_MANTISSA_SIGN_ENUM,
10644 const SAE: i32,
10645>(
10646 src: __m512,
10647 k: __mmask16,
10648 a: __m512,
10649) -> __m512 {
10650 unsafe {
10651 static_assert_uimm_bits!(NORM, 4);
10652 static_assert_uimm_bits!(SIGN, 2);
10653 static_assert_mantissas_sae!(SAE);
10654 let a: f32x16 = a.as_f32x16();
10655 let src: f32x16 = src.as_f32x16();
        let r: f32x16 = vgetmantps(a, SIGN << 2 | NORM, src, k, SAE);
        transmute(r)
10658 }
10659}
10660
10661/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10662/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10663/// _MM_MANT_NORM_1_2 // interval [1, 2)\
10664/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
10665/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
10666/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10667/// The sign is determined by sc which can take the following values:\
10668/// _MM_MANT_SIGN_src // sign = sign(src)\
10669/// _MM_MANT_SIGN_zero // sign = 0\
10670/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
10671/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10672///
10673/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_round_ps&expand=2888)
10674#[inline]
10675#[target_feature(enable = "avx512f")]
10676#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10677#[cfg_attr(test, assert_instr(vgetmantps, NORM = 0, SIGN = 0, SAE = 4))]
10678#[rustc_legacy_const_generics(2, 3, 4)]
10679pub fn _mm512_maskz_getmant_round_ps<
10680 const NORM: _MM_MANTISSA_NORM_ENUM,
10681 const SIGN: _MM_MANTISSA_SIGN_ENUM,
10682 const SAE: i32,
10683>(
10684 k: __mmask16,
10685 a: __m512,
10686) -> __m512 {
10687 unsafe {
10688 static_assert_uimm_bits!(NORM, 4);
10689 static_assert_uimm_bits!(SIGN, 2);
10690 static_assert_mantissas_sae!(SAE);
10691 let a: f32x16 = a.as_f32x16();
        let r: f32x16 = vgetmantps(a, SIGN << 2 | NORM, f32x16::ZERO, k, SAE);
        transmute(r)
10694 }
10695}
10696
10697/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10698/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10699/// _MM_MANT_NORM_1_2 // interval [1, 2)\
10700/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
10701/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
10702/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10703/// The sign is determined by sc which can take the following values:\
10704/// _MM_MANT_SIGN_src // sign = sign(src)\
10705/// _MM_MANT_SIGN_zero // sign = 0\
10706/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
10707/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10708///
10709/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_getmant_round_pd&expand=2883)
10710#[inline]
10711#[target_feature(enable = "avx512f")]
10712#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10713#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0, SAE = 4))]
10714#[rustc_legacy_const_generics(1, 2, 3)]
10715pub fn _mm512_getmant_round_pd<
10716 const NORM: _MM_MANTISSA_NORM_ENUM,
10717 const SIGN: _MM_MANTISSA_SIGN_ENUM,
10718 const SAE: i32,
10719>(
10720 a: __m512d,
10721) -> __m512d {
10722 unsafe {
10723 static_assert_uimm_bits!(NORM, 4);
10724 static_assert_uimm_bits!(SIGN, 2);
10725 static_assert_mantissas_sae!(SAE);
10726 let a: f64x8 = a.as_f64x8();
        let r: f64x8 = vgetmantpd(a, SIGN << 2 | NORM, f64x8::ZERO, 0b11111111, SAE);
        transmute(r)
10729 }
10730}
10731
10732/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10733/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10734/// _MM_MANT_NORM_1_2 // interval [1, 2)\
10735/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
10736/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
10737/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10738/// The sign is determined by sc which can take the following values:\
10739/// _MM_MANT_SIGN_src // sign = sign(src)\
10740/// _MM_MANT_SIGN_zero // sign = 0\
10741/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
10742/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10743///
10744/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_getmant_round_pd&expand=2884)
10745#[inline]
10746#[target_feature(enable = "avx512f")]
10747#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10748#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0, SAE = 4))]
10749#[rustc_legacy_const_generics(3, 4, 5)]
10750pub fn _mm512_mask_getmant_round_pd<
10751 const NORM: _MM_MANTISSA_NORM_ENUM,
10752 const SIGN: _MM_MANTISSA_SIGN_ENUM,
10753 const SAE: i32,
10754>(
10755 src: __m512d,
10756 k: __mmask8,
10757 a: __m512d,
10758) -> __m512d {
10759 unsafe {
10760 static_assert_uimm_bits!(NORM, 4);
10761 static_assert_uimm_bits!(SIGN, 2);
10762 static_assert_mantissas_sae!(SAE);
10763 let a: f64x8 = a.as_f64x8();
10764 let src: f64x8 = src.as_f64x8();
        let r: f64x8 = vgetmantpd(a, SIGN << 2 | NORM, src, k, SAE);
        transmute(r)
10767 }
10768}
10769
10770/// Normalize the mantissas of packed double-precision (64-bit) floating-point elements in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
10771/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
10772/// _MM_MANT_NORM_1_2 // interval [1, 2)\
10773/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
10774/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
10775/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
10776/// The sign is determined by sc which can take the following values:\
10777/// _MM_MANT_SIGN_src // sign = sign(src)\
10778/// _MM_MANT_SIGN_zero // sign = 0\
10779/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
10780/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
10781///
10782/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_getmant_round_pd&expand=2885)
10783#[inline]
10784#[target_feature(enable = "avx512f")]
10785#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10786#[cfg_attr(test, assert_instr(vgetmantpd, NORM = 0, SIGN = 0, SAE = 4))]
10787#[rustc_legacy_const_generics(2, 3, 4)]
10788pub fn _mm512_maskz_getmant_round_pd<
10789 const NORM: _MM_MANTISSA_NORM_ENUM,
10790 const SIGN: _MM_MANTISSA_SIGN_ENUM,
10791 const SAE: i32,
10792>(
10793 k: __mmask8,
10794 a: __m512d,
10795) -> __m512d {
10796 unsafe {
10797 static_assert_uimm_bits!(NORM, 4);
10798 static_assert_uimm_bits!(SIGN, 2);
10799 static_assert_mantissas_sae!(SAE);
10800 let a: f64x8 = a.as_f64x8();
        let r: f64x8 = vgetmantpd(a, SIGN << 2 | NORM, f64x8::ZERO, k, SAE);
        transmute(r)
10803 }
10804}
10805
10806/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.
10807///
10808/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_epi32&expand=1737)
10809#[inline]
10810#[target_feature(enable = "avx512f")]
10811#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10812#[cfg_attr(test, assert_instr(vcvtps2dq))]
10813pub fn _mm512_cvtps_epi32(a: __m512) -> __m512i {
10814 unsafe {
        transmute(vcvtps2dq(
            a.as_f32x16(),
            i32x16::ZERO,
            0b11111111_11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
10821 }
10822}
10823
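// Usage sketch (illustrative comment; assumes an `avx512f`-enabled caller): the
// conversion follows the current MXCSR rounding mode, which defaults to
// round-to-nearest-even.
//
//     let a = _mm512_set1_ps(1.5);
//     let i = _mm512_cvtps_epi32(a); // all lanes 2 under the default rounding mode
//
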
10824/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10825///
10826/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_epi32&expand=1738)
10827#[inline]
10828#[target_feature(enable = "avx512f")]
10829#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10830#[cfg_attr(test, assert_instr(vcvtps2dq))]
10831pub fn _mm512_mask_cvtps_epi32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
10832 unsafe {
        transmute(vcvtps2dq(
            a.as_f32x16(),
            src.as_i32x16(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
10839 }
10840}
10841
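// Usage sketch (illustrative comment; assumes an `avx512f`-enabled caller): with
// a writemask only lanes whose mask bit is set receive converted values; the
// remaining lanes are copied from `src`.
//
//     let src = _mm512_set1_epi32(-1);
//     let a = _mm512_set1_ps(3.0);
//     // lanes 0..8 become 3, lanes 8..16 keep -1
//     let r = _mm512_mask_cvtps_epi32(src, 0b00000000_11111111, a);
//
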
10842/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10843///
10844/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_epi32&expand=1739)
10845#[inline]
10846#[target_feature(enable = "avx512f")]
10847#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10848#[cfg_attr(test, assert_instr(vcvtps2dq))]
10849pub fn _mm512_maskz_cvtps_epi32(k: __mmask16, a: __m512) -> __m512i {
10850 unsafe {
        transmute(vcvtps2dq(
            a.as_f32x16(),
            i32x16::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
10857 }
10858}
10859
10860/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10861///
10862/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_epi32&expand=1735)
10863#[inline]
10864#[target_feature(enable = "avx512f,avx512vl")]
10865#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10866#[cfg_attr(test, assert_instr(vcvtps2dq))]
10867pub fn _mm256_mask_cvtps_epi32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
10868 unsafe {
10869 let convert: __m256i = _mm256_cvtps_epi32(a);
        transmute(simd_select_bitmask(k, convert.as_i32x8(), src.as_i32x8()))
10871 }
10872}
10873
10874/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10875///
10876/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_epi32&expand=1736)
10877#[inline]
10878#[target_feature(enable = "avx512f,avx512vl")]
10879#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10880#[cfg_attr(test, assert_instr(vcvtps2dq))]
10881pub fn _mm256_maskz_cvtps_epi32(k: __mmask8, a: __m256) -> __m256i {
10882 unsafe {
10883 let convert: __m256i = _mm256_cvtps_epi32(a);
        transmute(simd_select_bitmask(k, convert.as_i32x8(), i32x8::ZERO))
10885 }
10886}
10887
10888/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10889///
10890/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_epi32&expand=1732)
10891#[inline]
10892#[target_feature(enable = "avx512f,avx512vl")]
10893#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10894#[cfg_attr(test, assert_instr(vcvtps2dq))]
10895pub fn _mm_mask_cvtps_epi32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
10896 unsafe {
10897 let convert: __m128i = _mm_cvtps_epi32(a);
        transmute(simd_select_bitmask(k, convert.as_i32x4(), src.as_i32x4()))
10899 }
10900}
10901
10902/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10903///
10904/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_epi32&expand=1733)
10905#[inline]
10906#[target_feature(enable = "avx512f,avx512vl")]
10907#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10908#[cfg_attr(test, assert_instr(vcvtps2dq))]
10909pub fn _mm_maskz_cvtps_epi32(k: __mmask8, a: __m128) -> __m128i {
10910 unsafe {
10911 let convert: __m128i = _mm_cvtps_epi32(a);
        transmute(simd_select_bitmask(k, convert.as_i32x4(), i32x4::ZERO))
10913 }
10914}
10915
10916/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
10917///
10918/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_epu32&expand=1755)
10919#[inline]
10920#[target_feature(enable = "avx512f")]
10921#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10922#[cfg_attr(test, assert_instr(vcvtps2udq))]
10923pub fn _mm512_cvtps_epu32(a: __m512) -> __m512i {
10924 unsafe {
        transmute(vcvtps2udq(
            a.as_f32x16(),
            u32x16::ZERO,
            0b11111111_11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
10931 }
10932}
10933
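// Usage sketch (illustrative comment; assumes an `avx512f`-enabled caller): the
// unsigned conversion also follows MXCSR rounding; negative or out-of-range
// inputs produce the unsigned integer indefinite value.
//
//     let a = _mm512_set1_ps(3.75);
//     let u = _mm512_cvtps_epu32(a); // all lanes 4 under the default rounding mode
//
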
10934/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10935///
10936/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_epu32&expand=1756)
10937#[inline]
10938#[target_feature(enable = "avx512f")]
10939#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10940#[cfg_attr(test, assert_instr(vcvtps2udq))]
10941pub fn _mm512_mask_cvtps_epu32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
10942 unsafe {
        transmute(vcvtps2udq(
            a.as_f32x16(),
            src.as_u32x16(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
10949 }
10950}
10951
10952/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10953///
10954/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_epu32&expand=1343)
10955#[inline]
10956#[target_feature(enable = "avx512f")]
10957#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10958#[cfg_attr(test, assert_instr(vcvtps2udq))]
10959pub fn _mm512_maskz_cvtps_epu32(k: __mmask16, a: __m512) -> __m512i {
10960 unsafe {
        transmute(vcvtps2udq(
            a.as_f32x16(),
            u32x16::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
10967 }
10968}
10969
10970/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
10971///
10972/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtps_epu32&expand=1752)
10973#[inline]
10974#[target_feature(enable = "avx512f,avx512vl")]
10975#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10976#[cfg_attr(test, assert_instr(vcvtps2udq))]
10977pub fn _mm256_cvtps_epu32(a: __m256) -> __m256i {
    unsafe { transmute(vcvtps2udq256(a.as_f32x8(), u32x8::ZERO, 0b11111111)) }
10979}
10980
10981/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
10982///
10983/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_epu32&expand=1753)
10984#[inline]
10985#[target_feature(enable = "avx512f,avx512vl")]
10986#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10987#[cfg_attr(test, assert_instr(vcvtps2udq))]
10988pub fn _mm256_mask_cvtps_epu32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
    unsafe { transmute(vcvtps2udq256(a.as_f32x8(), src.as_u32x8(), k)) }
10990}
10991
10992/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
10993///
10994/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_epu32&expand=1754)
10995#[inline]
10996#[target_feature(enable = "avx512f,avx512vl")]
10997#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
10998#[cfg_attr(test, assert_instr(vcvtps2udq))]
10999pub fn _mm256_maskz_cvtps_epu32(k: __mmask8, a: __m256) -> __m256i {
    unsafe { transmute(vcvtps2udq256(a.as_f32x8(), u32x8::ZERO, k)) }
11001}
11002
11003/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
11004///
11005/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtps_epu32&expand=1749)
11006#[inline]
11007#[target_feature(enable = "avx512f,avx512vl")]
11008#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11009#[cfg_attr(test, assert_instr(vcvtps2udq))]
11010pub fn _mm_cvtps_epu32(a: __m128) -> __m128i {
    unsafe { transmute(vcvtps2udq128(a.as_f32x4(), u32x4::ZERO, 0b11111111)) }
11012}
11013
11014/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11015///
11016/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_epu32&expand=1750)
11017#[inline]
11018#[target_feature(enable = "avx512f,avx512vl")]
11019#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11020#[cfg_attr(test, assert_instr(vcvtps2udq))]
11021pub fn _mm_mask_cvtps_epu32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
11022 unsafe { transmute(src:vcvtps2udq128(a.as_f32x4(), src.as_u32x4(), mask:k)) }
11023}
11024
11025/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11026///
11027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_epu32&expand=1751)
11028#[inline]
11029#[target_feature(enable = "avx512f,avx512vl")]
11030#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11031#[cfg_attr(test, assert_instr(vcvtps2udq))]
11032pub fn _mm_maskz_cvtps_epu32(k: __mmask8, a: __m128) -> __m128i {
11033 unsafe { transmute(src:vcvtps2udq128(a.as_f32x4(), src:u32x4::ZERO, mask:k)) }
11034}
11035
11036/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
11037///
11038/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_pd&expand=1769)
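///
/// A minimal usage sketch (illustrative only; the helper name is hypothetical and the
/// caller is assumed to be gated on `avx512f`):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn widen_to_f64(v: __m256) -> __m512d {
///     // Eight f32 lanes widen to eight f64 lanes; the conversion is exact.
///     _mm512_cvtps_pd(v)
/// }
/// ```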
11039#[inline]
11040#[target_feature(enable = "avx512f")]
11041#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11042#[cfg_attr(test, assert_instr(vcvtps2pd))]
11043pub fn _mm512_cvtps_pd(a: __m256) -> __m512d {
11044 unsafe {
11045 transmute(src:vcvtps2pd(
11046 a.as_f32x8(),
11047 src:f64x8::ZERO,
11048 mask:0b11111111,
11049 _MM_FROUND_CUR_DIRECTION,
11050 ))
11051 }
11052}
11053
11054/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11055///
11056/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_pd&expand=1770)
11057#[inline]
11058#[target_feature(enable = "avx512f")]
11059#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11060#[cfg_attr(test, assert_instr(vcvtps2pd))]
11061pub fn _mm512_mask_cvtps_pd(src: __m512d, k: __mmask8, a: __m256) -> __m512d {
11062 unsafe {
11063 transmute(src:vcvtps2pd(
11064 a.as_f32x8(),
11065 src.as_f64x8(),
11066 mask:k,
11067 _MM_FROUND_CUR_DIRECTION,
11068 ))
11069 }
11070}
11071
11072/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11073///
11074/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_pd&expand=1771)
11075#[inline]
11076#[target_feature(enable = "avx512f")]
11077#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11078#[cfg_attr(test, assert_instr(vcvtps2pd))]
11079pub fn _mm512_maskz_cvtps_pd(k: __mmask8, a: __m256) -> __m512d {
11080 unsafe {
11081 transmute(src:vcvtps2pd(
11082 a.as_f32x8(),
11083 src:f64x8::ZERO,
11084 mask:k,
11085 _MM_FROUND_CUR_DIRECTION,
11086 ))
11087 }
11088}
11089
11090/// Performs element-by-element conversion of the lower half of packed single-precision (32-bit) floating-point elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst.
11091///
11092/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpslo_pd&expand=1784)
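///
/// A minimal usage sketch (illustrative only; the helper name is hypothetical):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn widen_low_half(v: __m512) -> __m512d {
///     // Only the lower eight f32 lanes of `v` are converted; the upper eight are ignored.
///     _mm512_cvtpslo_pd(v)
/// }
/// ```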
11093#[inline]
11094#[target_feature(enable = "avx512f")]
11095#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11096#[cfg_attr(test, assert_instr(vcvtps2pd))]
11097pub fn _mm512_cvtpslo_pd(v2: __m512) -> __m512d {
11098 unsafe {
11099 transmute(src:vcvtps2pd(
11100 a:_mm512_castps512_ps256(v2).as_f32x8(),
11101 src:f64x8::ZERO,
11102 mask:0b11111111,
11103 _MM_FROUND_CUR_DIRECTION,
11104 ))
11105 }
11106}
11107
11108/// Performs element-by-element conversion of the lower half of packed single-precision (32-bit) floating-point elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11109///
11110/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpslo_pd&expand=1785)
11111#[inline]
11112#[target_feature(enable = "avx512f")]
11113#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11114#[cfg_attr(test, assert_instr(vcvtps2pd))]
11115pub fn _mm512_mask_cvtpslo_pd(src: __m512d, k: __mmask8, v2: __m512) -> __m512d {
11116 unsafe {
11117 transmute(src:vcvtps2pd(
11118 a:_mm512_castps512_ps256(v2).as_f32x8(),
11119 src.as_f64x8(),
11120 mask:k,
11121 _MM_FROUND_CUR_DIRECTION,
11122 ))
11123 }
11124}
11125
11126/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
11127///
11128/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_ps&expand=1712)
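///
/// A minimal usage sketch (illustrative only; the helper name is hypothetical):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn narrow_to_f32(v: __m512d) -> __m256 {
///     // Eight f64 lanes are rounded to eight f32 lanes using the current rounding mode.
///     _mm512_cvtpd_ps(v)
/// }
/// ```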
11129#[inline]
11130#[target_feature(enable = "avx512f")]
11131#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11132#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11133pub fn _mm512_cvtpd_ps(a: __m512d) -> __m256 {
11134 unsafe {
11135 transmute(src:vcvtpd2ps(
11136 a.as_f64x8(),
11137 src:f32x8::ZERO,
11138 mask:0b11111111,
11139 _MM_FROUND_CUR_DIRECTION,
11140 ))
11141 }
11142}
11143
11144/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11145///
11146/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_ps&expand=1713)
11147#[inline]
11148#[target_feature(enable = "avx512f")]
11149#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11150#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11151pub fn _mm512_mask_cvtpd_ps(src: __m256, k: __mmask8, a: __m512d) -> __m256 {
11152 unsafe {
11153 transmute(src:vcvtpd2ps(
11154 a.as_f64x8(),
11155 src.as_f32x8(),
11156 mask:k,
11157 _MM_FROUND_CUR_DIRECTION,
11158 ))
11159 }
11160}
11161
11162/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11163///
11164/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_ps&expand=1714)
11165#[inline]
11166#[target_feature(enable = "avx512f")]
11167#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11168#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11169pub fn _mm512_maskz_cvtpd_ps(k: __mmask8, a: __m512d) -> __m256 {
11170 unsafe {
11171 transmute(src:vcvtpd2ps(
11172 a.as_f64x8(),
11173 src:f32x8::ZERO,
11174 mask:k,
11175 _MM_FROUND_CUR_DIRECTION,
11176 ))
11177 }
11178}
11179
11180/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11181///
11182/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_ps&expand=1710)
11183#[inline]
11184#[target_feature(enable = "avx512f,avx512vl")]
11185#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11186#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11187pub fn _mm256_mask_cvtpd_ps(src: __m128, k: __mmask8, a: __m256d) -> __m128 {
11188 unsafe {
11189 let convert: __m128 = _mm256_cvtpd_ps(a);
11190 transmute(src:simd_select_bitmask(m:k, yes:convert.as_f32x4(), no:src.as_f32x4()))
11191 }
11192}
11193
11194/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11195///
11196/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_ps&expand=1711)
11197#[inline]
11198#[target_feature(enable = "avx512f,avx512vl")]
11199#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11200#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11201pub fn _mm256_maskz_cvtpd_ps(k: __mmask8, a: __m256d) -> __m128 {
11202 unsafe {
11203 let convert: __m128 = _mm256_cvtpd_ps(a);
11204 transmute(src:simd_select_bitmask(m:k, yes:convert.as_f32x4(), no:f32x4::ZERO))
11205 }
11206}
11207
11208/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11209///
11210/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_ps&expand=1707)
11211#[inline]
11212#[target_feature(enable = "avx512f,avx512vl")]
11213#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11214#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11215pub fn _mm_mask_cvtpd_ps(src: __m128, k: __mmask8, a: __m128d) -> __m128 {
11216 unsafe {
11217 let convert: __m128 = _mm_cvtpd_ps(a);
11218 transmute(src:simd_select_bitmask(m:k, yes:convert.as_f32x4(), no:src.as_f32x4()))
11219 }
11220}
11221
11222/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11223///
11224/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_ps&expand=1708)
11225#[inline]
11226#[target_feature(enable = "avx512f,avx512vl")]
11227#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11228#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11229pub fn _mm_maskz_cvtpd_ps(k: __mmask8, a: __m128d) -> __m128 {
11230 unsafe {
11231 let convert: __m128 = _mm_cvtpd_ps(a);
11232 transmute(src:simd_select_bitmask(m:k, yes:convert.as_f32x4(), no:f32x4::ZERO))
11233 }
11234}
11235
11236/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.
11237///
11238/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_epi32&expand=1675)
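///
/// A minimal usage sketch (illustrative only; the helper name is hypothetical):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn round_to_i32(v: __m512d) -> __m256i {
///     // Values are rounded according to the current MXCSR rounding mode
///     // (round-to-nearest-even unless it has been changed).
///     _mm512_cvtpd_epi32(v)
/// }
/// ```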
11239#[inline]
11240#[target_feature(enable = "avx512f")]
11241#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11242#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11243pub fn _mm512_cvtpd_epi32(a: __m512d) -> __m256i {
11244 unsafe {
11245 transmute(src:vcvtpd2dq(
11246 a.as_f64x8(),
11247 src:i32x8::ZERO,
11248 mask:0b11111111,
11249 _MM_FROUND_CUR_DIRECTION,
11250 ))
11251 }
11252}
11253
11254/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11255///
11256/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_epi32&expand=1676)
11257#[inline]
11258#[target_feature(enable = "avx512f")]
11259#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11260#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11261pub fn _mm512_mask_cvtpd_epi32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
11262 unsafe {
11263 transmute(src:vcvtpd2dq(
11264 a.as_f64x8(),
11265 src.as_i32x8(),
11266 mask:k,
11267 _MM_FROUND_CUR_DIRECTION,
11268 ))
11269 }
11270}
11271
11272/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11273///
11274/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_epi32&expand=1677)
11275#[inline]
11276#[target_feature(enable = "avx512f")]
11277#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11278#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11279pub fn _mm512_maskz_cvtpd_epi32(k: __mmask8, a: __m512d) -> __m256i {
11280 unsafe {
11281 transmute(src:vcvtpd2dq(
11282 a.as_f64x8(),
11283 src:i32x8::ZERO,
11284 mask:k,
11285 _MM_FROUND_CUR_DIRECTION,
11286 ))
11287 }
11288}
11289
11290/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11291///
11292/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_epi32&expand=1673)
11293#[inline]
11294#[target_feature(enable = "avx512f,avx512vl")]
11295#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11296#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11297pub fn _mm256_mask_cvtpd_epi32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
11298 unsafe {
11299 let convert: __m128i = _mm256_cvtpd_epi32(a);
11300 transmute(src:simd_select_bitmask(m:k, yes:convert.as_i32x4(), no:src.as_i32x4()))
11301 }
11302}
11303
11304/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11305///
11306/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_epi32&expand=1674)
11307#[inline]
11308#[target_feature(enable = "avx512f,avx512vl")]
11309#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11310#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11311pub fn _mm256_maskz_cvtpd_epi32(k: __mmask8, a: __m256d) -> __m128i {
11312 unsafe {
11313 let convert: __m128i = _mm256_cvtpd_epi32(a);
11314 transmute(src:simd_select_bitmask(m:k, yes:convert.as_i32x4(), no:i32x4::ZERO))
11315 }
11316}
11317
11318/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11319///
11320/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_epi32&expand=1670)
11321#[inline]
11322#[target_feature(enable = "avx512f,avx512vl")]
11323#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11324#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11325pub fn _mm_mask_cvtpd_epi32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
11326 unsafe {
11327 let convert: __m128i = _mm_cvtpd_epi32(a);
11328 transmute(src:simd_select_bitmask(m:k, yes:convert.as_i32x4(), no:src.as_i32x4()))
11329 }
11330}
11331
11332/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11333///
11334/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_epi32&expand=1671)
11335#[inline]
11336#[target_feature(enable = "avx512f,avx512vl")]
11337#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11338#[cfg_attr(test, assert_instr(vcvtpd2dq))]
11339pub fn _mm_maskz_cvtpd_epi32(k: __mmask8, a: __m128d) -> __m128i {
11340 unsafe {
11341 let convert: __m128i = _mm_cvtpd_epi32(a);
11342 transmute(src:simd_select_bitmask(m:k, yes:convert.as_i32x4(), no:i32x4::ZERO))
11343 }
11344}
11345
11346/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
11347///
11348/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_epu32&expand=1693)
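///
/// A minimal usage sketch (illustrative only; the helper name is hypothetical):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn round_to_u32(v: __m512d) -> __m256i {
///     // Like the signed conversion above, but the eight results are unsigned 32-bit lanes.
///     _mm512_cvtpd_epu32(v)
/// }
/// ```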
11349#[inline]
11350#[target_feature(enable = "avx512f")]
11351#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11352#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11353pub fn _mm512_cvtpd_epu32(a: __m512d) -> __m256i {
11354 unsafe {
11355 transmute(src:vcvtpd2udq(
11356 a.as_f64x8(),
11357 src:u32x8::ZERO,
11358 mask:0b11111111,
11359 _MM_FROUND_CUR_DIRECTION,
11360 ))
11361 }
11362}
11363
11364/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11365///
11366/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_epu32&expand=1694)
11367#[inline]
11368#[target_feature(enable = "avx512f")]
11369#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11370#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11371pub fn _mm512_mask_cvtpd_epu32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
11372 unsafe {
11373 transmute(src:vcvtpd2udq(
11374 a.as_f64x8(),
11375 src.as_u32x8(),
11376 mask:k,
11377 _MM_FROUND_CUR_DIRECTION,
11378 ))
11379 }
11380}
11381
11382/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11383///
11384/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtpd_epu32&expand=1695)
11385#[inline]
11386#[target_feature(enable = "avx512f")]
11387#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11388#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11389pub fn _mm512_maskz_cvtpd_epu32(k: __mmask8, a: __m512d) -> __m256i {
11390 unsafe {
11391 transmute(src:vcvtpd2udq(
11392 a.as_f64x8(),
11393 src:u32x8::ZERO,
11394 mask:k,
11395 _MM_FROUND_CUR_DIRECTION,
11396 ))
11397 }
11398}
11399
11400/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
11401///
11402/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtpd_epu32&expand=1690)
11403#[inline]
11404#[target_feature(enable = "avx512f,avx512vl")]
11405#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11406#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11407pub fn _mm256_cvtpd_epu32(a: __m256d) -> __m128i {
11408 unsafe { transmute(src:vcvtpd2udq256(a.as_f64x4(), src:u32x4::ZERO, mask:0b11111111)) }
11409}
11410
11411/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11412///
11413/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtpd_epu32&expand=1691)
11414#[inline]
11415#[target_feature(enable = "avx512f,avx512vl")]
11416#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11417#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11418pub fn _mm256_mask_cvtpd_epu32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
11419 unsafe { transmute(src:vcvtpd2udq256(a.as_f64x4(), src.as_u32x4(), mask:k)) }
11420}
11421
11422/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11423///
11424/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtpd_epu32&expand=1692)
11425#[inline]
11426#[target_feature(enable = "avx512f,avx512vl")]
11427#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11428#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11429pub fn _mm256_maskz_cvtpd_epu32(k: __mmask8, a: __m256d) -> __m128i {
11430 unsafe { transmute(src:vcvtpd2udq256(a.as_f64x4(), src:u32x4::ZERO, mask:k)) }
11431}
11432
11433/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.
11434///
11435/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtpd_epu32&expand=1687)
11436#[inline]
11437#[target_feature(enable = "avx512f,avx512vl")]
11438#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11439#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11440pub fn _mm_cvtpd_epu32(a: __m128d) -> __m128i {
11441 unsafe { transmute(src:vcvtpd2udq128(a.as_f64x2(), src:u32x4::ZERO, mask:0b11111111)) }
11442}
11443
11444/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11445///
11446/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtpd_epu32&expand=1688)
11447#[inline]
11448#[target_feature(enable = "avx512f,avx512vl")]
11449#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11450#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11451pub fn _mm_mask_cvtpd_epu32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
11452 unsafe { transmute(src:vcvtpd2udq128(a.as_f64x2(), src.as_u32x4(), mask:k)) }
11453}
11454
11455/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11456///
11457/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtpd_epu32&expand=1689)
11458#[inline]
11459#[target_feature(enable = "avx512f,avx512vl")]
11460#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11461#[cfg_attr(test, assert_instr(vcvtpd2udq))]
11462pub fn _mm_maskz_cvtpd_epu32(k: __mmask8, a: __m128d) -> __m128i {
11463 unsafe { transmute(src:vcvtpd2udq128(a.as_f64x2(), src:u32x4::ZERO, mask:k)) }
11464}
11465
11466/// Performs an element-by-element conversion of packed double-precision (64-bit) floating-point elements in v2 to single-precision (32-bit) floating-point elements and stores them in dst. The elements are stored in the lower half of the results vector, while the remaining upper half locations are set to 0.
11467///
11468/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtpd_pslo&expand=1715)
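///
/// A minimal usage sketch (illustrative only; the helper name is hypothetical):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn narrow_into_low_half(v: __m512d) -> __m512 {
///     // The eight converted f32 values land in lanes 0..7; lanes 8..15 are zero.
///     _mm512_cvtpd_pslo(v)
/// }
/// ```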
11469#[inline]
11470#[target_feature(enable = "avx512f")]
11471#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11472#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11473pub fn _mm512_cvtpd_pslo(v2: __m512d) -> __m512 {
11474 unsafe {
11475 let r: f32x8 = vcvtpd2ps(
11476 a:v2.as_f64x8(),
11477 src:f32x8::ZERO,
11478 mask:0b11111111,
11479 _MM_FROUND_CUR_DIRECTION,
11480 );
11481 simd_shuffle!(
11482 r,
11483 f32x8::ZERO,
11484 [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
11485 )
11486 }
11487}
11488
11489/// Performs an element-by-element conversion of packed double-precision (64-bit) floating-point elements in v2 to single-precision (32-bit) floating-point elements and stores them in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). The elements are stored in the lower half of the results vector, while the remaining upper half locations are set to 0.
11490///
11491/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtpd_pslo&expand=1716)
11492#[inline]
11493#[target_feature(enable = "avx512f")]
11494#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11495#[cfg_attr(test, assert_instr(vcvtpd2ps))]
11496pub fn _mm512_mask_cvtpd_pslo(src: __m512, k: __mmask8, v2: __m512d) -> __m512 {
11497 unsafe {
11498 let r: f32x8 = vcvtpd2ps(
11499 a:v2.as_f64x8(),
11500 src:_mm512_castps512_ps256(src).as_f32x8(),
11501 mask:k,
11502 _MM_FROUND_CUR_DIRECTION,
11503 );
11504 simd_shuffle!(
11505 r,
11506 f32x8::ZERO,
11507 [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
11508 )
11509 }
11510}
11511
11512/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst.
11513///
11514/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi8_epi32&expand=1535)
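///
/// A minimal usage sketch (illustrative only; the helper name is hypothetical):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn widen_signed_bytes(v: __m128i) -> __m512i {
///     // Each of the 16 signed bytes becomes a sign-extended 32-bit lane.
///     _mm512_cvtepi8_epi32(v)
/// }
/// ```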
11515#[inline]
11516#[target_feature(enable = "avx512f")]
11517#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11518#[cfg_attr(test, assert_instr(vpmovsxbd))]
11519pub fn _mm512_cvtepi8_epi32(a: __m128i) -> __m512i {
11520 unsafe {
11521 let a: i8x16 = a.as_i8x16();
11522 transmute::<i32x16, _>(src:simd_cast(a))
11523 }
11524}
11525
11526/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11527///
11528/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi8_epi32&expand=1536)
11529#[inline]
11530#[target_feature(enable = "avx512f")]
11531#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11532#[cfg_attr(test, assert_instr(vpmovsxbd))]
11533pub fn _mm512_mask_cvtepi8_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
11534 unsafe {
11535 let convert: i32x16 = _mm512_cvtepi8_epi32(a).as_i32x16();
11536 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i32x16()))
11537 }
11538}
11539
11540/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11541///
11542/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi8_epi32&expand=1537)
11543#[inline]
11544#[target_feature(enable = "avx512f")]
11545#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11546#[cfg_attr(test, assert_instr(vpmovsxbd))]
11547pub fn _mm512_maskz_cvtepi8_epi32(k: __mmask16, a: __m128i) -> __m512i {
11548 unsafe {
11549 let convert: i32x16 = _mm512_cvtepi8_epi32(a).as_i32x16();
11550 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i32x16::ZERO))
11551 }
11552}
11553
11554/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11555///
11556/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi8_epi32&expand=1533)
11557#[inline]
11558#[target_feature(enable = "avx512f,avx512vl")]
11559#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11560#[cfg_attr(test, assert_instr(vpmovsxbd))]
11561pub fn _mm256_mask_cvtepi8_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
11562 unsafe {
11563 let convert: i32x8 = _mm256_cvtepi8_epi32(a).as_i32x8();
11564 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i32x8()))
11565 }
11566}
11567
11568/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11569///
11570/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi8_epi32&expand=1534)
11571#[inline]
11572#[target_feature(enable = "avx512f,avx512vl")]
11573#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11574#[cfg_attr(test, assert_instr(vpmovsxbd))]
11575pub fn _mm256_maskz_cvtepi8_epi32(k: __mmask8, a: __m128i) -> __m256i {
11576 unsafe {
11577 let convert: i32x8 = _mm256_cvtepi8_epi32(a).as_i32x8();
11578 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i32x8::ZERO))
11579 }
11580}
11581
11582/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11583///
11584/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi8_epi32&expand=1530)
11585#[inline]
11586#[target_feature(enable = "avx512f,avx512vl")]
11587#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11588#[cfg_attr(test, assert_instr(vpmovsxbd))]
11589pub fn _mm_mask_cvtepi8_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11590 unsafe {
11591 let convert: i32x4 = _mm_cvtepi8_epi32(a).as_i32x4();
11592 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i32x4()))
11593 }
11594}
11595
11596/// Sign extend packed 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11597///
11598/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi8_epi32&expand=1531)
11599#[inline]
11600#[target_feature(enable = "avx512f,avx512vl")]
11601#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11602#[cfg_attr(test, assert_instr(vpmovsxbd))]
11603pub fn _mm_maskz_cvtepi8_epi32(k: __mmask8, a: __m128i) -> __m128i {
11604 unsafe {
11605 let convert: i32x4 = _mm_cvtepi8_epi32(a).as_i32x4();
11606 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i32x4::ZERO))
11607 }
11608}
11609
11610/// Sign extend packed 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst.
11611///
11612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi8_epi64&expand=1544)
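///
/// A minimal usage sketch (illustrative only; the helper name is hypothetical):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn widen_low_signed_bytes(v: __m128i) -> __m512i {
///     // Only the low 8 bytes of `v` are used; each becomes a sign-extended 64-bit lane.
///     _mm512_cvtepi8_epi64(v)
/// }
/// ```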
11613#[inline]
11614#[target_feature(enable = "avx512f")]
11615#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11616#[cfg_attr(test, assert_instr(vpmovsxbq))]
11617pub fn _mm512_cvtepi8_epi64(a: __m128i) -> __m512i {
11618 unsafe {
11619 let a: i8x16 = a.as_i8x16();
11620 let v64: i8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
11621 transmute::<i64x8, _>(src:simd_cast(v64))
11622 }
11623}
11624
11625/// Sign extend packed 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11626///
11627/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi8_epi64&expand=1545)
11628#[inline]
11629#[target_feature(enable = "avx512f")]
11630#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11631#[cfg_attr(test, assert_instr(vpmovsxbq))]
11632pub fn _mm512_mask_cvtepi8_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
11633 unsafe {
11634 let convert: i64x8 = _mm512_cvtepi8_epi64(a).as_i64x8();
11635 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i64x8()))
11636 }
11637}
11638
11639/// Sign extend packed 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11640///
11641/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi8_epi64&expand=1546)
11642#[inline]
11643#[target_feature(enable = "avx512f")]
11644#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11645#[cfg_attr(test, assert_instr(vpmovsxbq))]
11646pub fn _mm512_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m512i {
11647 unsafe {
11648 let convert: i64x8 = _mm512_cvtepi8_epi64(a).as_i64x8();
11649 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i64x8::ZERO))
11650 }
11651}
11652
11653/// Sign extend packed 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11654///
11655/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi8_epi64&expand=1542)
11656#[inline]
11657#[target_feature(enable = "avx512f,avx512vl")]
11658#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11659#[cfg_attr(test, assert_instr(vpmovsxbq))]
11660pub fn _mm256_mask_cvtepi8_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
11661 unsafe {
11662 let convert: i64x4 = _mm256_cvtepi8_epi64(a).as_i64x4();
11663 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i64x4()))
11664 }
11665}
11666
11667/// Sign extend packed 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11668///
11669/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi8_epi64&expand=1543)
11670#[inline]
11671#[target_feature(enable = "avx512f,avx512vl")]
11672#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11673#[cfg_attr(test, assert_instr(vpmovsxbq))]
11674pub fn _mm256_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m256i {
11675 unsafe {
11676 let convert: i64x4 = _mm256_cvtepi8_epi64(a).as_i64x4();
11677 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i64x4::ZERO))
11678 }
11679}
11680
11681/// Sign extend packed 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11682///
11683/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi8_epi64&expand=1539)
11684#[inline]
11685#[target_feature(enable = "avx512f,avx512vl")]
11686#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11687#[cfg_attr(test, assert_instr(vpmovsxbq))]
11688pub fn _mm_mask_cvtepi8_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11689 unsafe {
11690 let convert: i64x2 = _mm_cvtepi8_epi64(a).as_i64x2();
11691 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i64x2()))
11692 }
11693}
11694
11695/// Sign extend packed 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11696///
11697/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi8_epi64&expand=1540)
11698#[inline]
11699#[target_feature(enable = "avx512f,avx512vl")]
11700#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11701#[cfg_attr(test, assert_instr(vpmovsxbq))]
11702pub fn _mm_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m128i {
11703 unsafe {
11704 let convert: i64x2 = _mm_cvtepi8_epi64(a).as_i64x2();
11705 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i64x2::ZERO))
11706 }
11707}
11708
11709/// Zero extend packed unsigned 8-bit integers in a to packed 32-bit integers, and store the results in dst.
11710///
11711/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu8_epi32&expand=1621)
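///
/// A minimal usage sketch (illustrative only; the helper name is hypothetical):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn widen_unsigned_bytes(v: __m128i) -> __m512i {
///     // Each of the 16 bytes is zero-extended, so every 32-bit lane is in 0..=255.
///     _mm512_cvtepu8_epi32(v)
/// }
/// ```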
11712#[inline]
11713#[target_feature(enable = "avx512f")]
11714#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11715#[cfg_attr(test, assert_instr(vpmovzxbd))]
11716pub fn _mm512_cvtepu8_epi32(a: __m128i) -> __m512i {
11717 unsafe {
11718 let a: u8x16 = a.as_u8x16();
11719 transmute::<i32x16, _>(src:simd_cast(a))
11720 }
11721}
11722
11723/// Zero extend packed unsigned 8-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11724///
11725/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu8_epi32&expand=1622)
11726#[inline]
11727#[target_feature(enable = "avx512f")]
11728#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11729#[cfg_attr(test, assert_instr(vpmovzxbd))]
11730pub fn _mm512_mask_cvtepu8_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
11731 unsafe {
11732 let convert: i32x16 = _mm512_cvtepu8_epi32(a).as_i32x16();
11733 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i32x16()))
11734 }
11735}
11736
11737/// Zero extend packed unsigned 8-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11738///
11739/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu8_epi32&expand=1623)
11740#[inline]
11741#[target_feature(enable = "avx512f")]
11742#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11743#[cfg_attr(test, assert_instr(vpmovzxbd))]
11744pub fn _mm512_maskz_cvtepu8_epi32(k: __mmask16, a: __m128i) -> __m512i {
11745 unsafe {
11746 let convert: i32x16 = _mm512_cvtepu8_epi32(a).as_i32x16();
11747 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i32x16::ZERO))
11748 }
11749}
11750
11751/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11752///
11753/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu8_epi32&expand=1619)
11754#[inline]
11755#[target_feature(enable = "avx512f,avx512vl")]
11756#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11757#[cfg_attr(test, assert_instr(vpmovzxbd))]
11758pub fn _mm256_mask_cvtepu8_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
11759 unsafe {
11760 let convert: i32x8 = _mm256_cvtepu8_epi32(a).as_i32x8();
11761 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i32x8()))
11762 }
11763}
11764
11765/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11766///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu8_epi32&expand=1620)
11768#[inline]
11769#[target_feature(enable = "avx512f,avx512vl")]
11770#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11771#[cfg_attr(test, assert_instr(vpmovzxbd))]
11772pub fn _mm256_maskz_cvtepu8_epi32(k: __mmask8, a: __m128i) -> __m256i {
11773 unsafe {
11774 let convert: i32x8 = _mm256_cvtepu8_epi32(a).as_i32x8();
11775 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i32x8::ZERO))
11776 }
11777}
11778
11779/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11780///
11781/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu8_epi32&expand=1616)
11782#[inline]
11783#[target_feature(enable = "avx512f,avx512vl")]
11784#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11785#[cfg_attr(test, assert_instr(vpmovzxbd))]
11786pub fn _mm_mask_cvtepu8_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11787 unsafe {
11788 let convert: i32x4 = _mm_cvtepu8_epi32(a).as_i32x4();
11789 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i32x4()))
11790 }
11791}
11792
/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu8_epi32&expand=1617)
11796#[inline]
11797#[target_feature(enable = "avx512f,avx512vl")]
11798#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11799#[cfg_attr(test, assert_instr(vpmovzxbd))]
11800pub fn _mm_maskz_cvtepu8_epi32(k: __mmask8, a: __m128i) -> __m128i {
11801 unsafe {
11802 let convert: i32x4 = _mm_cvtepu8_epi32(a).as_i32x4();
11803 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i32x4::ZERO))
11804 }
11805}
11806
/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst.
11808///
11809/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu8_epi64&expand=1630)
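///
/// A minimal usage sketch (illustrative only; the helper name is hypothetical):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn widen_low_unsigned_bytes(v: __m128i) -> __m512i {
///     // Only the low 8 bytes of `v` are used; each is zero-extended to a 64-bit lane.
///     _mm512_cvtepu8_epi64(v)
/// }
/// ```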
11810#[inline]
11811#[target_feature(enable = "avx512f")]
11812#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11813#[cfg_attr(test, assert_instr(vpmovzxbq))]
11814pub fn _mm512_cvtepu8_epi64(a: __m128i) -> __m512i {
11815 unsafe {
11816 let a: u8x16 = a.as_u8x16();
11817 let v64: u8x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]);
11818 transmute::<i64x8, _>(src:simd_cast(v64))
11819 }
11820}
11821
11822/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11823///
11824/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu8_epi64&expand=1631)
11825#[inline]
11826#[target_feature(enable = "avx512f")]
11827#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11828#[cfg_attr(test, assert_instr(vpmovzxbq))]
11829pub fn _mm512_mask_cvtepu8_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
11830 unsafe {
11831 let convert: i64x8 = _mm512_cvtepu8_epi64(a).as_i64x8();
11832 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i64x8()))
11833 }
11834}
11835
11836/// Zero extend packed unsigned 8-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11837///
11838/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu8_epi64&expand=1632)
11839#[inline]
11840#[target_feature(enable = "avx512f")]
11841#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11842#[cfg_attr(test, assert_instr(vpmovzxbq))]
11843pub fn _mm512_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m512i {
11844 unsafe {
11845 let convert: i64x8 = _mm512_cvtepu8_epi64(a).as_i64x8();
11846 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i64x8::ZERO))
11847 }
11848}
11849
11850/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11851///
11852/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu8_epi64&expand=1628)
11853#[inline]
11854#[target_feature(enable = "avx512f,avx512vl")]
11855#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11856#[cfg_attr(test, assert_instr(vpmovzxbq))]
11857pub fn _mm256_mask_cvtepu8_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
11858 unsafe {
11859 let convert: i64x4 = _mm256_cvtepu8_epi64(a).as_i64x4();
11860 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i64x4()))
11861 }
11862}
11863
11864/// Zero extend packed unsigned 8-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11865///
11866/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu8_epi64&expand=1629)
11867#[inline]
11868#[target_feature(enable = "avx512f,avx512vl")]
11869#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11870#[cfg_attr(test, assert_instr(vpmovzxbq))]
11871pub fn _mm256_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m256i {
11872 unsafe {
11873 let convert: i64x4 = _mm256_cvtepu8_epi64(a).as_i64x4();
11874 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i64x4::ZERO))
11875 }
11876}
11877
11878/// Zero extend packed unsigned 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11879///
11880/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu8_epi64&expand=1625)
11881#[inline]
11882#[target_feature(enable = "avx512f,avx512vl")]
11883#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11884#[cfg_attr(test, assert_instr(vpmovzxbq))]
11885pub fn _mm_mask_cvtepu8_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11886 unsafe {
11887 let convert: i64x2 = _mm_cvtepu8_epi64(a).as_i64x2();
11888 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i64x2()))
11889 }
11890}
11891
11892/// Zero extend packed unsigned 8-bit integers in the low 2 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11893///
11894/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu8_epi64&expand=1626)
11895#[inline]
11896#[target_feature(enable = "avx512f,avx512vl")]
11897#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11898#[cfg_attr(test, assert_instr(vpmovzxbq))]
11899pub fn _mm_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m128i {
11900 unsafe {
11901 let convert: i64x2 = _mm_cvtepu8_epi64(a).as_i64x2();
11902 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i64x2::ZERO))
11903 }
11904}
11905
11906/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst.
11907///
11908/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi16_epi32&expand=1389)
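///
/// A minimal usage sketch (illustrative only; the helper name is hypothetical):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn widen_signed_words(v: __m256i) -> __m512i {
///     // Each of the 16 signed 16-bit lanes becomes a sign-extended 32-bit lane.
///     _mm512_cvtepi16_epi32(v)
/// }
/// ```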
11909#[inline]
11910#[target_feature(enable = "avx512f")]
11911#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11912#[cfg_attr(test, assert_instr(vpmovsxwd))]
11913pub fn _mm512_cvtepi16_epi32(a: __m256i) -> __m512i {
11914 unsafe {
11915 let a: i16x16 = a.as_i16x16();
11916 transmute::<i32x16, _>(src:simd_cast(a))
11917 }
11918}
11919
11920/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11921///
11922/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi16_epi32&expand=1390)
11923#[inline]
11924#[target_feature(enable = "avx512f")]
11925#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11926#[cfg_attr(test, assert_instr(vpmovsxwd))]
11927pub fn _mm512_mask_cvtepi16_epi32(src: __m512i, k: __mmask16, a: __m256i) -> __m512i {
11928 unsafe {
11929 let convert: i32x16 = _mm512_cvtepi16_epi32(a).as_i32x16();
11930 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i32x16()))
11931 }
11932}
11933
11934/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11935///
11936/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi16_epi32&expand=1391)
11937#[inline]
11938#[target_feature(enable = "avx512f")]
11939#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11940#[cfg_attr(test, assert_instr(vpmovsxwd))]
11941pub fn _mm512_maskz_cvtepi16_epi32(k: __mmask16, a: __m256i) -> __m512i {
11942 unsafe {
11943 let convert: i32x16 = _mm512_cvtepi16_epi32(a).as_i32x16();
11944 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i32x16::ZERO))
11945 }
11946}
11947
11948/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11949///
11950/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi16_epi32&expand=1387)
11951#[inline]
11952#[target_feature(enable = "avx512f,avx512vl")]
11953#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11954#[cfg_attr(test, assert_instr(vpmovsxwd))]
11955pub fn _mm256_mask_cvtepi16_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
11956 unsafe {
11957 let convert: i32x8 = _mm256_cvtepi16_epi32(a).as_i32x8();
11958 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i32x8()))
11959 }
11960}
11961
11962/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11963///
11964/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi16_epi32&expand=1388)
11965#[inline]
11966#[target_feature(enable = "avx512f,avx512vl")]
11967#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11968#[cfg_attr(test, assert_instr(vpmovsxwd))]
11969pub fn _mm256_maskz_cvtepi16_epi32(k: __mmask8, a: __m128i) -> __m256i {
11970 unsafe {
11971 let convert: i32x8 = _mm256_cvtepi16_epi32(a).as_i32x8();
11972 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i32x8::ZERO))
11973 }
11974}
11975
11976/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
11977///
11978/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi16_epi32&expand=1384)
11979#[inline]
11980#[target_feature(enable = "avx512f,avx512vl")]
11981#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11982#[cfg_attr(test, assert_instr(vpmovsxwd))]
11983pub fn _mm_mask_cvtepi16_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
11984 unsafe {
11985 let convert: i32x4 = _mm_cvtepi16_epi32(a).as_i32x4();
11986 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i32x4()))
11987 }
11988}
11989
11990/// Sign extend packed 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
11991///
11992/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi16_epi32&expand=1385)
11993#[inline]
11994#[target_feature(enable = "avx512f,avx512vl")]
11995#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
11996#[cfg_attr(test, assert_instr(vpmovsxwd))]
11997pub fn _mm_maskz_cvtepi16_epi32(k: __mmask8, a: __m128i) -> __m128i {
11998 unsafe {
11999 let convert: i32x4 = _mm_cvtepi16_epi32(a).as_i32x4();
12000 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i32x4::ZERO))
12001 }
12002}
12003
12004/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst.
12005///
12006/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi16_epi64&expand=1398)
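///
/// A minimal usage sketch (illustrative only; the helper name is hypothetical):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn widen_words_to_i64(v: __m128i) -> __m512i {
///     // All eight 16-bit lanes of `v` are sign-extended to 64-bit lanes.
///     _mm512_cvtepi16_epi64(v)
/// }
/// ```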
12007#[inline]
12008#[target_feature(enable = "avx512f")]
12009#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
12010#[cfg_attr(test, assert_instr(vpmovsxwq))]
12011pub fn _mm512_cvtepi16_epi64(a: __m128i) -> __m512i {
12012 unsafe {
12013 let a: i16x8 = a.as_i16x8();
12014 transmute::<i64x8, _>(src:simd_cast(a))
12015 }
12016}
12017
12018/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
12019///
12020/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi16_epi64&expand=1399)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxwq))]
pub fn _mm512_mask_cvtepi16_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
    unsafe {
        let convert: i64x8 = _mm512_cvtepi16_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
    }
}

/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi16_epi64&expand=1400)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxwq))]
pub fn _mm512_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m512i {
    unsafe {
        let convert: i64x8 = _mm512_cvtepi16_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
    }
}

/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi16_epi64&expand=1396)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxwq))]
pub fn _mm256_mask_cvtepi16_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let convert: i64x4 = _mm256_cvtepi16_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
    }
}

/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi16_epi64&expand=1397)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxwq))]
pub fn _mm256_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let convert: i64x4 = _mm256_cvtepi16_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
    }
}

/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi16_epi64&expand=1393)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxwq))]
pub fn _mm_mask_cvtepi16_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert: i64x2 = _mm_cvtepi16_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
    }
}

/// Sign extend packed 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi16_epi64&expand=1394)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxwq))]
pub fn _mm_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert: i64x2 = _mm_cvtepi16_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
    }
}

/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu16_epi32&expand=1553)
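///
/// A minimal illustrative sketch (not compiled as a doctest; assumes an
/// AVX-512F-capable CPU): the source lanes are treated as unsigned, so
/// `0xFFFF` widens to `65535` rather than `-1`.
///
/// ```ignore
/// let a = _mm256_set1_epi16(-1); // every lane is 0xFFFF
/// let r = _mm512_cvtepu16_epi32(a);
/// // r == _mm512_set1_epi32(65535)
/// ```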
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxwd))]
pub fn _mm512_cvtepu16_epi32(a: __m256i) -> __m512i {
    unsafe {
        let a: u16x16 = a.as_u16x16();
        transmute::<i32x16, _>(simd_cast(a))
    }
}

/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu16_epi32&expand=1554)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxwd))]
pub fn _mm512_mask_cvtepu16_epi32(src: __m512i, k: __mmask16, a: __m256i) -> __m512i {
    unsafe {
        let convert: i32x16 = _mm512_cvtepu16_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
    }
}

/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu16_epi32&expand=1555)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxwd))]
pub fn _mm512_maskz_cvtepu16_epi32(k: __mmask16, a: __m256i) -> __m512i {
    unsafe {
        let convert: i32x16 = _mm512_cvtepu16_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, convert, i32x16::ZERO))
    }
}

/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu16_epi32&expand=1551)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxwd))]
pub fn _mm256_mask_cvtepu16_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let convert: i32x8 = _mm256_cvtepu16_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
    }
}

/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu16_epi32&expand=1552)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxwd))]
pub fn _mm256_maskz_cvtepu16_epi32(k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let convert: i32x8 = _mm256_cvtepu16_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, convert, i32x8::ZERO))
    }
}

/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu16_epi32&expand=1548)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxwd))]
pub fn _mm_mask_cvtepu16_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert: i32x4 = _mm_cvtepu16_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
    }
}

/// Zero extend packed unsigned 16-bit integers in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu16_epi32&expand=1549)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxwd))]
pub fn _mm_maskz_cvtepu16_epi32(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert: i32x4 = _mm_cvtepu16_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, convert, i32x4::ZERO))
    }
}

/// Zero extend packed unsigned 16-bit integers in a to packed 64-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu16_epi64&expand=1562)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxwq))]
pub fn _mm512_cvtepu16_epi64(a: __m128i) -> __m512i {
    unsafe {
        let a: u16x8 = a.as_u16x8();
        transmute::<i64x8, _>(simd_cast(a))
    }
}

/// Zero extend packed unsigned 16-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu16_epi64&expand=1563)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxwq))]
pub fn _mm512_mask_cvtepu16_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
    unsafe {
        let convert: i64x8 = _mm512_cvtepu16_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
    }
}

/// Zero extend packed unsigned 16-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu16_epi64&expand=1564)
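///
/// A minimal illustrative sketch of the zeromask behavior (not compiled as a
/// doctest; assumes an AVX-512F-capable CPU): elements whose mask bit is clear
/// are zeroed instead of being copied from a source vector.
///
/// ```ignore
/// let a = _mm_set1_epi16(5);
/// // only mask bits 0 and 1 are set, so elements 2..=7 become zero
/// let r = _mm512_maskz_cvtepu16_epi64(0b0000_0011, a);
/// // r == _mm512_set_epi64(0, 0, 0, 0, 0, 0, 5, 5)
/// ```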
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxwq))]
pub fn _mm512_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m512i {
    unsafe {
        let convert: i64x8 = _mm512_cvtepu16_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
    }
}

/// Zero extend packed unsigned 16-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu16_epi64&expand=1560)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxwq))]
pub fn _mm256_mask_cvtepu16_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let convert: i64x4 = _mm256_cvtepu16_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
    }
}

/// Zero extend packed unsigned 16-bit integers in the low 8 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu16_epi64&expand=1561)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxwq))]
pub fn _mm256_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let convert: i64x4 = _mm256_cvtepu16_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
    }
}

/// Zero extend packed unsigned 16-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu16_epi64&expand=1557)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxwq))]
pub fn _mm_mask_cvtepu16_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert: i64x2 = _mm_cvtepu16_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
    }
}

/// Zero extend packed unsigned 16-bit integers in the low 4 bytes of a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu16_epi64&expand=1558)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxwq))]
pub fn _mm_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert: i64x2 = _mm_cvtepu16_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
    }
}

/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_epi64&expand=1428)
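///
/// A minimal illustrative sketch (not compiled as a doctest; assumes an
/// AVX-512F-capable CPU): sign extension preserves negative values.
///
/// ```ignore
/// let a = _mm256_set1_epi32(-1);
/// let r = _mm512_cvtepi32_epi64(a);
/// // r == _mm512_set1_epi64(-1)
/// ```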
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxdq))]
pub fn _mm512_cvtepi32_epi64(a: __m256i) -> __m512i {
    unsafe {
        let a: i32x8 = a.as_i32x8();
        transmute::<i64x8, _>(simd_cast(a))
    }
}

/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_epi64&expand=1429)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxdq))]
pub fn _mm512_mask_cvtepi32_epi64(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
    unsafe {
        let convert: i64x8 = _mm512_cvtepi32_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
    }
}

/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_epi64&expand=1430)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxdq))]
pub fn _mm512_maskz_cvtepi32_epi64(k: __mmask8, a: __m256i) -> __m512i {
    unsafe {
        let convert: i64x8 = _mm512_cvtepi32_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
    }
}

/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_epi64&expand=1426)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxdq))]
pub fn _mm256_mask_cvtepi32_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let convert: i64x4 = _mm256_cvtepi32_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
    }
}

/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_epi64&expand=1427)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxdq))]
pub fn _mm256_maskz_cvtepi32_epi64(k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let convert: i64x4 = _mm256_cvtepi32_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
    }
}

/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_epi64&expand=1423)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxdq))]
pub fn _mm_mask_cvtepi32_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert: i64x2 = _mm_cvtepi32_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
    }
}

/// Sign extend packed 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_epi64&expand=1424)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovsxdq))]
pub fn _mm_maskz_cvtepi32_epi64(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert: i64x2 = _mm_cvtepi32_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
    }
}

/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32_epi64&expand=1571)
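///
/// A minimal illustrative sketch (not compiled as a doctest; assumes an
/// AVX-512F-capable CPU): the source lanes are treated as unsigned, so the
/// bit pattern 0xFFFF_FFFF widens to 4294967295 rather than -1.
///
/// ```ignore
/// let a = _mm256_set1_epi32(-1); // every lane is 0xFFFF_FFFF
/// let r = _mm512_cvtepu32_epi64(a);
/// // r == _mm512_set1_epi64(0xFFFF_FFFF)
/// ```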
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxdq))]
pub fn _mm512_cvtepu32_epi64(a: __m256i) -> __m512i {
    unsafe {
        let a: u32x8 = a.as_u32x8();
        transmute::<i64x8, _>(simd_cast(a))
    }
}

/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32_epi64&expand=1572)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxdq))]
pub fn _mm512_mask_cvtepu32_epi64(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
    unsafe {
        let convert: i64x8 = _mm512_cvtepu32_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
    }
}

/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu32_epi64&expand=1573)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxdq))]
pub fn _mm512_maskz_cvtepu32_epi64(k: __mmask8, a: __m256i) -> __m512i {
    unsafe {
        let convert: i64x8 = _mm512_cvtepu32_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, convert, i64x8::ZERO))
    }
}

/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu32_epi64&expand=1569)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxdq))]
pub fn _mm256_mask_cvtepu32_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let convert: i64x4 = _mm256_cvtepu32_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
    }
}

/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu32_epi64&expand=1570)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxdq))]
pub fn _mm256_maskz_cvtepu32_epi64(k: __mmask8, a: __m128i) -> __m256i {
    unsafe {
        let convert: i64x4 = _mm256_cvtepu32_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, convert, i64x4::ZERO))
    }
}

/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu32_epi64&expand=1566)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxdq))]
pub fn _mm_mask_cvtepu32_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert: i64x2 = _mm_cvtepu32_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
    }
}

/// Zero extend packed unsigned 32-bit integers in a to packed 64-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu32_epi64&expand=1567)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovzxdq))]
pub fn _mm_maskz_cvtepu32_epi64(k: __mmask8, a: __m128i) -> __m128i {
    unsafe {
        let convert: i64x2 = _mm_cvtepu32_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, convert, i64x2::ZERO))
    }
}

/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_ps&expand=1455)
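///
/// A minimal illustrative sketch (not compiled as a doctest; assumes an
/// AVX-512F-capable CPU): each signed lane becomes the equivalent `f32`.
///
/// ```ignore
/// let a = _mm512_set1_epi32(-42);
/// let r = _mm512_cvtepi32_ps(a);
/// // r == _mm512_set1_ps(-42.0)
/// ```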
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtdq2ps))]
pub fn _mm512_cvtepi32_ps(a: __m512i) -> __m512 {
    unsafe {
        let a: i32x16 = a.as_i32x16();
        transmute::<f32x16, _>(simd_cast(a))
    }
}

/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_ps&expand=1456)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtdq2ps))]
pub fn _mm512_mask_cvtepi32_ps(src: __m512, k: __mmask16, a: __m512i) -> __m512 {
    unsafe {
        let convert: f32x16 = _mm512_cvtepi32_ps(a).as_f32x16();
        transmute(simd_select_bitmask(k, convert, src.as_f32x16()))
    }
}

/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_ps&expand=1457)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtdq2ps))]
pub fn _mm512_maskz_cvtepi32_ps(k: __mmask16, a: __m512i) -> __m512 {
    unsafe {
        let convert: f32x16 = _mm512_cvtepi32_ps(a).as_f32x16();
        transmute(simd_select_bitmask(k, convert, f32x16::ZERO))
    }
}

/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_ps&expand=1453)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtdq2ps))]
pub fn _mm256_mask_cvtepi32_ps(src: __m256, k: __mmask8, a: __m256i) -> __m256 {
    unsafe {
        let convert: f32x8 = _mm256_cvtepi32_ps(a).as_f32x8();
        transmute(simd_select_bitmask(k, convert, src.as_f32x8()))
    }
}

/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_ps&expand=1454)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtdq2ps))]
pub fn _mm256_maskz_cvtepi32_ps(k: __mmask8, a: __m256i) -> __m256 {
    unsafe {
        let convert: f32x8 = _mm256_cvtepi32_ps(a).as_f32x8();
        transmute(simd_select_bitmask(k, convert, f32x8::ZERO))
    }
}

/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_ps&expand=1450)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtdq2ps))]
pub fn _mm_mask_cvtepi32_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 {
    unsafe {
        let convert: f32x4 = _mm_cvtepi32_ps(a).as_f32x4();
        transmute(simd_select_bitmask(k, convert, src.as_f32x4()))
    }
}

/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_ps&expand=1451)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtdq2ps))]
pub fn _mm_maskz_cvtepi32_ps(k: __mmask8, a: __m128i) -> __m128 {
    unsafe {
        let convert: f32x4 = _mm_cvtepi32_ps(a).as_f32x4();
        transmute(simd_select_bitmask(k, convert, f32x4::ZERO))
    }
}

/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_pd&expand=1446)
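///
/// A minimal illustrative sketch (not compiled as a doctest; assumes an
/// AVX-512F-capable CPU): eight 32-bit lanes widen to eight `f64` lanes.
///
/// ```ignore
/// let a = _mm256_set1_epi32(3);
/// let r = _mm512_cvtepi32_pd(a);
/// // r == _mm512_set1_pd(3.0)
/// ```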
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
pub fn _mm512_cvtepi32_pd(a: __m256i) -> __m512d {
    unsafe {
        let a: i32x8 = a.as_i32x8();
        transmute::<f64x8, _>(simd_cast(a))
    }
}

/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_pd&expand=1447)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
pub fn _mm512_mask_cvtepi32_pd(src: __m512d, k: __mmask8, a: __m256i) -> __m512d {
    unsafe {
        let convert: f64x8 = _mm512_cvtepi32_pd(a).as_f64x8();
        transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
    }
}

/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_pd&expand=1448)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
pub fn _mm512_maskz_cvtepi32_pd(k: __mmask8, a: __m256i) -> __m512d {
    unsafe {
        let convert: f64x8 = _mm512_cvtepi32_pd(a).as_f64x8();
        transmute(simd_select_bitmask(k, convert, f64x8::ZERO))
    }
}

/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_pd&expand=1444)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
pub fn _mm256_mask_cvtepi32_pd(src: __m256d, k: __mmask8, a: __m128i) -> __m256d {
    unsafe {
        let convert: f64x4 = _mm256_cvtepi32_pd(a).as_f64x4();
        transmute(simd_select_bitmask(k, convert, src.as_f64x4()))
    }
}

/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_pd&expand=1445)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
pub fn _mm256_maskz_cvtepi32_pd(k: __mmask8, a: __m128i) -> __m256d {
    unsafe {
        let convert: f64x4 = _mm256_cvtepi32_pd(a).as_f64x4();
        transmute(simd_select_bitmask(k, convert, f64x4::ZERO))
    }
}

/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_pd&expand=1441)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
pub fn _mm_mask_cvtepi32_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d {
    unsafe {
        let convert: f64x2 = _mm_cvtepi32_pd(a).as_f64x2();
        transmute(simd_select_bitmask(k, convert, src.as_f64x2()))
    }
}

/// Convert packed signed 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_pd&expand=1442)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
pub fn _mm_maskz_cvtepi32_pd(k: __mmask8, a: __m128i) -> __m128d {
    unsafe {
        let convert: f64x2 = _mm_cvtepi32_pd(a).as_f64x2();
        transmute(simd_select_bitmask(k, convert, f64x2::ZERO))
    }
}

/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32_ps&expand=1583)
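///
/// A minimal illustrative sketch (not compiled as a doctest; assumes an
/// AVX-512F-capable CPU): lanes are interpreted as unsigned before conversion.
///
/// ```ignore
/// let a = _mm512_set1_epi32(-1); // every lane holds u32::MAX
/// let r = _mm512_cvtepu32_ps(a);
/// // each lane of r is about 4.29e9 (u32::MAX rounded to f32), not -1.0
/// ```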
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtudq2ps))]
pub fn _mm512_cvtepu32_ps(a: __m512i) -> __m512 {
    unsafe {
        let a: u32x16 = a.as_u32x16();
        transmute::<f32x16, _>(simd_cast(a))
    }
}

/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32_ps&expand=1584)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtudq2ps))]
pub fn _mm512_mask_cvtepu32_ps(src: __m512, k: __mmask16, a: __m512i) -> __m512 {
    unsafe {
        let convert: f32x16 = _mm512_cvtepu32_ps(a).as_f32x16();
        transmute(simd_select_bitmask(k, convert, src.as_f32x16()))
    }
}

/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu32_ps&expand=1585)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtudq2ps))]
pub fn _mm512_maskz_cvtepu32_ps(k: __mmask16, a: __m512i) -> __m512 {
    unsafe {
        let convert: f32x16 = _mm512_cvtepu32_ps(a).as_f32x16();
        transmute(simd_select_bitmask(k, convert, f32x16::ZERO))
    }
}

/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32_pd&expand=1580)
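///
/// A minimal illustrative sketch (not compiled as a doctest; assumes an
/// AVX-512F-capable CPU): `f64` can represent every `u32` exactly.
///
/// ```ignore
/// let a = _mm256_set1_epi32(-1); // every lane is 0xFFFF_FFFF
/// let r = _mm512_cvtepu32_pd(a);
/// // r == _mm512_set1_pd(4294967295.0)
/// ```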
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
pub fn _mm512_cvtepu32_pd(a: __m256i) -> __m512d {
    unsafe {
        let a: u32x8 = a.as_u32x8();
        transmute::<f64x8, _>(simd_cast(a))
    }
}

/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32_pd&expand=1581)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
pub fn _mm512_mask_cvtepu32_pd(src: __m512d, k: __mmask8, a: __m256i) -> __m512d {
    unsafe {
        let convert: f64x8 = _mm512_cvtepu32_pd(a).as_f64x8();
        transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
    }
}

/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepu32_pd&expand=1582)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
pub fn _mm512_maskz_cvtepu32_pd(k: __mmask8, a: __m256i) -> __m512d {
    unsafe {
        let convert: f64x8 = _mm512_cvtepu32_pd(a).as_f64x8();
        transmute(simd_select_bitmask(k, convert, f64x8::ZERO))
    }
}

/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepu32_pd&expand=1577)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
pub fn _mm256_cvtepu32_pd(a: __m128i) -> __m256d {
    unsafe {
        let a: u32x4 = a.as_u32x4();
        transmute::<f64x4, _>(simd_cast(a))
    }
}

/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepu32_pd&expand=1578)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
pub fn _mm256_mask_cvtepu32_pd(src: __m256d, k: __mmask8, a: __m128i) -> __m256d {
    unsafe {
        let convert: f64x4 = _mm256_cvtepu32_pd(a).as_f64x4();
        transmute(simd_select_bitmask(k, convert, src.as_f64x4()))
    }
}

/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepu32_pd&expand=1579)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
pub fn _mm256_maskz_cvtepu32_pd(k: __mmask8, a: __m128i) -> __m256d {
    unsafe {
        let convert: f64x4 = _mm256_cvtepu32_pd(a).as_f64x4();
        transmute(simd_select_bitmask(k, convert, f64x4::ZERO))
    }
}

/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepu32_pd&expand=1574)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
pub fn _mm_cvtepu32_pd(a: __m128i) -> __m128d {
    unsafe {
        let a: u32x4 = a.as_u32x4();
        let u64: u32x2 = simd_shuffle!(a, a, [0, 1]);
        transmute::<f64x2, _>(simd_cast(u64))
    }
}

/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepu32_pd&expand=1575)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
pub fn _mm_mask_cvtepu32_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d {
    unsafe {
        let convert: f64x2 = _mm_cvtepu32_pd(a).as_f64x2();
        transmute(simd_select_bitmask(k, convert, src.as_f64x2()))
    }
}

/// Convert packed unsigned 32-bit integers in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepu32_pd&expand=1576)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
pub fn _mm_maskz_cvtepu32_pd(k: __mmask8, a: __m128i) -> __m128d {
    unsafe {
        let convert: f64x2 = _mm_cvtepu32_pd(a).as_f64x2();
        transmute(simd_select_bitmask(k, convert, f64x2::ZERO))
    }
}

/// Performs element-by-element conversion of the lower half of packed 32-bit integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32lo_pd&expand=1464)
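///
/// A minimal illustrative sketch (not compiled as a doctest; assumes an
/// AVX-512F-capable CPU): only the lower eight 32-bit elements of v2
/// participate in the conversion.
///
/// ```ignore
/// let v2 = _mm512_set1_epi32(-2);
/// let r = _mm512_cvtepi32lo_pd(v2);
/// // r == _mm512_set1_pd(-2.0)
/// ```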
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
pub fn _mm512_cvtepi32lo_pd(v2: __m512i) -> __m512d {
    unsafe {
        let v2: i32x16 = v2.as_i32x16();
        let v256: i32x8 = simd_shuffle!(v2, v2, [0, 1, 2, 3, 4, 5, 6, 7]);
        transmute::<f64x8, _>(simd_cast(v256))
    }
}

/// Performs element-by-element conversion of the lower half of packed 32-bit integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32lo_pd&expand=1465)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
pub fn _mm512_mask_cvtepi32lo_pd(src: __m512d, k: __mmask8, v2: __m512i) -> __m512d {
    unsafe {
        let convert: f64x8 = _mm512_cvtepi32lo_pd(v2).as_f64x8();
        transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
    }
}

/// Performs element-by-element conversion of the lower half of packed 32-bit unsigned integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepu32lo_pd&expand=1586)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
pub fn _mm512_cvtepu32lo_pd(v2: __m512i) -> __m512d {
    unsafe {
        let v2: u32x16 = v2.as_u32x16();
        let v256: u32x8 = simd_shuffle!(v2, v2, [0, 1, 2, 3, 4, 5, 6, 7]);
        transmute::<f64x8, _>(simd_cast(v256))
    }
}

/// Performs element-by-element conversion of the lower half of 32-bit unsigned integer elements in v2 to packed double-precision (64-bit) floating-point elements, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepu32lo_pd&expand=1587)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
pub fn _mm512_mask_cvtepu32lo_pd(src: __m512d, k: __mmask8, v2: __m512i) -> __m512d {
    unsafe {
        let convert: f64x8 = _mm512_cvtepu32lo_pd(v2).as_f64x8();
        transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
    }
}

/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_epi16&expand=1419)
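///
/// A minimal illustrative sketch (not compiled as a doctest; assumes an
/// AVX-512F-capable CPU): truncation keeps only the low 16 bits of each lane.
///
/// ```ignore
/// let a = _mm512_set1_epi32(0x0001_0002);
/// let r = _mm512_cvtepi32_epi16(a);
/// // r == _mm256_set1_epi16(2)
/// ```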
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovdw))]
pub fn _mm512_cvtepi32_epi16(a: __m512i) -> __m256i {
    unsafe {
        let a: i32x16 = a.as_i32x16();
        transmute::<i16x16, _>(simd_cast(a))
    }
}

/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_epi16&expand=1420)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovdw))]
pub fn _mm512_mask_cvtepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i {
    unsafe {
        let convert: i16x16 = _mm512_cvtepi32_epi16(a).as_i16x16();
        transmute(simd_select_bitmask(k, convert, src.as_i16x16()))
    }
}

/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_epi16&expand=1421)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovdw))]
pub fn _mm512_maskz_cvtepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
    unsafe {
        let convert: i16x16 = _mm512_cvtepi32_epi16(a).as_i16x16();
        transmute(simd_select_bitmask(k, convert, i16x16::ZERO))
    }
}

/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi32_epi16&expand=1416)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovdw))]
pub fn _mm256_cvtepi32_epi16(a: __m256i) -> __m128i {
    unsafe {
        let a: i32x8 = a.as_i32x8();
        transmute::<i16x8, _>(simd_cast(a))
    }
}

/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_epi16&expand=1417)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovdw))]
pub fn _mm256_mask_cvtepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    unsafe {
        let convert: i16x8 = _mm256_cvtepi32_epi16(a).as_i16x8();
        transmute(simd_select_bitmask(k, convert, src.as_i16x8()))
    }
}

/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_epi16&expand=1418)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovdw))]
pub fn _mm256_maskz_cvtepi32_epi16(k: __mmask8, a: __m256i) -> __m128i {
    unsafe {
        let convert: i16x8 = _mm256_cvtepi32_epi16(a).as_i16x8();
        transmute(simd_select_bitmask(k, convert, i16x8::ZERO))
    }
}

/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_epi16&expand=1413)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovdw))]
pub fn _mm_cvtepi32_epi16(a: __m128i) -> __m128i {
    unsafe { transmute(vpmovdw128(a.as_i32x4(), i16x8::ZERO, 0b11111111)) }
}

/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_epi16&expand=1414)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovdw))]
pub fn _mm_mask_cvtepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovdw128(a.as_i32x4(), src.as_i16x8(), k)) }
}

/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_epi16&expand=1415)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovdw))]
pub fn _mm_maskz_cvtepi32_epi16(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovdw128(a.as_i32x4(), i16x8::ZERO, k)) }
}

/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi32_epi8&expand=1437)
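///
/// A minimal illustrative sketch (not compiled as a doctest; assumes an
/// AVX-512F-capable CPU): truncation keeps only the low 8 bits of each lane.
///
/// ```ignore
/// let a = _mm512_set1_epi32(0x1FF); // 511
/// let r = _mm512_cvtepi32_epi8(a);
/// // the low byte is 0xFF, i.e. -1 as i8: r == _mm_set1_epi8(-1)
/// ```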
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovdb))]
pub fn _mm512_cvtepi32_epi8(a: __m512i) -> __m128i {
    unsafe {
        let a: i32x16 = a.as_i32x16();
        transmute::<i8x16, _>(simd_cast(a))
    }
}

/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_epi8&expand=1438)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovdb))]
pub fn _mm512_mask_cvtepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i {
    unsafe {
        let convert: i8x16 = _mm512_cvtepi32_epi8(a).as_i8x16();
        transmute(simd_select_bitmask(k, convert, src.as_i8x16()))
    }
}

/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi32_epi8&expand=1439)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovdb))]
pub fn _mm512_maskz_cvtepi32_epi8(k: __mmask16, a: __m512i) -> __m128i {
    unsafe {
        let convert: i8x16 = _mm512_cvtepi32_epi8(a).as_i8x16();
        transmute(simd_select_bitmask(k, convert, i8x16::ZERO))
    }
}

/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi32_epi8&expand=1434)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovdb))]
pub fn _mm256_cvtepi32_epi8(a: __m256i) -> __m128i {
    unsafe { transmute(vpmovdb256(a.as_i32x8(), i8x16::ZERO, 0b11111111)) }
}

/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_epi8&expand=1435)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovdb))]
pub fn _mm256_mask_cvtepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovdb256(a.as_i32x8(), src.as_i8x16(), k)) }
}

/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi32_epi8&expand=1436)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovdb))]
pub fn _mm256_maskz_cvtepi32_epi8(k: __mmask8, a: __m256i) -> __m128i {
    unsafe { transmute(vpmovdb256(a.as_i32x8(), i8x16::ZERO, k)) }
}

/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi32_epi8&expand=1431)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovdb))]
pub fn _mm_cvtepi32_epi8(a: __m128i) -> __m128i {
    unsafe { transmute(vpmovdb128(a.as_i32x4(), i8x16::ZERO, 0b11111111)) }
}

/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_epi8&expand=1432)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovdb))]
pub fn _mm_mask_cvtepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovdb128(a.as_i32x4(), src.as_i8x16(), k)) }
}

/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi32_epi8&expand=1433)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovdb))]
pub fn _mm_maskz_cvtepi32_epi8(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpmovdb128(a.as_i32x4(), i8x16::ZERO, k)) }
}

/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_epi32&expand=1481)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpmovqd))]
pub fn _mm512_cvtepi64_epi32(a: __m512i) -> __m256i {
    unsafe {
        let a: i64x8 = a.as_i64x8();
        transmute::<i32x8, _>(simd_cast(a))
    }
}

/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_epi32&expand=1482)
#[inline]
13160#[target_feature(enable = "avx512f")]
13161#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13162#[cfg_attr(test, assert_instr(vpmovqd))]
13163pub fn _mm512_mask_cvtepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i {
13164 unsafe {
13165 let convert: i32x8 = _mm512_cvtepi64_epi32(a).as_i32x8();
13166 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i32x8()))
13167 }
13168}
13169
13170/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13171///
13172/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_epi32&expand=1483)
13173#[inline]
13174#[target_feature(enable = "avx512f")]
13175#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13176#[cfg_attr(test, assert_instr(vpmovqd))]
13177pub fn _mm512_maskz_cvtepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
13178 unsafe {
13179 let convert: i32x8 = _mm512_cvtepi64_epi32(a).as_i32x8();
13180 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i32x8::ZERO))
13181 }
13182}
13183
13184/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst.
13185///
13186/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_epi32&expand=1478)
13187#[inline]
13188#[target_feature(enable = "avx512f,avx512vl")]
13189#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13190#[cfg_attr(test, assert_instr(vpmovqd))]
13191pub fn _mm256_cvtepi64_epi32(a: __m256i) -> __m128i {
13192 unsafe {
13193 let a: i64x4 = a.as_i64x4();
13194 transmute::<i32x4, _>(src:simd_cast(a))
13195 }
13196}
13197
13198/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13199///
13200/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_epi32&expand=1479)
13201#[inline]
13202#[target_feature(enable = "avx512f,avx512vl")]
13203#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13204#[cfg_attr(test, assert_instr(vpmovqd))]
13205pub fn _mm256_mask_cvtepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13206 unsafe {
13207 let convert: i32x4 = _mm256_cvtepi64_epi32(a).as_i32x4();
13208 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i32x4()))
13209 }
13210}
13211
13212/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13213///
13214/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_epi32&expand=1480)
13215#[inline]
13216#[target_feature(enable = "avx512f,avx512vl")]
13217#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13218#[cfg_attr(test, assert_instr(vpmovqd))]
13219pub fn _mm256_maskz_cvtepi64_epi32(k: __mmask8, a: __m256i) -> __m128i {
13220 unsafe {
13221 let convert: i32x4 = _mm256_cvtepi64_epi32(a).as_i32x4();
13222 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i32x4::ZERO))
13223 }
13224}
13225
13226/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst.
13227///
13228/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_epi32&expand=1475)
13229#[inline]
13230#[target_feature(enable = "avx512f,avx512vl")]
13231#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13232#[cfg_attr(test, assert_instr(vpmovqd))]
13233pub fn _mm_cvtepi64_epi32(a: __m128i) -> __m128i {
13234 unsafe { transmute(src:vpmovqd128(a.as_i64x2(), src:i32x4::ZERO, mask:0b11111111)) }
13235}
13236
13237/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13238///
13239/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_epi32&expand=1476)
13240#[inline]
13241#[target_feature(enable = "avx512f,avx512vl")]
13242#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13243#[cfg_attr(test, assert_instr(vpmovqd))]
13244pub fn _mm_mask_cvtepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13245 unsafe { transmute(src:vpmovqd128(a.as_i64x2(), src.as_i32x4(), mask:k)) }
13246}
13247
13248/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13249///
13250/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_epi32&expand=1477)
13251#[inline]
13252#[target_feature(enable = "avx512f,avx512vl")]
13253#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13254#[cfg_attr(test, assert_instr(vpmovqd))]
13255pub fn _mm_maskz_cvtepi64_epi32(k: __mmask8, a: __m128i) -> __m128i {
13256 unsafe { transmute(src:vpmovqd128(a.as_i64x2(), src:i32x4::ZERO, mask:k)) }
13257}
13258
13259/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
13260///
13261/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_epi16&expand=1472)
13262#[inline]
13263#[target_feature(enable = "avx512f")]
13264#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13265#[cfg_attr(test, assert_instr(vpmovqw))]
13266pub fn _mm512_cvtepi64_epi16(a: __m512i) -> __m128i {
13267 unsafe {
13268 let a: i64x8 = a.as_i64x8();
13269 transmute::<i16x8, _>(src:simd_cast(a))
13270 }
13271}
13272
13273/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13274///
13275/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_epi16&expand=1473)
13276#[inline]
13277#[target_feature(enable = "avx512f")]
13278#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13279#[cfg_attr(test, assert_instr(vpmovqw))]
13280pub fn _mm512_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
13281 unsafe {
13282 let convert: i16x8 = _mm512_cvtepi64_epi16(a).as_i16x8();
13283 transmute(src:simd_select_bitmask(m:k, yes:convert, no:src.as_i16x8()))
13284 }
13285}
13286
13287/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13288///
13289/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_epi16&expand=1474)
13290#[inline]
13291#[target_feature(enable = "avx512f")]
13292#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13293#[cfg_attr(test, assert_instr(vpmovqw))]
13294pub fn _mm512_maskz_cvtepi64_epi16(k: __mmask8, a: __m512i) -> __m128i {
13295 unsafe {
13296 let convert: i16x8 = _mm512_cvtepi64_epi16(a).as_i16x8();
13297 transmute(src:simd_select_bitmask(m:k, yes:convert, no:i16x8::ZERO))
13298 }
13299}
13300
13301/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
13302///
13303/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_epi16&expand=1469)
13304#[inline]
13305#[target_feature(enable = "avx512f,avx512vl")]
13306#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13307#[cfg_attr(test, assert_instr(vpmovqw))]
13308pub fn _mm256_cvtepi64_epi16(a: __m256i) -> __m128i {
13309 unsafe { transmute(src:vpmovqw256(a.as_i64x4(), src:i16x8::ZERO, mask:0b11111111)) }
13310}
13311
13312/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13313///
13314/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_epi16&expand=1470)
13315#[inline]
13316#[target_feature(enable = "avx512f,avx512vl")]
13317#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13318#[cfg_attr(test, assert_instr(vpmovqw))]
13319pub fn _mm256_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13320 unsafe { transmute(src:vpmovqw256(a.as_i64x4(), src.as_i16x8(), mask:k)) }
13321}
13322
13323/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13324///
13325/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_epi16&expand=1471)
13326#[inline]
13327#[target_feature(enable = "avx512f,avx512vl")]
13328#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13329#[cfg_attr(test, assert_instr(vpmovqw))]
13330pub fn _mm256_maskz_cvtepi64_epi16(k: __mmask8, a: __m256i) -> __m128i {
13331 unsafe { transmute(src:vpmovqw256(a.as_i64x4(), src:i16x8::ZERO, mask:k)) }
13332}
13333
13334/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst.
13335///
13336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_epi16&expand=1466)
13337#[inline]
13338#[target_feature(enable = "avx512f,avx512vl")]
13339#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13340#[cfg_attr(test, assert_instr(vpmovqw))]
13341pub fn _mm_cvtepi64_epi16(a: __m128i) -> __m128i {
13342 unsafe { transmute(src:vpmovqw128(a.as_i64x2(), src:i16x8::ZERO, mask:0b11111111)) }
13343}
13344
13345/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13346///
13347/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_epi16&expand=1467)
13348#[inline]
13349#[target_feature(enable = "avx512f,avx512vl")]
13350#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13351#[cfg_attr(test, assert_instr(vpmovqw))]
13352pub fn _mm_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13353 unsafe { transmute(src:vpmovqw128(a.as_i64x2(), src.as_i16x8(), mask:k)) }
13354}
13355
13356/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13357///
13358/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_epi16&expand=1468)
13359#[inline]
13360#[target_feature(enable = "avx512f,avx512vl")]
13361#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13362#[cfg_attr(test, assert_instr(vpmovqw))]
13363pub fn _mm_maskz_cvtepi64_epi16(k: __mmask8, a: __m128i) -> __m128i {
13364 unsafe { transmute(src:vpmovqw128(a.as_i64x2(), src:i16x8::ZERO, mask:k)) }
13365}
13366
13367/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
13368///
13369/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtepi64_epi8&expand=1490)
13370#[inline]
13371#[target_feature(enable = "avx512f")]
13372#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13373#[cfg_attr(test, assert_instr(vpmovqb))]
13374pub fn _mm512_cvtepi64_epi8(a: __m512i) -> __m128i {
13375 unsafe { transmute(src:vpmovqb(a.as_i64x8(), src:i8x16::ZERO, mask:0b11111111)) }
13376}
13377
13378/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13379///
13380/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_epi8&expand=1491)
13381#[inline]
13382#[target_feature(enable = "avx512f")]
13383#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13384#[cfg_attr(test, assert_instr(vpmovqb))]
13385pub fn _mm512_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
13386 unsafe { transmute(src:vpmovqb(a.as_i64x8(), src.as_i8x16(), mask:k)) }
13387}
13388
13389/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13390///
13391/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtepi64_epi8&expand=1492)
13392#[inline]
13393#[target_feature(enable = "avx512f")]
13394#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13395#[cfg_attr(test, assert_instr(vpmovqb))]
13396pub fn _mm512_maskz_cvtepi64_epi8(k: __mmask8, a: __m512i) -> __m128i {
13397 unsafe { transmute(src:vpmovqb(a.as_i64x8(), src:i8x16::ZERO, mask:k)) }
13398}
13399
13400/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
13401///
13402/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtepi64_epi8&expand=1487)
13403#[inline]
13404#[target_feature(enable = "avx512f,avx512vl")]
13405#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13406#[cfg_attr(test, assert_instr(vpmovqb))]
13407pub fn _mm256_cvtepi64_epi8(a: __m256i) -> __m128i {
13408 unsafe { transmute(src:vpmovqb256(a.as_i64x4(), src:i8x16::ZERO, mask:0b11111111)) }
13409}
13410
13411/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13412///
13413/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_epi8&expand=1488)
13414#[inline]
13415#[target_feature(enable = "avx512f,avx512vl")]
13416#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13417#[cfg_attr(test, assert_instr(vpmovqb))]
13418pub fn _mm256_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13419 unsafe { transmute(src:vpmovqb256(a.as_i64x4(), src.as_i8x16(), mask:k)) }
13420}
13421
13422/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13423///
13424/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtepi64_epi8&expand=1489)
13425#[inline]
13426#[target_feature(enable = "avx512f,avx512vl")]
13427#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13428#[cfg_attr(test, assert_instr(vpmovqb))]
13429pub fn _mm256_maskz_cvtepi64_epi8(k: __mmask8, a: __m256i) -> __m128i {
13430 unsafe { transmute(src:vpmovqb256(a.as_i64x4(), src:i8x16::ZERO, mask:k)) }
13431}
13432
13433/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst.
13434///
13435/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtepi64_epi8&expand=1484)
13436#[inline]
13437#[target_feature(enable = "avx512f,avx512vl")]
13438#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13439#[cfg_attr(test, assert_instr(vpmovqb))]
13440pub fn _mm_cvtepi64_epi8(a: __m128i) -> __m128i {
13441 unsafe { transmute(src:vpmovqb128(a.as_i64x2(), src:i8x16::ZERO, mask:0b11111111)) }
13442}
13443
13444/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13445///
13446/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_epi8&expand=1485)
13447#[inline]
13448#[target_feature(enable = "avx512f,avx512vl")]
13449#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13450#[cfg_attr(test, assert_instr(vpmovqb))]
13451pub fn _mm_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13452 unsafe { transmute(src:vpmovqb128(a.as_i64x2(), src.as_i8x16(), mask:k)) }
13453}
13454
13455/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13456///
13457/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtepi64_epi8&expand=1486)
13458#[inline]
13459#[target_feature(enable = "avx512f,avx512vl")]
13460#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13461#[cfg_attr(test, assert_instr(vpmovqb))]
13462pub fn _mm_maskz_cvtepi64_epi8(k: __mmask8, a: __m128i) -> __m128i {
13463 unsafe { transmute(src:vpmovqb128(a.as_i64x2(), src:i8x16::ZERO, mask:k)) }
13464}
13465
13466/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13467///
13468/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi32_epi16&expand=1819)
13469#[inline]
13470#[target_feature(enable = "avx512f")]
13471#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13472#[cfg_attr(test, assert_instr(vpmovsdw))]
13473pub fn _mm512_cvtsepi32_epi16(a: __m512i) -> __m256i {
13474 unsafe { transmute(src:vpmovsdw(a.as_i32x16(), src:i16x16::ZERO, mask:0b11111111_11111111)) }
13475}
13476
13477/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13478///
13479/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_epi16&expand=1820)
13480#[inline]
13481#[target_feature(enable = "avx512f")]
13482#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13483#[cfg_attr(test, assert_instr(vpmovsdw))]
13484pub fn _mm512_mask_cvtsepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i {
13485 unsafe { transmute(src:vpmovsdw(a.as_i32x16(), src.as_i16x16(), mask:k)) }
13486}
13487
13488/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13489///
13490/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi32_epi16&expand=1819)
13491#[inline]
13492#[target_feature(enable = "avx512f")]
13493#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13494#[cfg_attr(test, assert_instr(vpmovsdw))]
13495pub fn _mm512_maskz_cvtsepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
13496 unsafe { transmute(src:vpmovsdw(a.as_i32x16(), src:i16x16::ZERO, mask:k)) }
13497}
13498
13499/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13500///
13501/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi32_epi16&expand=1816)
13502#[inline]
13503#[target_feature(enable = "avx512f,avx512vl")]
13504#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13505#[cfg_attr(test, assert_instr(vpmovsdw))]
13506pub fn _mm256_cvtsepi32_epi16(a: __m256i) -> __m128i {
13507 unsafe { transmute(src:vpmovsdw256(a.as_i32x8(), src:i16x8::ZERO, mask:0b11111111)) }
13508}
13509
13510/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13511///
13512/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_epi16&expand=1817)
13513#[inline]
13514#[target_feature(enable = "avx512f,avx512vl")]
13515#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13516#[cfg_attr(test, assert_instr(vpmovsdw))]
13517pub fn _mm256_mask_cvtsepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13518 unsafe { transmute(src:vpmovsdw256(a.as_i32x8(), src.as_i16x8(), mask:k)) }
13519}
13520
13521/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13522///
13523/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi32_epi16&expand=1818)
13524#[inline]
13525#[target_feature(enable = "avx512f,avx512vl")]
13526#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13527#[cfg_attr(test, assert_instr(vpmovsdw))]
13528pub fn _mm256_maskz_cvtsepi32_epi16(k: __mmask8, a: __m256i) -> __m128i {
13529 unsafe { transmute(src:vpmovsdw256(a.as_i32x8(), src:i16x8::ZERO, mask:k)) }
13530}
13531
13532/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13533///
13534/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi32_epi16&expand=1813)
13535#[inline]
13536#[target_feature(enable = "avx512f,avx512vl")]
13537#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13538#[cfg_attr(test, assert_instr(vpmovsdw))]
13539pub fn _mm_cvtsepi32_epi16(a: __m128i) -> __m128i {
13540 unsafe { transmute(src:vpmovsdw128(a.as_i32x4(), src:i16x8::ZERO, mask:0b11111111)) }
13541}
13542
13543/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13544///
13545/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_epi16&expand=1814)
13546#[inline]
13547#[target_feature(enable = "avx512f,avx512vl")]
13548#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13549#[cfg_attr(test, assert_instr(vpmovsdw))]
13550pub fn _mm_mask_cvtsepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13551 unsafe { transmute(src:vpmovsdw128(a.as_i32x4(), src.as_i16x8(), mask:k)) }
13552}
13553
13554/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13555///
13556/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi32_epi16&expand=1815)
13557#[inline]
13558#[target_feature(enable = "avx512f,avx512vl")]
13559#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13560#[cfg_attr(test, assert_instr(vpmovsdw))]
13561pub fn _mm_maskz_cvtsepi32_epi16(k: __mmask8, a: __m128i) -> __m128i {
13562 unsafe { transmute(src:vpmovsdw128(a.as_i32x4(), src:i16x8::ZERO, mask:k)) }
13563}
13564
13565/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
13566///
13567/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi32_epi8&expand=1828)
13568#[inline]
13569#[target_feature(enable = "avx512f")]
13570#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13571#[cfg_attr(test, assert_instr(vpmovsdb))]
13572pub fn _mm512_cvtsepi32_epi8(a: __m512i) -> __m128i {
13573 unsafe { transmute(src:vpmovsdb(a.as_i32x16(), src:i8x16::ZERO, mask:0b11111111_11111111)) }
13574}
13575
13576/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13577///
13578/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_epi8&expand=1829)
13579#[inline]
13580#[target_feature(enable = "avx512f")]
13581#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13582#[cfg_attr(test, assert_instr(vpmovsdb))]
13583pub fn _mm512_mask_cvtsepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i {
13584 unsafe { transmute(src:vpmovsdb(a.as_i32x16(), src.as_i8x16(), mask:k)) }
13585}
13586
13587/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13588///
13589/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi32_epi8&expand=1830)
13590#[inline]
13591#[target_feature(enable = "avx512f")]
13592#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13593#[cfg_attr(test, assert_instr(vpmovsdb))]
13594pub fn _mm512_maskz_cvtsepi32_epi8(k: __mmask16, a: __m512i) -> __m128i {
13595 unsafe { transmute(src:vpmovsdb(a.as_i32x16(), src:i8x16::ZERO, mask:k)) }
13596}
13597
13598/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
13599///
13600/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi32_epi8&expand=1825)
13601#[inline]
13602#[target_feature(enable = "avx512f,avx512vl")]
13603#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13604#[cfg_attr(test, assert_instr(vpmovsdb))]
13605pub fn _mm256_cvtsepi32_epi8(a: __m256i) -> __m128i {
13606 unsafe { transmute(src:vpmovsdb256(a.as_i32x8(), src:i8x16::ZERO, mask:0b11111111)) }
13607}
13608
13609/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13610///
13611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_epi8&expand=1826)
13612#[inline]
13613#[target_feature(enable = "avx512f,avx512vl")]
13614#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13615#[cfg_attr(test, assert_instr(vpmovsdb))]
13616pub fn _mm256_mask_cvtsepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13617 unsafe { transmute(src:vpmovsdb256(a.as_i32x8(), src.as_i8x16(), mask:k)) }
13618}
13619
13620/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13621///
13622/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi32_epi8&expand=1827)
13623#[inline]
13624#[target_feature(enable = "avx512f,avx512vl")]
13625#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13626#[cfg_attr(test, assert_instr(vpmovsdb))]
13627pub fn _mm256_maskz_cvtsepi32_epi8(k: __mmask8, a: __m256i) -> __m128i {
13628 unsafe { transmute(src:vpmovsdb256(a.as_i32x8(), src:i8x16::ZERO, mask:k)) }
13629}
13630
13631/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
13632///
13633/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi32_epi8&expand=1822)
13634#[inline]
13635#[target_feature(enable = "avx512f,avx512vl")]
13636#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13637#[cfg_attr(test, assert_instr(vpmovsdb))]
13638pub fn _mm_cvtsepi32_epi8(a: __m128i) -> __m128i {
13639 unsafe { transmute(src:vpmovsdb128(a.as_i32x4(), src:i8x16::ZERO, mask:0b11111111)) }
13640}
13641
13642/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13643///
13644/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_epi8&expand=1823)
13645#[inline]
13646#[target_feature(enable = "avx512f,avx512vl")]
13647#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13648#[cfg_attr(test, assert_instr(vpmovsdb))]
13649pub fn _mm_mask_cvtsepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13650 unsafe { transmute(src:vpmovsdb128(a.as_i32x4(), src.as_i8x16(), mask:k)) }
13651}
13652
13653/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13654///
13655/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi32_epi8&expand=1824)
13656#[inline]
13657#[target_feature(enable = "avx512f,avx512vl")]
13658#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13659#[cfg_attr(test, assert_instr(vpmovsdb))]
13660pub fn _mm_maskz_cvtsepi32_epi8(k: __mmask8, a: __m128i) -> __m128i {
13661 unsafe { transmute(src:vpmovsdb128(a.as_i32x4(), src:i8x16::ZERO, mask:k)) }
13662}
13663
13664/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst.
13665///
13666/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi64_epi32&expand=1852)
13667#[inline]
13668#[target_feature(enable = "avx512f")]
13669#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13670#[cfg_attr(test, assert_instr(vpmovsqd))]
13671pub fn _mm512_cvtsepi64_epi32(a: __m512i) -> __m256i {
13672 unsafe { transmute(src:vpmovsqd(a.as_i64x8(), src:i32x8::ZERO, mask:0b11111111)) }
13673}
13674
13675/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13676///
13677/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_epi32&expand=1853)
13678#[inline]
13679#[target_feature(enable = "avx512f")]
13680#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13681#[cfg_attr(test, assert_instr(vpmovsqd))]
13682pub fn _mm512_mask_cvtsepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i {
13683 unsafe { transmute(src:vpmovsqd(a.as_i64x8(), src.as_i32x8(), mask:k)) }
13684}
13685
13686/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13687///
13688/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi64_epi32&expand=1854)
13689#[inline]
13690#[target_feature(enable = "avx512f")]
13691#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13692#[cfg_attr(test, assert_instr(vpmovsqd))]
13693pub fn _mm512_maskz_cvtsepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
13694 unsafe { transmute(src:vpmovsqd(a.as_i64x8(), src:i32x8::ZERO, mask:k)) }
13695}
13696
13697/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst.
13698///
13699/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi64_epi32&expand=1849)
13700#[inline]
13701#[target_feature(enable = "avx512f,avx512vl")]
13702#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13703#[cfg_attr(test, assert_instr(vpmovsqd))]
13704pub fn _mm256_cvtsepi64_epi32(a: __m256i) -> __m128i {
13705 unsafe { transmute(src:vpmovsqd256(a.as_i64x4(), src:i32x4::ZERO, mask:0b11111111)) }
13706}
13707
13708/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13709///
13710/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_epi32&expand=1850)
13711#[inline]
13712#[target_feature(enable = "avx512f,avx512vl")]
13713#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13714#[cfg_attr(test, assert_instr(vpmovsqd))]
13715pub fn _mm256_mask_cvtsepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13716 unsafe { transmute(src:vpmovsqd256(a.as_i64x4(), src.as_i32x4(), mask:k)) }
13717}
13718
13719/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13720///
13721/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi64_epi32&expand=1851)
13722#[inline]
13723#[target_feature(enable = "avx512f,avx512vl")]
13724#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13725#[cfg_attr(test, assert_instr(vpmovsqd))]
13726pub fn _mm256_maskz_cvtsepi64_epi32(k: __mmask8, a: __m256i) -> __m128i {
13727 unsafe { transmute(src:vpmovsqd256(a.as_i64x4(), src:i32x4::ZERO, mask:k)) }
13728}
13729
13730/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst.
13731///
13732/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi64_epi32&expand=1846)
13733#[inline]
13734#[target_feature(enable = "avx512f,avx512vl")]
13735#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13736#[cfg_attr(test, assert_instr(vpmovsqd))]
13737pub fn _mm_cvtsepi64_epi32(a: __m128i) -> __m128i {
13738 unsafe { transmute(src:vpmovsqd128(a.as_i64x2(), src:i32x4::ZERO, mask:0b11111111)) }
13739}
13740
13741/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13742///
13743/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_epi32&expand=1847)
13744#[inline]
13745#[target_feature(enable = "avx512f,avx512vl")]
13746#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13747#[cfg_attr(test, assert_instr(vpmovsqd))]
13748pub fn _mm_mask_cvtsepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13749 unsafe { transmute(src:vpmovsqd128(a.as_i64x2(), src.as_i32x4(), mask:k)) }
13750}
13751
13752/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13753///
13754/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi64_epi32&expand=1848)
13755#[inline]
13756#[target_feature(enable = "avx512f,avx512vl")]
13757#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13758#[cfg_attr(test, assert_instr(vpmovsqd))]
13759pub fn _mm_maskz_cvtsepi64_epi32(k: __mmask8, a: __m128i) -> __m128i {
13760 unsafe { transmute(src:vpmovsqd128(a.as_i64x2(), src:i32x4::ZERO, mask:k)) }
13761}
13762
13763/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13764///
13765/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi64_epi16&expand=1843)
13766#[inline]
13767#[target_feature(enable = "avx512f")]
13768#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13769#[cfg_attr(test, assert_instr(vpmovsqw))]
13770pub fn _mm512_cvtsepi64_epi16(a: __m512i) -> __m128i {
13771 unsafe { transmute(src:vpmovsqw(a.as_i64x8(), src:i16x8::ZERO, mask:0b11111111)) }
13772}
13773
13774/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13775///
13776/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_epi16&expand=1844)
13777#[inline]
13778#[target_feature(enable = "avx512f")]
13779#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13780#[cfg_attr(test, assert_instr(vpmovsqw))]
13781pub fn _mm512_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
13782 unsafe { transmute(src:vpmovsqw(a.as_i64x8(), src.as_i16x8(), mask:k)) }
13783}
13784
13785/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13786///
13787/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi64_epi16&expand=1845)
13788#[inline]
13789#[target_feature(enable = "avx512f")]
13790#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13791#[cfg_attr(test, assert_instr(vpmovsqw))]
13792pub fn _mm512_maskz_cvtsepi64_epi16(k: __mmask8, a: __m512i) -> __m128i {
13793 unsafe { transmute(src:vpmovsqw(a.as_i64x8(), src:i16x8::ZERO, mask:k)) }
13794}
13795
13796/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13797///
13798/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi64_epi16&expand=1840)
13799#[inline]
13800#[target_feature(enable = "avx512f,avx512vl")]
13801#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13802#[cfg_attr(test, assert_instr(vpmovsqw))]
13803pub fn _mm256_cvtsepi64_epi16(a: __m256i) -> __m128i {
13804 unsafe { transmute(src:vpmovsqw256(a.as_i64x4(), src:i16x8::ZERO, mask:0b11111111)) }
13805}
13806
13807/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13808///
13809/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_epi16&expand=1841)
13810#[inline]
13811#[target_feature(enable = "avx512f,avx512vl")]
13812#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13813#[cfg_attr(test, assert_instr(vpmovsqw))]
13814pub fn _mm256_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13815 unsafe { transmute(src:vpmovsqw256(a.as_i64x4(), src.as_i16x8(), mask:k)) }
13816}
13817
13818/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13819///
13820/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi64_epi16&expand=1842)
13821#[inline]
13822#[target_feature(enable = "avx512f,avx512vl")]
13823#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13824#[cfg_attr(test, assert_instr(vpmovsqw))]
13825pub fn _mm256_maskz_cvtsepi64_epi16(k: __mmask8, a: __m256i) -> __m128i {
13826 unsafe { transmute(src:vpmovsqw256(a.as_i64x4(), src:i16x8::ZERO, mask:k)) }
13827}
13828
13829/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst.
13830///
13831/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi64_epi16&expand=1837)
13832#[inline]
13833#[target_feature(enable = "avx512f,avx512vl")]
13834#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13835#[cfg_attr(test, assert_instr(vpmovsqw))]
13836pub fn _mm_cvtsepi64_epi16(a: __m128i) -> __m128i {
13837 unsafe { transmute(src:vpmovsqw128(a.as_i64x2(), src:i16x8::ZERO, mask:0b11111111)) }
13838}
13839
13840/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13841///
13842/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_epi16&expand=1838)
13843#[inline]
13844#[target_feature(enable = "avx512f,avx512vl")]
13845#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13846#[cfg_attr(test, assert_instr(vpmovsqw))]
13847pub fn _mm_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13848 unsafe { transmute(src:vpmovsqw128(a.as_i64x2(), src.as_i16x8(), mask:k)) }
13849}
13850
13851/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13852///
13853/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi64_epi16&expand=1839)
13854#[inline]
13855#[target_feature(enable = "avx512f,avx512vl")]
13856#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13857#[cfg_attr(test, assert_instr(vpmovsqw))]
13858pub fn _mm_maskz_cvtsepi64_epi16(k: __mmask8, a: __m128i) -> __m128i {
13859 unsafe { transmute(src:vpmovsqw128(a.as_i64x2(), src:i16x8::ZERO, mask:k)) }
13860}
13861
13862/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
13863///
13864/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsepi64_epi8&expand=1861)
13865#[inline]
13866#[target_feature(enable = "avx512f")]
13867#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13868#[cfg_attr(test, assert_instr(vpmovsqb))]
13869pub fn _mm512_cvtsepi64_epi8(a: __m512i) -> __m128i {
13870 unsafe { transmute(src:vpmovsqb(a.as_i64x8(), src:i8x16::ZERO, mask:0b11111111)) }
13871}
13872
13873/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13874///
13875/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_epi8&expand=1862)
13876#[inline]
13877#[target_feature(enable = "avx512f")]
13878#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13879#[cfg_attr(test, assert_instr(vpmovsqb))]
13880pub fn _mm512_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
13881 unsafe { transmute(src:vpmovsqb(a.as_i64x8(), src.as_i8x16(), mask:k)) }
13882}
13883
13884/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13885///
13886/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtsepi64_epi8&expand=1863)
13887#[inline]
13888#[target_feature(enable = "avx512f")]
13889#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13890#[cfg_attr(test, assert_instr(vpmovsqb))]
13891pub fn _mm512_maskz_cvtsepi64_epi8(k: __mmask8, a: __m512i) -> __m128i {
13892 unsafe { transmute(src:vpmovsqb(a.as_i64x8(), src:i8x16::ZERO, mask:k)) }
13893}
13894
13895/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
13896///
13897/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsepi64_epi8&expand=1858)
13898#[inline]
13899#[target_feature(enable = "avx512f,avx512vl")]
13900#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13901#[cfg_attr(test, assert_instr(vpmovsqb))]
13902pub fn _mm256_cvtsepi64_epi8(a: __m256i) -> __m128i {
13903 unsafe { transmute(src:vpmovsqb256(a.as_i64x4(), src:i8x16::ZERO, mask:0b11111111)) }
13904}
13905
13906/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13907///
13908/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_epi8&expand=1859)
13909#[inline]
13910#[target_feature(enable = "avx512f,avx512vl")]
13911#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13912#[cfg_attr(test, assert_instr(vpmovsqb))]
13913pub fn _mm256_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
13914 unsafe { transmute(src:vpmovsqb256(a.as_i64x4(), src.as_i8x16(), mask:k)) }
13915}
13916
13917/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13918///
13919/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtsepi64_epi8&expand=1860)
13920#[inline]
13921#[target_feature(enable = "avx512f,avx512vl")]
13922#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13923#[cfg_attr(test, assert_instr(vpmovsqb))]
13924pub fn _mm256_maskz_cvtsepi64_epi8(k: __mmask8, a: __m256i) -> __m128i {
13925 unsafe { transmute(src:vpmovsqb256(a.as_i64x4(), src:i8x16::ZERO, mask:k)) }
13926}
13927
13928/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst.
13929///
13930/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtsepi64_epi8&expand=1855)
13931#[inline]
13932#[target_feature(enable = "avx512f,avx512vl")]
13933#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13934#[cfg_attr(test, assert_instr(vpmovsqb))]
13935pub fn _mm_cvtsepi64_epi8(a: __m128i) -> __m128i {
13936 unsafe { transmute(src:vpmovsqb128(a.as_i64x2(), src:i8x16::ZERO, mask:0b11111111)) }
13937}
13938
13939/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13940///
13941/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_epi8&expand=1856)
13942#[inline]
13943#[target_feature(enable = "avx512f,avx512vl")]
13944#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13945#[cfg_attr(test, assert_instr(vpmovsqb))]
13946pub fn _mm_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
13947 unsafe { transmute(src:vpmovsqb128(a.as_i64x2(), src.as_i8x16(), mask:k)) }
13948}
13949
13950/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13951///
13952/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtsepi64_epi8&expand=1857)
13953#[inline]
13954#[target_feature(enable = "avx512f,avx512vl")]
13955#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13956#[cfg_attr(test, assert_instr(vpmovsqb))]
13957pub fn _mm_maskz_cvtsepi64_epi8(k: __mmask8, a: __m128i) -> __m128i {
13958 unsafe { transmute(src:vpmovsqb128(a.as_i64x2(), src:i8x16::ZERO, mask:k)) }
13959}
13960
13961/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
13962///
13963/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi32_epi16&expand=2054)
13964#[inline]
13965#[target_feature(enable = "avx512f")]
13966#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13967#[cfg_attr(test, assert_instr(vpmovusdw))]
13968pub fn _mm512_cvtusepi32_epi16(a: __m512i) -> __m256i {
13969 unsafe { transmute(src:vpmovusdw(a.as_u32x16(), src:u16x16::ZERO, mask:0b11111111_11111111)) }
13970}
13971
13972/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
13973///
13974/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_epi16&expand=2055)
13975#[inline]
13976#[target_feature(enable = "avx512f")]
13977#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13978#[cfg_attr(test, assert_instr(vpmovusdw))]
13979pub fn _mm512_mask_cvtusepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i {
13980 unsafe { transmute(src:vpmovusdw(a.as_u32x16(), src.as_u16x16(), mask:k)) }
13981}
13982
13983/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
13984///
13985/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi32_epi16&expand=2056)
13986#[inline]
13987#[target_feature(enable = "avx512f")]
13988#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
13989#[cfg_attr(test, assert_instr(vpmovusdw))]
13990pub fn _mm512_maskz_cvtusepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
13991 unsafe { transmute(src:vpmovusdw(a.as_u32x16(), src:u16x16::ZERO, mask:k)) }
13992}
13993
13994/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
13995///
13996/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi32_epi16&expand=2051)
13997#[inline]
13998#[target_feature(enable = "avx512f,avx512vl")]
13999#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14000#[cfg_attr(test, assert_instr(vpmovusdw))]
14001pub fn _mm256_cvtusepi32_epi16(a: __m256i) -> __m128i {
14002 unsafe { transmute(src:vpmovusdw256(a.as_u32x8(), src:u16x8::ZERO, mask:0b11111111)) }
14003}
14004
14005/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14006///
14007/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_epi16&expand=2052)
14008#[inline]
14009#[target_feature(enable = "avx512f,avx512vl")]
14010#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14011#[cfg_attr(test, assert_instr(vpmovusdw))]
14012pub fn _mm256_mask_cvtusepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14013 unsafe { transmute(src:vpmovusdw256(a.as_u32x8(), src.as_u16x8(), mask:k)) }
14014}
14015
14016/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14017///
14018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi32_epi16&expand=2053)
14019#[inline]
14020#[target_feature(enable = "avx512f,avx512vl")]
14021#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14022#[cfg_attr(test, assert_instr(vpmovusdw))]
14023pub fn _mm256_maskz_cvtusepi32_epi16(k: __mmask8, a: __m256i) -> __m128i {
14024 unsafe { transmute(vpmovusdw256(a.as_u32x8(), u16x8::ZERO, k)) }
14025}
14026
14027/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
14028///
14029/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi32_epi16&expand=2048)
14030#[inline]
14031#[target_feature(enable = "avx512f,avx512vl")]
14032#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14033#[cfg_attr(test, assert_instr(vpmovusdw))]
14034pub fn _mm_cvtusepi32_epi16(a: __m128i) -> __m128i {
14035 unsafe { transmute(vpmovusdw128(a.as_u32x4(), u16x8::ZERO, 0b11111111)) }
14036}
14037
14038/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14039///
14040/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_epi16&expand=2049)
14041#[inline]
14042#[target_feature(enable = "avx512f,avx512vl")]
14043#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14044#[cfg_attr(test, assert_instr(vpmovusdw))]
14045pub fn _mm_mask_cvtusepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14046 unsafe { transmute(vpmovusdw128(a.as_u32x4(), src.as_u16x8(), k)) }
14047}
14048
14049/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14050///
14051/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi32_epi16&expand=2050)
14052#[inline]
14053#[target_feature(enable = "avx512f,avx512vl")]
14054#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14055#[cfg_attr(test, assert_instr(vpmovusdw))]
14056pub fn _mm_maskz_cvtusepi32_epi16(k: __mmask8, a: __m128i) -> __m128i {
14057 unsafe { transmute(vpmovusdw128(a.as_u32x4(), u16x8::ZERO, k)) }
14058}
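
// Illustrative usage sketch, not part of the crate's test suite; the helper
// name `_usage_sketch_cvtusepi32_epi16` is hypothetical. It shows the unsigned
// saturating narrowing done by `_mm512_cvtusepi32_epi16` and how the writemask
// variant copies lanes from `src` where the corresponding mask bit is clear.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _usage_sketch_cvtusepi32_epi16() {
    // 70_000 exceeds u16::MAX, so every lane saturates to 0xFFFF.
    let a = _mm512_set1_epi32(70_000);
    let r = _mm512_cvtusepi32_epi16(a);
    let lanes: [u16; 16] = unsafe { mem::transmute(r) };
    assert!(lanes.iter().all(|&x| x == u16::MAX));

    // With a writemask, only lanes 0..8 are converted; lanes 8..16 keep `src`.
    let src = _mm256_set1_epi16(7);
    let masked = _mm512_mask_cvtusepi32_epi16(src, 0b00000000_11111111, a);
    let lanes: [u16; 16] = unsafe { mem::transmute(masked) };
    assert!(lanes[..8].iter().all(|&x| x == u16::MAX));
    assert!(lanes[8..].iter().all(|&x| x == 7));
}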
14059
14060/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14061///
14062/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi32_epi8&expand=2063)
14063#[inline]
14064#[target_feature(enable = "avx512f")]
14065#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14066#[cfg_attr(test, assert_instr(vpmovusdb))]
14067pub fn _mm512_cvtusepi32_epi8(a: __m512i) -> __m128i {
14068 unsafe { transmute(vpmovusdb(a.as_u32x16(), u8x16::ZERO, 0b11111111_11111111)) }
14069}
14070
14071/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14072///
14073/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_epi8&expand=2064)
14074#[inline]
14075#[target_feature(enable = "avx512f")]
14076#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14077#[cfg_attr(test, assert_instr(vpmovusdb))]
14078pub fn _mm512_mask_cvtusepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i {
14079 unsafe { transmute(vpmovusdb(a.as_u32x16(), src.as_u8x16(), k)) }
14080}
14081
14082/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14083///
14084/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi32_epi8&expand=2065)
14085#[inline]
14086#[target_feature(enable = "avx512f")]
14087#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14088#[cfg_attr(test, assert_instr(vpmovusdb))]
14089pub fn _mm512_maskz_cvtusepi32_epi8(k: __mmask16, a: __m512i) -> __m128i {
14090 unsafe { transmute(vpmovusdb(a.as_u32x16(), u8x16::ZERO, k)) }
14091}
14092
14093/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14094///
14095/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi32_epi8&expand=2060)
14096#[inline]
14097#[target_feature(enable = "avx512f,avx512vl")]
14098#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14099#[cfg_attr(test, assert_instr(vpmovusdb))]
14100pub fn _mm256_cvtusepi32_epi8(a: __m256i) -> __m128i {
14101 unsafe { transmute(vpmovusdb256(a.as_u32x8(), u8x16::ZERO, 0b11111111)) }
14102}
14103
14104/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14105///
14106/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_epi8&expand=2061)
14107#[inline]
14108#[target_feature(enable = "avx512f,avx512vl")]
14109#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14110#[cfg_attr(test, assert_instr(vpmovusdb))]
14111pub fn _mm256_mask_cvtusepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14112 unsafe { transmute(vpmovusdb256(a.as_u32x8(), src.as_u8x16(), k)) }
14113}
14114
14115/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14116///
14117/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi32_epi8&expand=2062)
14118#[inline]
14119#[target_feature(enable = "avx512f,avx512vl")]
14120#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14121#[cfg_attr(test, assert_instr(vpmovusdb))]
14122pub fn _mm256_maskz_cvtusepi32_epi8(k: __mmask8, a: __m256i) -> __m128i {
14123 unsafe { transmute(vpmovusdb256(a.as_u32x8(), u8x16::ZERO, k)) }
14124}
14125
14126/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14127///
14128/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi32_epi8&expand=2057)
14129#[inline]
14130#[target_feature(enable = "avx512f,avx512vl")]
14131#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14132#[cfg_attr(test, assert_instr(vpmovusdb))]
14133pub fn _mm_cvtusepi32_epi8(a: __m128i) -> __m128i {
14134 unsafe { transmute(vpmovusdb128(a.as_u32x4(), u8x16::ZERO, 0b11111111)) }
14135}
14136
14137/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14138///
14139/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_epi8&expand=2058)
14140#[inline]
14141#[target_feature(enable = "avx512f,avx512vl")]
14142#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14143#[cfg_attr(test, assert_instr(vpmovusdb))]
14144pub fn _mm_mask_cvtusepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14145 unsafe { transmute(vpmovusdb128(a.as_u32x4(), src.as_u8x16(), k)) }
14146}
14147
14148/// Convert packed unsigned 32-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14149///
14150/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi32_epi8&expand=2059)
14151#[inline]
14152#[target_feature(enable = "avx512f,avx512vl")]
14153#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14154#[cfg_attr(test, assert_instr(vpmovusdb))]
14155pub fn _mm_maskz_cvtusepi32_epi8(k: __mmask8, a: __m128i) -> __m128i {
14156 unsafe { transmute(vpmovusdb128(a.as_u32x4(), u8x16::ZERO, k)) }
14157}
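
// Illustrative sketch, not part of the test suite; `_usage_sketch_cvtusepi32_epi8`
// is a hypothetical helper. The zeromask variant saturates each unsigned 32-bit
// lane to the u8 range and zeroes lanes whose mask bit is clear.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _usage_sketch_cvtusepi32_epi8() {
    let a = _mm512_set1_epi32(300); // above u8::MAX, saturates to 255
    let r = _mm512_maskz_cvtusepi32_epi8(0b11110000_00001111, a);
    let lanes: [u8; 16] = unsafe { mem::transmute(r) };
    let expected = [
        255, 255, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0, 255, 255, 255, 255,
    ];
    assert_eq!(lanes, expected);
}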
14158
14159/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst.
14160///
14161/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi64_epi32&expand=2087)
14162#[inline]
14163#[target_feature(enable = "avx512f")]
14164#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14165#[cfg_attr(test, assert_instr(vpmovusqd))]
14166pub fn _mm512_cvtusepi64_epi32(a: __m512i) -> __m256i {
14167 unsafe { transmute(vpmovusqd(a.as_u64x8(), u32x8::ZERO, 0b11111111)) }
14168}
14169
14170/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14171///
14172/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_epi32&expand=2088)
14173#[inline]
14174#[target_feature(enable = "avx512f")]
14175#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14176#[cfg_attr(test, assert_instr(vpmovusqd))]
14177pub fn _mm512_mask_cvtusepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i {
14178 unsafe { transmute(vpmovusqd(a.as_u64x8(), src.as_u32x8(), k)) }
14179}
14180
14181/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14182///
14183/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi64_epi32&expand=2089)
14184#[inline]
14185#[target_feature(enable = "avx512f")]
14186#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14187#[cfg_attr(test, assert_instr(vpmovusqd))]
14188pub fn _mm512_maskz_cvtusepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
14189 unsafe { transmute(vpmovusqd(a.as_u64x8(), u32x8::ZERO, k)) }
14190}
14191
14192/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst.
14193///
14194/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi64_epi32&expand=2084)
14195#[inline]
14196#[target_feature(enable = "avx512f,avx512vl")]
14197#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14198#[cfg_attr(test, assert_instr(vpmovusqd))]
14199pub fn _mm256_cvtusepi64_epi32(a: __m256i) -> __m128i {
14200 unsafe { transmute(vpmovusqd256(a.as_u64x4(), u32x4::ZERO, 0b11111111)) }
14201}
14202
14203/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14204///
14205/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_epi32&expand=2085)
14206#[inline]
14207#[target_feature(enable = "avx512f,avx512vl")]
14208#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14209#[cfg_attr(test, assert_instr(vpmovusqd))]
14210pub fn _mm256_mask_cvtusepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14211 unsafe { transmute(vpmovusqd256(a.as_u64x4(), src.as_u32x4(), k)) }
14212}
14213
14214/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14215///
14216/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi64_epi32&expand=2086)
14217#[inline]
14218#[target_feature(enable = "avx512f,avx512vl")]
14219#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14220#[cfg_attr(test, assert_instr(vpmovusqd))]
14221pub fn _mm256_maskz_cvtusepi64_epi32(k: __mmask8, a: __m256i) -> __m128i {
14222 unsafe { transmute(vpmovusqd256(a.as_u64x4(), u32x4::ZERO, k)) }
14223}
14224
14225/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst.
14226///
14227/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi64_epi32&expand=2081)
14228#[inline]
14229#[target_feature(enable = "avx512f,avx512vl")]
14230#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14231#[cfg_attr(test, assert_instr(vpmovusqd))]
14232pub fn _mm_cvtusepi64_epi32(a: __m128i) -> __m128i {
14233 unsafe { transmute(vpmovusqd128(a.as_u64x2(), u32x4::ZERO, 0b11111111)) }
14234}
14235
14236/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14237///
14238/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_epi32&expand=2082)
14239#[inline]
14240#[target_feature(enable = "avx512f,avx512vl")]
14241#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14242#[cfg_attr(test, assert_instr(vpmovusqd))]
14243pub fn _mm_mask_cvtusepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14244 unsafe { transmute(vpmovusqd128(a.as_u64x2(), src.as_u32x4(), k)) }
14245}
14246
14247/// Convert packed unsigned 64-bit integers in a to packed unsigned 32-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14248///
14249/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi64_epi32&expand=2083)
14250#[inline]
14251#[target_feature(enable = "avx512f,avx512vl")]
14252#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14253#[cfg_attr(test, assert_instr(vpmovusqd))]
14254pub fn _mm_maskz_cvtusepi64_epi32(k: __mmask8, a: __m128i) -> __m128i {
14255 unsafe { transmute(vpmovusqd128(a.as_u64x2(), u32x4::ZERO, k)) }
14256}
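
// Illustrative sketch (hypothetical helper, not part of the test suite):
// `_mm512_cvtusepi64_epi32` packs eight unsigned 64-bit lanes into eight
// unsigned 32-bit lanes, clamping anything above u32::MAX.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _usage_sketch_cvtusepi64_epi32() {
    let a = _mm512_set1_epi64(u32::MAX as i64 + 1); // 2^32 is out of u32 range
    let r = _mm512_cvtusepi64_epi32(a);
    let lanes: [u32; 8] = unsafe { mem::transmute(r) };
    assert_eq!(lanes, [u32::MAX; 8]);
}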
14257
14258/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
14259///
14260/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi64_epi16&expand=2078)
14261#[inline]
14262#[target_feature(enable = "avx512f")]
14263#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14264#[cfg_attr(test, assert_instr(vpmovusqw))]
14265pub fn _mm512_cvtusepi64_epi16(a: __m512i) -> __m128i {
14266 unsafe { transmute(vpmovusqw(a.as_u64x8(), u16x8::ZERO, 0b11111111)) }
14267}
14268
14269/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14270///
14271/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_epi16&expand=2079)
14272#[inline]
14273#[target_feature(enable = "avx512f")]
14274#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14275#[cfg_attr(test, assert_instr(vpmovusqw))]
14276pub fn _mm512_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
14277 unsafe { transmute(vpmovusqw(a.as_u64x8(), src.as_u16x8(), k)) }
14278}
14279
14280/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14281///
14282/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi64_epi16&expand=2080)
14283#[inline]
14284#[target_feature(enable = "avx512f")]
14285#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14286#[cfg_attr(test, assert_instr(vpmovusqw))]
14287pub fn _mm512_maskz_cvtusepi64_epi16(k: __mmask8, a: __m512i) -> __m128i {
14288 unsafe { transmute(vpmovusqw(a.as_u64x8(), u16x8::ZERO, k)) }
14289}
14290
14291/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
14292///
14293/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi64_epi16&expand=2075)
14294#[inline]
14295#[target_feature(enable = "avx512f,avx512vl")]
14296#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14297#[cfg_attr(test, assert_instr(vpmovusqw))]
14298pub fn _mm256_cvtusepi64_epi16(a: __m256i) -> __m128i {
14299 unsafe { transmute(vpmovusqw256(a.as_u64x4(), u16x8::ZERO, 0b11111111)) }
14300}
14301
14302/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14303///
14304/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_epi16&expand=2076)
14305#[inline]
14306#[target_feature(enable = "avx512f,avx512vl")]
14307#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14308#[cfg_attr(test, assert_instr(vpmovusqw))]
14309pub fn _mm256_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14310 unsafe { transmute(vpmovusqw256(a.as_u64x4(), src.as_u16x8(), k)) }
14311}
14312
14313/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14314///
14315/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi64_epi16&expand=2077)
14316#[inline]
14317#[target_feature(enable = "avx512f,avx512vl")]
14318#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14319#[cfg_attr(test, assert_instr(vpmovusqw))]
14320pub fn _mm256_maskz_cvtusepi64_epi16(k: __mmask8, a: __m256i) -> __m128i {
14321 unsafe { transmute(vpmovusqw256(a.as_u64x4(), u16x8::ZERO, k)) }
14322}
14323
14324/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst.
14325///
14326/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi64_epi16&expand=2072)
14327#[inline]
14328#[target_feature(enable = "avx512f,avx512vl")]
14329#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14330#[cfg_attr(test, assert_instr(vpmovusqw))]
14331pub fn _mm_cvtusepi64_epi16(a: __m128i) -> __m128i {
14332 unsafe { transmute(vpmovusqw128(a.as_u64x2(), u16x8::ZERO, 0b11111111)) }
14333}
14334
14335/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14336///
14337/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_epi16&expand=2073)
14338#[inline]
14339#[target_feature(enable = "avx512f,avx512vl")]
14340#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14341#[cfg_attr(test, assert_instr(vpmovusqw))]
14342pub fn _mm_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14343 unsafe { transmute(vpmovusqw128(a.as_u64x2(), src.as_u16x8(), k)) }
14344}
14345
14346/// Convert packed unsigned 64-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14347///
14348/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi64_epi16&expand=2074)
14349#[inline]
14350#[target_feature(enable = "avx512f,avx512vl")]
14351#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14352#[cfg_attr(test, assert_instr(vpmovusqw))]
14353pub fn _mm_maskz_cvtusepi64_epi16(k: __mmask8, a: __m128i) -> __m128i {
14354 unsafe { transmute(vpmovusqw128(a.as_u64x2(), u16x8::ZERO, k)) }
14355}
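
// Illustrative sketch (hypothetical helper, not part of the test suite; needs
// AVX512VL in addition to AVX512F): the 128-bit `_mm_cvtusepi64_epi16` writes
// its two saturated 16-bit results into the low lanes of the destination and
// zeroes the remaining lanes.
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
fn _usage_sketch_cvtusepi64_epi16() {
    let a = _mm_set1_epi64x(100_000); // above u16::MAX, saturates to 0xFFFF
    let r = _mm_cvtusepi64_epi16(a);
    let lanes: [u16; 8] = unsafe { mem::transmute(r) };
    assert_eq!(lanes, [0xFFFF, 0xFFFF, 0, 0, 0, 0, 0, 0]);
}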
14356
14357/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14358///
14359/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtusepi64_epi8&expand=2096)
14360#[inline]
14361#[target_feature(enable = "avx512f")]
14362#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14363#[cfg_attr(test, assert_instr(vpmovusqb))]
14364pub fn _mm512_cvtusepi64_epi8(a: __m512i) -> __m128i {
14365 unsafe { transmute(vpmovusqb(a.as_u64x8(), u8x16::ZERO, 0b11111111)) }
14366}
14367
14368/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14369///
14370/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_epi8&expand=2097)
14371#[inline]
14372#[target_feature(enable = "avx512f")]
14373#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14374#[cfg_attr(test, assert_instr(vpmovusqb))]
14375pub fn _mm512_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
14376 unsafe { transmute(vpmovusqb(a.as_u64x8(), src.as_u8x16(), k)) }
14377}
14378
14379/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14380///
14381/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtusepi64_epi8&expand=2098)
14382#[inline]
14383#[target_feature(enable = "avx512f")]
14384#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14385#[cfg_attr(test, assert_instr(vpmovusqb))]
14386pub fn _mm512_maskz_cvtusepi64_epi8(k: __mmask8, a: __m512i) -> __m128i {
14387 unsafe { transmute(vpmovusqb(a.as_u64x8(), u8x16::ZERO, k)) }
14388}
14389
14390/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14391///
14392/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtusepi64_epi8&expand=2093)
14393#[inline]
14394#[target_feature(enable = "avx512f,avx512vl")]
14395#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14396#[cfg_attr(test, assert_instr(vpmovusqb))]
14397pub fn _mm256_cvtusepi64_epi8(a: __m256i) -> __m128i {
14398 unsafe { transmute(vpmovusqb256(a.as_u64x4(), u8x16::ZERO, 0b11111111)) }
14399}
14400
14401/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14402///
14403/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_epi8&expand=2094)
14404#[inline]
14405#[target_feature(enable = "avx512f,avx512vl")]
14406#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14407#[cfg_attr(test, assert_instr(vpmovusqb))]
14408pub fn _mm256_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
14409 unsafe { transmute(vpmovusqb256(a.as_u64x4(), src.as_u8x16(), k)) }
14410}
14411
14412/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14413///
14414/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtusepi64_epi8&expand=2095)
14415#[inline]
14416#[target_feature(enable = "avx512f,avx512vl")]
14417#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14418#[cfg_attr(test, assert_instr(vpmovusqb))]
14419pub fn _mm256_maskz_cvtusepi64_epi8(k: __mmask8, a: __m256i) -> __m128i {
14420 unsafe { transmute(vpmovusqb256(a.as_u64x4(), u8x16::ZERO, k)) }
14421}
14422
14423/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst.
14424///
14425/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtusepi64_epi8&expand=2090)
14426#[inline]
14427#[target_feature(enable = "avx512f,avx512vl")]
14428#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14429#[cfg_attr(test, assert_instr(vpmovusqb))]
14430pub fn _mm_cvtusepi64_epi8(a: __m128i) -> __m128i {
14431 unsafe { transmute(vpmovusqb128(a.as_u64x2(), u8x16::ZERO, 0b11111111)) }
14432}
14433
14434/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
14435///
14436/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_epi8&expand=2091)
14437#[inline]
14438#[target_feature(enable = "avx512f,avx512vl")]
14439#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14440#[cfg_attr(test, assert_instr(vpmovusqb))]
14441pub fn _mm_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
14442 unsafe { transmute(vpmovusqb128(a.as_u64x2(), src.as_u8x16(), k)) }
14443}
14444
14445/// Convert packed unsigned 64-bit integers in a to packed unsigned 8-bit integers with unsigned saturation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
14446///
14447/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtusepi64_epi8&expand=2092)
14448#[inline]
14449#[target_feature(enable = "avx512f,avx512vl")]
14450#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14451#[cfg_attr(test, assert_instr(vpmovusqb))]
14452pub fn _mm_maskz_cvtusepi64_epi8(k: __mmask8, a: __m128i) -> __m128i {
14453 unsafe { transmute(vpmovusqb128(a.as_u64x2(), u8x16::ZERO, k)) }
14454}
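
// Illustrative sketch (hypothetical helper, not part of the test suite; needs
// AVX512VL): with a writemask, `_mm256_mask_cvtusepi64_epi8` saturates the four
// 64-bit lanes to u8 where the mask bit is set, copies the corresponding byte
// of `src` where it is clear, and zeroes byte lanes 4..16 of the destination.
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
fn _usage_sketch_cvtusepi64_epi8() {
    let a = _mm256_set1_epi64x(300); // above u8::MAX, saturates to 255
    let src = _mm_set1_epi8(9);
    let r = _mm256_mask_cvtusepi64_epi8(src, 0b0000_0101, a);
    let lanes: [u8; 16] = unsafe { mem::transmute(r) };
    assert_eq!(lanes, [255, 9, 255, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
}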
14455
14456/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.
14457///
14458/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:
14459/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14460/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14461/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14462/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14463/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14464///
14465/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_epi32&expand=1335)
14466#[inline]
14467#[target_feature(enable = "avx512f")]
14468#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14469#[cfg_attr(test, assert_instr(vcvtps2dq, ROUNDING = 8))]
14470#[rustc_legacy_const_generics(1)]
14471pub fn _mm512_cvt_roundps_epi32<const ROUNDING: i32>(a: __m512) -> __m512i {
14472 unsafe {
14473 static_assert_rounding!(ROUNDING);
14474 let a: f32x16 = a.as_f32x16();
14475 let r: i32x16 = vcvtps2dq(a, i32x16::ZERO, 0b11111111_11111111, ROUNDING);
14476 transmute(r)
14477 }
14478}
14479
14480/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14481///
14482/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14483/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14484/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14485/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14486/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14487/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14488///
14489/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_epi32&expand=1336)
14490#[inline]
14491#[target_feature(enable = "avx512f")]
14492#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14493#[cfg_attr(test, assert_instr(vcvtps2dq, ROUNDING = 8))]
14494#[rustc_legacy_const_generics(3)]
14495pub fn _mm512_mask_cvt_roundps_epi32<const ROUNDING: i32>(
14496 src: __m512i,
14497 k: __mmask16,
14498 a: __m512,
14499) -> __m512i {
14500 unsafe {
14501 static_assert_rounding!(ROUNDING);
14502 let a: f32x16 = a.as_f32x16();
14503 let src: i32x16 = src.as_i32x16();
14504 let r: i32x16 = vcvtps2dq(a, src, k, ROUNDING);
14505 transmute(r)
14506 }
14507}
14508
14509/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14510///
14511/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14512/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14513/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14514/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14515/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14516/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14517///
14518/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_epi32&expand=1337)
14519#[inline]
14520#[target_feature(enable = "avx512f")]
14521#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14522#[cfg_attr(test, assert_instr(vcvtps2dq, ROUNDING = 8))]
14523#[rustc_legacy_const_generics(2)]
14524pub fn _mm512_maskz_cvt_roundps_epi32<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m512i {
14525 unsafe {
14526 static_assert_rounding!(ROUNDING);
14527 let a: f32x16 = a.as_f32x16();
14528 let r: i32x16 = vcvtps2dq(a, i32x16::ZERO, k, ROUNDING);
14529 transmute(r)
14530 }
14531}
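
// Illustrative sketch (hypothetical helper, not part of the test suite): the
// rounding mode of `_mm512_cvt_roundps_epi32` is a const generic, so different
// modes can be chosen at compile time; 2.5 goes to 2 under round-to-nearest-even
// and to 3 under round-toward-positive-infinity.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _usage_sketch_cvt_roundps_epi32() {
    let a = _mm512_set1_ps(2.5);
    let nearest =
        _mm512_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
    let up = _mm512_cvt_roundps_epi32::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a);
    let nearest: [i32; 16] = unsafe { mem::transmute(nearest) };
    let up: [i32; 16] = unsafe { mem::transmute(up) };
    assert_eq!(nearest, [2; 16]);
    assert_eq!(up, [3; 16]);
}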
14532
14533/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.\
14534///
14535/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14536/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14537/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14538/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14539/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14540/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14541///
14542/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_epu32&expand=1341)
14543#[inline]
14544#[target_feature(enable = "avx512f")]
14545#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14546#[cfg_attr(test, assert_instr(vcvtps2udq, ROUNDING = 8))]
14547#[rustc_legacy_const_generics(1)]
14548pub fn _mm512_cvt_roundps_epu32<const ROUNDING: i32>(a: __m512) -> __m512i {
14549 unsafe {
14550 static_assert_rounding!(ROUNDING);
14551 let a: f32x16 = a.as_f32x16();
14552 let r: u32x16 = vcvtps2udq(a, u32x16::ZERO, 0b11111111_11111111, ROUNDING);
14553 transmute(r)
14554 }
14555}
14556
14557/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14558///
14559/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14560/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14561/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14562/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14563/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14564/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14565///
14566/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_epu32&expand=1342)
14567#[inline]
14568#[target_feature(enable = "avx512f")]
14569#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14570#[cfg_attr(test, assert_instr(vcvtps2udq, ROUNDING = 8))]
14571#[rustc_legacy_const_generics(3)]
14572pub fn _mm512_mask_cvt_roundps_epu32<const ROUNDING: i32>(
14573 src: __m512i,
14574 k: __mmask16,
14575 a: __m512,
14576) -> __m512i {
14577 unsafe {
14578 static_assert_rounding!(ROUNDING);
14579 let a: f32x16 = a.as_f32x16();
14580 let src: u32x16 = src.as_u32x16();
14581 let r: u32x16 = vcvtps2udq(a, src, k, ROUNDING);
14582 transmute(r)
14583 }
14584}
14585
14586/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14587///
14588/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14589/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14590/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14591/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14592/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14593/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14594///
14595/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_epu32&expand=1343)
14596#[inline]
14597#[target_feature(enable = "avx512f")]
14598#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14599#[cfg_attr(test, assert_instr(vcvtps2udq, ROUNDING = 8))]
14600#[rustc_legacy_const_generics(2)]
14601pub fn _mm512_maskz_cvt_roundps_epu32<const ROUNDING: i32>(k: __mmask16, a: __m512) -> __m512i {
14602 unsafe {
14603 static_assert_rounding!(ROUNDING);
14604 let a: f32x16 = a.as_f32x16();
14605 let r: u32x16 = vcvtps2udq(a, u32x16::ZERO, k, ROUNDING);
14606 transmute(r)
14607 }
14608}
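
// Illustrative sketch (hypothetical helper, not part of the test suite):
// `_mm512_cvt_roundps_epu32` with `_MM_FROUND_TO_ZERO` truncates toward zero
// before converting to unsigned 32-bit integers, so 3.9 becomes 3.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _usage_sketch_cvt_roundps_epu32() {
    let a = _mm512_set1_ps(3.9);
    let r = _mm512_cvt_roundps_epu32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
    let lanes: [u32; 16] = unsafe { mem::transmute(r) };
    assert_eq!(lanes, [3; 16]);
}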
14609
14610/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst.\
14611/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
14612///
14613/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_pd&expand=1347)
14614#[inline]
14615#[target_feature(enable = "avx512f")]
14616#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14617#[cfg_attr(test, assert_instr(vcvtps2pd, SAE = 8))]
14618#[rustc_legacy_const_generics(1)]
14619pub fn _mm512_cvt_roundps_pd<const SAE: i32>(a: __m256) -> __m512d {
14620 unsafe {
14621 static_assert_sae!(SAE);
14622 let a: f32x8 = a.as_f32x8();
14623 let r: f64x8 = vcvtps2pd(a, f64x8::ZERO, 0b11111111, SAE);
14624 transmute(r)
14625 }
14626}
14627
14628/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14629/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
14630///
14631/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_pd&expand=1348)
14632#[inline]
14633#[target_feature(enable = "avx512f")]
14634#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14635#[cfg_attr(test, assert_instr(vcvtps2pd, SAE = 8))]
14636#[rustc_legacy_const_generics(3)]
14637pub fn _mm512_mask_cvt_roundps_pd<const SAE: i32>(src: __m512d, k: __mmask8, a: __m256) -> __m512d {
14638 unsafe {
14639 static_assert_sae!(SAE);
14640 let a: f32x8 = a.as_f32x8();
14641 let src: f64x8 = src.as_f64x8();
14642 let r: f64x8 = vcvtps2pd(a, src, k, SAE);
14643 transmute(r)
14644 }
14645}
14646
14647/// Convert packed single-precision (32-bit) floating-point elements in a to packed double-precision (64-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14648/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
14649///
14650/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_pd&expand=1349)
14651#[inline]
14652#[target_feature(enable = "avx512f")]
14653#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14654#[cfg_attr(test, assert_instr(vcvtps2pd, SAE = 8))]
14655#[rustc_legacy_const_generics(2)]
14656pub fn _mm512_maskz_cvt_roundps_pd<const SAE: i32>(k: __mmask8, a: __m256) -> __m512d {
14657 unsafe {
14658 static_assert_sae!(SAE);
14659 let a: f32x8 = a.as_f32x8();
14660 let r: f64x8 = vcvtps2pd(a, f64x8::ZERO, k, SAE);
14661 transmute(r)
14662 }
14663}
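
// Illustrative sketch (hypothetical helper, not part of the test suite):
// widening f32 -> f64 is always exact, which is why `_mm512_cvt_roundps_pd`
// takes only an SAE (suppress-all-exceptions) parameter rather than a full
// rounding mode.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _usage_sketch_cvt_roundps_pd() {
    let a = _mm256_set1_ps(1.5);
    let r = _mm512_cvt_roundps_pd::<{ _MM_FROUND_NO_EXC }>(a);
    let lanes: [f64; 8] = unsafe { mem::transmute(r) };
    assert_eq!(lanes, [1.5f64; 8]);
}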
14664
14665/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst.\
14666///
14667/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14668/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14669/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14670/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14671/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14672/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14673///
14674/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_epi32&expand=1315)
14675#[inline]
14676#[target_feature(enable = "avx512f")]
14677#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14678#[cfg_attr(test, assert_instr(vcvtpd2dq, ROUNDING = 8))]
14679#[rustc_legacy_const_generics(1)]
14680pub fn _mm512_cvt_roundpd_epi32<const ROUNDING: i32>(a: __m512d) -> __m256i {
14681 unsafe {
14682 static_assert_rounding!(ROUNDING);
14683 let a: f64x8 = a.as_f64x8();
14684 let r: i32x8 = vcvtpd2dq(a, i32x8::ZERO, 0b11111111, ROUNDING);
14685 transmute(r)
14686 }
14687}
14688
14689/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14690///
14691/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14692/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14693/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14694/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14695/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14696/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14697///
14698/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_epi32&expand=1316)
14699#[inline]
14700#[target_feature(enable = "avx512f")]
14701#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14702#[cfg_attr(test, assert_instr(vcvtpd2dq, ROUNDING = 8))]
14703#[rustc_legacy_const_generics(3)]
14704pub fn _mm512_mask_cvt_roundpd_epi32<const ROUNDING: i32>(
14705 src: __m256i,
14706 k: __mmask8,
14707 a: __m512d,
14708) -> __m256i {
14709 unsafe {
14710 static_assert_rounding!(ROUNDING);
14711 let a: f64x8 = a.as_f64x8();
14712 let src: i32x8 = src.as_i32x8();
14713 let r: i32x8 = vcvtpd2dq(a, src, k, ROUNDING);
14714 transmute(r)
14715 }
14716}
14717
14718/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14719///
14720/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14721/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14722/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14723/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14724/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14725/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14726///
14727/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_epi32&expand=1317)
14728#[inline]
14729#[target_feature(enable = "avx512f")]
14730#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14731#[cfg_attr(test, assert_instr(vcvtpd2dq, ROUNDING = 8))]
14732#[rustc_legacy_const_generics(2)]
14733pub fn _mm512_maskz_cvt_roundpd_epi32<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m256i {
14734 unsafe {
14735 static_assert_rounding!(ROUNDING);
14736 let a: f64x8 = a.as_f64x8();
14737 let r: i32x8 = vcvtpd2dq(a, i32x8::ZERO, k, ROUNDING);
14738 transmute(r)
14739 }
14740}
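
// Illustrative sketch (hypothetical helper, not part of the test suite): the
// writemask variant of `_mm512_cvt_roundpd_epi32` keeps lanes of `src` where
// the mask bit is clear; -1.5 rounds to -2 under round-toward-negative-infinity.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _usage_sketch_cvt_roundpd_epi32() {
    let a = _mm512_set1_pd(-1.5);
    let src = _mm256_set1_epi32(42);
    let r = _mm512_mask_cvt_roundpd_epi32::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(
        src, 0b0000_1111, a,
    );
    let lanes: [i32; 8] = unsafe { mem::transmute(r) };
    assert_eq!(lanes, [-2, -2, -2, -2, 42, 42, 42, 42]);
}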
14741
14742/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst.\
14743///
14744/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14745/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14746/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14747/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14748/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14749/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14750///
14751/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_epu32&expand=1321)
14752#[inline]
14753#[target_feature(enable = "avx512f")]
14754#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14755#[cfg_attr(test, assert_instr(vcvtpd2udq, ROUNDING = 8))]
14756#[rustc_legacy_const_generics(1)]
14757pub fn _mm512_cvt_roundpd_epu32<const ROUNDING: i32>(a: __m512d) -> __m256i {
14758 unsafe {
14759 static_assert_rounding!(ROUNDING);
14760 let a: f64x8 = a.as_f64x8();
14761 let r: u32x8 = vcvtpd2udq(a, u32x8::ZERO, 0b11111111, ROUNDING);
14762 transmute(r)
14763 }
14764}
14765
14766/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14767///
14768/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14769/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14770/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14771/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14772/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14773/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14774///
14775/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_epu32&expand=1322)
14776#[inline]
14777#[target_feature(enable = "avx512f")]
14778#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14779#[cfg_attr(test, assert_instr(vcvtpd2udq, ROUNDING = 8))]
14780#[rustc_legacy_const_generics(3)]
14781pub fn _mm512_mask_cvt_roundpd_epu32<const ROUNDING: i32>(
14782 src: __m256i,
14783 k: __mmask8,
14784 a: __m512d,
14785) -> __m256i {
14786 unsafe {
14787 static_assert_rounding!(ROUNDING);
14788 let a: f64x8 = a.as_f64x8();
14789 let src: u32x8 = src.as_u32x8();
14790 let r: u32x8 = vcvtpd2udq(a, src, k, ROUNDING);
14791 transmute(r)
14792 }
14793}
14794
14795/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14796///
14797/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14798/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14799/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14800/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14801/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14802/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14803///
14804/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_epu32&expand=1323)
14805#[inline]
14806#[target_feature(enable = "avx512f")]
14807#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14808#[cfg_attr(test, assert_instr(vcvtpd2udq, ROUNDING = 8))]
14809#[rustc_legacy_const_generics(2)]
14810pub fn _mm512_maskz_cvt_roundpd_epu32<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m256i {
14811 unsafe {
14812 static_assert_rounding!(ROUNDING);
14813 let a: f64x8 = a.as_f64x8();
14814 let r: u32x8 = vcvtpd2udq(a, u32x8::ZERO, k, ROUNDING);
14815 transmute(r)
14816 }
14817}
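
// Illustrative sketch (hypothetical helper, not part of the test suite): the
// zeromask variant of `_mm512_cvt_roundpd_epu32` zeroes lanes whose mask bit
// is clear; 2.5 rounds to 2 under round-to-nearest-even.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _usage_sketch_cvt_roundpd_epu32() {
    let a = _mm512_set1_pd(2.5);
    let r = _mm512_maskz_cvt_roundpd_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
        0b1111_0000, a,
    );
    let lanes: [u32; 8] = unsafe { mem::transmute(r) };
    assert_eq!(lanes, [0, 0, 0, 0, 2, 2, 2, 2]);
}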
14818
14819/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
14820///
14821/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14822/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14823/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14824/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14825/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14826/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14827///
14828/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundpd_ps&expand=1327)
14829#[inline]
14830#[target_feature(enable = "avx512f")]
14831#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14832#[cfg_attr(test, assert_instr(vcvtpd2ps, ROUNDING = 8))]
14833#[rustc_legacy_const_generics(1)]
14834pub fn _mm512_cvt_roundpd_ps<const ROUNDING: i32>(a: __m512d) -> __m256 {
14835 unsafe {
14836 static_assert_rounding!(ROUNDING);
14837 let a: f64x8 = a.as_f64x8();
14838 let r: f32x8 = vcvtpd2ps(a, f32x8::ZERO, 0b11111111, ROUNDING);
14839 transmute(r)
14840 }
14841}
14842
14843/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14844///
14845/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14846/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14847/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14848/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14849/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14850/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14851///
14852/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundpd_ps&expand=1328)
14853#[inline]
14854#[target_feature(enable = "avx512f")]
14855#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14856#[cfg_attr(test, assert_instr(vcvtpd2ps, ROUNDING = 8))]
14857#[rustc_legacy_const_generics(3)]
14858pub fn _mm512_mask_cvt_roundpd_ps<const ROUNDING: i32>(
14859 src: __m256,
14860 k: __mmask8,
14861 a: __m512d,
14862) -> __m256 {
14863 unsafe {
14864 static_assert_rounding!(ROUNDING);
14865 let a: f64x8 = a.as_f64x8();
14866 let src: f32x8 = src.as_f32x8();
14867 let r: f32x8 = vcvtpd2ps(a, src, k, ROUNDING);
14868 transmute(r)
14869 }
14870}
14871
14872/// Convert packed double-precision (64-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14873///
14874/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14875/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14876/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14877/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14878/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14879/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14880///
14881/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundpd_ps&expand=1329)
14882#[inline]
14883#[target_feature(enable = "avx512f")]
14884#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14885#[cfg_attr(test, assert_instr(vcvtpd2ps, ROUNDING = 8))]
14886#[rustc_legacy_const_generics(2)]
14887pub fn _mm512_maskz_cvt_roundpd_ps<const ROUNDING: i32>(k: __mmask8, a: __m512d) -> __m256 {
14888 unsafe {
14889 static_assert_rounding!(ROUNDING);
14890 let a: f64x8 = a.as_f64x8();
14891 let r: f32x8 = vcvtpd2ps(a, f32x8::ZERO, k, ROUNDING);
14892 transmute(r)
14893 }
14894}
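
// Illustrative sketch (hypothetical helper, not part of the test suite):
// narrowing f64 -> f32 with `_mm512_cvt_roundpd_ps` is where the rounding mode
// becomes observable; 1.0 + 2^-52 has no exact f32 representation, so it rounds
// down to 1.0 under round-to-nearest but up to the next f32 under
// round-toward-positive-infinity.
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _usage_sketch_cvt_roundpd_ps() {
    let a = _mm512_set1_pd(1.0 + f64::EPSILON);
    let nearest = _mm512_cvt_roundpd_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
    let up = _mm512_cvt_roundpd_ps::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a);
    let nearest: [f32; 8] = unsafe { mem::transmute(nearest) };
    let up: [f32; 8] = unsafe { mem::transmute(up) };
    assert_eq!(nearest[0], 1.0);
    assert!(up[0] > 1.0);
}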
14895
14896/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
14897///
14898/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14899/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14900/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14901/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14902/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14903/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14904///
14905/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepi32_ps&expand=1294)
14906#[inline]
14907#[target_feature(enable = "avx512f")]
14908#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14909#[cfg_attr(test, assert_instr(vcvtdq2ps, ROUNDING = 8))]
14910#[rustc_legacy_const_generics(1)]
14911pub fn _mm512_cvt_roundepi32_ps<const ROUNDING: i32>(a: __m512i) -> __m512 {
14912 unsafe {
14913 static_assert_rounding!(ROUNDING);
14914 let a: i32x16 = a.as_i32x16();
14915 let r: f32x16 = vcvtdq2ps(a, ROUNDING);
14916 transmute(r)
14917 }
14918}
14919
14920/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14921///
14922/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14923/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14924/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14925/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14926/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14927/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14928///
14929/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepi32_ps&expand=1295)
14930#[inline]
14931#[target_feature(enable = "avx512f")]
14932#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14933#[cfg_attr(test, assert_instr(vcvtdq2ps, ROUNDING = 8))]
14934#[rustc_legacy_const_generics(3)]
14935pub fn _mm512_mask_cvt_roundepi32_ps<const ROUNDING: i32>(
14936 src: __m512,
14937 k: __mmask16,
14938 a: __m512i,
14939) -> __m512 {
14940 unsafe {
14941 static_assert_rounding!(ROUNDING);
14942 let a: i32x16 = a.as_i32x16();
14943 let r: f32x16 = vcvtdq2ps(a, ROUNDING);
14944 transmute(simd_select_bitmask(k, r, src.as_f32x16()))
14945 }
14946}
14947
14948/// Convert packed signed 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
14949///
14950/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14951/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14952/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14953/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14954/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14955/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14956///
14957/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepi32_ps&expand=1296)
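///
/// A short sketch contrasting zero-masking with merge-masking (hypothetical helper;
/// assumes nightly `stdarch_x86_avx512` and AVX-512F):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn convert_low_eight(src: __m512, a: __m512i) -> (__m512, __m512) {
///     const RC: i32 = _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
///     let k: __mmask16 = 0b00000000_11111111;
///     // Illustrative only: merge-masking keeps the upper eight lanes of `src`,
///     // while zero-masking sets those lanes to 0.0 instead.
///     let merged = _mm512_mask_cvt_roundepi32_ps::<RC>(src, k, a);
///     let zeroed = _mm512_maskz_cvt_roundepi32_ps::<RC>(k, a);
///     (merged, zeroed)
/// }
/// ```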
14958#[inline]
14959#[target_feature(enable = "avx512f")]
14960#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14961#[cfg_attr(test, assert_instr(vcvtdq2ps, ROUNDING = 8))]
14962#[rustc_legacy_const_generics(2)]
14963pub fn _mm512_maskz_cvt_roundepi32_ps<const ROUNDING: i32>(k: __mmask16, a: __m512i) -> __m512 {
14964 unsafe {
14965 static_assert_rounding!(ROUNDING);
14966 let a: i32x16 = a.as_i32x16();
14967 let r: f32x16 = vcvtdq2ps(a, ROUNDING);
14968 transmute(src:simd_select_bitmask(m:k, yes:r, no:f32x16::ZERO))
14969 }
14970}
14971
14972/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
14973///
14974/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14975/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
14976/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
14977/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
14978/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
14979/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
14980///
14981/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundepu32_ps&expand=1303)
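///
/// A minimal sketch of the unsigned variant (hypothetical helper; assumes nightly
/// `stdarch_x86_avx512` and AVX-512F):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn u32_to_f32(a: __m512i) -> __m512 {
///     // Illustrative wrapper: lanes are read as u32, so an all-ones lane
///     // converts to roughly 4.29e9 rather than -1.0.
///     _mm512_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a)
/// }
/// ```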
14982#[inline]
14983#[target_feature(enable = "avx512f")]
14984#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
14985#[cfg_attr(test, assert_instr(vcvtudq2ps, ROUNDING = 8))]
14986#[rustc_legacy_const_generics(1)]
14987pub fn _mm512_cvt_roundepu32_ps<const ROUNDING: i32>(a: __m512i) -> __m512 {
14988 unsafe {
14989 static_assert_rounding!(ROUNDING);
14990 let a: u32x16 = a.as_u32x16();
14991 let r: f32x16 = vcvtudq2ps(a, ROUNDING);
14992 transmute(src:r)
14993 }
14994}
14995
14996/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
14997///
14998/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
14999/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15000/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15001/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15002/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15003/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15004///
15005/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundepu32_ps&expand=1304)
15006#[inline]
15007#[target_feature(enable = "avx512f")]
15008#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15009#[cfg_attr(test, assert_instr(vcvtudq2ps, ROUNDING = 8))]
15010#[rustc_legacy_const_generics(3)]
15011pub fn _mm512_mask_cvt_roundepu32_ps<const ROUNDING: i32>(
15012 src: __m512,
15013 k: __mmask16,
15014 a: __m512i,
15015) -> __m512 {
15016 unsafe {
15017 static_assert_rounding!(ROUNDING);
15018 let a: u32x16 = a.as_u32x16();
15019 let r: f32x16 = vcvtudq2ps(a, ROUNDING);
15020 transmute(src:simd_select_bitmask(m:k, yes:r, no:src.as_f32x16()))
15021 }
15022}
15023
15024/// Convert packed unsigned 32-bit integers in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15025///
15026/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
15027/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15028/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15029/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15030/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15031/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15032///
15033/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundepu32_ps&expand=1305)
15034#[inline]
15035#[target_feature(enable = "avx512f")]
15036#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15037#[cfg_attr(test, assert_instr(vcvtudq2ps, ROUNDING = 8))]
15038#[rustc_legacy_const_generics(2)]
15039pub fn _mm512_maskz_cvt_roundepu32_ps<const ROUNDING: i32>(k: __mmask16, a: __m512i) -> __m512 {
15040 unsafe {
15041 static_assert_rounding!(ROUNDING);
15042 let a: u32x16 = a.as_u32x16();
15043 let r: f32x16 = vcvtudq2ps(a, ROUNDING);
15044 transmute(src:simd_select_bitmask(m:k, yes:r, no:f32x16::ZERO))
15045 }
15046}
15047
15048/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst.\
15049/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15050///
15051/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundps_ph&expand=1354)
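///
/// A minimal usage sketch (hypothetical helper; assumes nightly
/// `stdarch_x86_avx512` and AVX-512F):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn f32_to_f16_bits(a: __m512) -> __m256i {
///     // Illustrative wrapper: narrows each f32 lane to an IEEE 754 binary16 value;
///     // the 16 results come back as raw half-precision bit patterns in a __m256i.
///     _mm512_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(a)
/// }
/// ```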
15052#[inline]
15053#[target_feature(enable = "avx512f")]
15054#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15055#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))]
15056#[rustc_legacy_const_generics(1)]
15057pub fn _mm512_cvt_roundps_ph<const SAE: i32>(a: __m512) -> __m256i {
15058 unsafe {
15059 static_assert_sae!(SAE);
15060 let a: f32x16 = a.as_f32x16();
15061 let r: i16x16 = vcvtps2ph(a, SAE, src:i16x16::ZERO, mask:0b11111111_11111111);
15062 transmute(src:r)
15063 }
15064}
15065
15066/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15067/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15068///
15069/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundps_ph&expand=1355)
15070#[inline]
15071#[target_feature(enable = "avx512f")]
15072#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15073#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))]
15074#[rustc_legacy_const_generics(3)]
15075pub fn _mm512_mask_cvt_roundps_ph<const SAE: i32>(
15076 src: __m256i,
15077 k: __mmask16,
15078 a: __m512,
15079) -> __m256i {
15080 unsafe {
15081 static_assert_sae!(SAE);
15082 let a: f32x16 = a.as_f32x16();
15083 let src: i16x16 = src.as_i16x16();
15084 let r: i16x16 = vcvtps2ph(a, SAE, src, mask:k);
15085 transmute(src:r)
15086 }
15087}
15088
15089/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15090/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15091///
15092/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundps_ph&expand=1356)
15093#[inline]
15094#[target_feature(enable = "avx512f")]
15095#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15096#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))]
15097#[rustc_legacy_const_generics(2)]
15098pub fn _mm512_maskz_cvt_roundps_ph<const SAE: i32>(k: __mmask16, a: __m512) -> __m256i {
15099 unsafe {
15100 static_assert_sae!(SAE);
15101 let a: f32x16 = a.as_f32x16();
15102 let r: i16x16 = vcvtps2ph(a, SAE, src:i16x16::ZERO, mask:k);
15103 transmute(src:r)
15104 }
15105}
15106
15107/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15108/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15109/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15110/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15111/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15112/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15113/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15114///
15115/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvt_roundps_ph&expand=1352)
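///
/// A small masked-usage sketch (hypothetical helper; assumes nightly
/// `stdarch_x86_avx512` with AVX-512F and AVX-512VL):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f,avx512vl")]
/// fn truncate_low_lanes(src: __m128i, a: __m256) -> __m128i {
///     // Illustrative only: just the four lanes selected by the writemask are
///     // converted (truncating toward zero); the rest keep the bits from `src`.
///     _mm256_mask_cvt_roundps_ph::<_MM_FROUND_TO_ZERO>(src, 0b0000_1111, a)
/// }
/// ```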
15116#[inline]
15117#[target_feature(enable = "avx512f,avx512vl")]
15118#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15119#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15120#[rustc_legacy_const_generics(3)]
15121pub fn _mm256_mask_cvt_roundps_ph<const IMM8: i32>(
15122 src: __m128i,
15123 k: __mmask8,
15124 a: __m256,
15125) -> __m128i {
15126 unsafe {
15127 static_assert_uimm_bits!(IMM8, 8);
15128 let a: f32x8 = a.as_f32x8();
15129 let src: i16x8 = src.as_i16x8();
15130 let r: i16x8 = vcvtps2ph256(a, IMM8, src, mask:k);
15131 transmute(src:r)
15132 }
15133}
15134
15135/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15136/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15137/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15138/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15139/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15140/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15141/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15142///
15143/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvt_roundps_ph&expand=1353)
15144#[inline]
15145#[target_feature(enable = "avx512f,avx512vl")]
15146#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15147#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15148#[rustc_legacy_const_generics(2)]
15149pub fn _mm256_maskz_cvt_roundps_ph<const IMM8: i32>(k: __mmask8, a: __m256) -> __m128i {
15150 unsafe {
15151 static_assert_uimm_bits!(IMM8, 8);
15152 let a: f32x8 = a.as_f32x8();
15153 let r: i16x8 = vcvtps2ph256(a, IMM8, src:i16x8::ZERO, mask:k);
15154 transmute(src:r)
15155 }
15156}
15157
15158/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15159/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15160/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15161/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15162/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15163/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15164/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15165///
15166/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvt_roundps_ph&expand=1350)
15167#[inline]
15168#[target_feature(enable = "avx512f,avx512vl")]
15169#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15170#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15171#[rustc_legacy_const_generics(3)]
15172pub fn _mm_mask_cvt_roundps_ph<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
15173 unsafe {
15174 static_assert_uimm_bits!(IMM8, 8);
15175 let a: f32x4 = a.as_f32x4();
15176 let src: i16x8 = src.as_i16x8();
15177 let r: i16x8 = vcvtps2ph128(a, IMM8, src, mask:k);
15178 transmute(src:r)
15179 }
15180}
15181
15182/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15183/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15184/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
15185/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
15186/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
15187/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
15188/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15189///
15190/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvt_roundps_ph&expand=1351)
15191#[inline]
15192#[target_feature(enable = "avx512f,avx512vl")]
15193#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15194#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15195#[rustc_legacy_const_generics(2)]
15196pub fn _mm_maskz_cvt_roundps_ph<const IMM8: i32>(k: __mmask8, a: __m128) -> __m128i {
15197 unsafe {
15198 static_assert_uimm_bits!(IMM8, 8);
15199 let a: f32x4 = a.as_f32x4();
15200 let r: i16x8 = vcvtps2ph128(a, IMM8, src:i16x8::ZERO, mask:k);
15201 transmute(src:r)
15202 }
15203}
15204
15205/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst.\
15206/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15207///
15208/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtps_ph&expand=1778)
15209#[inline]
15210#[target_feature(enable = "avx512f")]
15211#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15212#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))]
15213#[rustc_legacy_const_generics(1)]
15214pub fn _mm512_cvtps_ph<const SAE: i32>(a: __m512) -> __m256i {
15215 unsafe {
15216 static_assert_sae!(SAE);
15217 let a: f32x16 = a.as_f32x16();
15218 let r: i16x16 = vcvtps2ph(a, SAE, src:i16x16::ZERO, mask:0b11111111_11111111);
15219 transmute(src:r)
15220 }
15221}
15222
15223/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15224/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15225///
15226/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtps_ph&expand=1779)
15227#[inline]
15228#[target_feature(enable = "avx512f")]
15229#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15230#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))]
15231#[rustc_legacy_const_generics(3)]
15232pub fn _mm512_mask_cvtps_ph<const SAE: i32>(src: __m256i, k: __mmask16, a: __m512) -> __m256i {
15233 unsafe {
15234 static_assert_sae!(SAE);
15235 let a: f32x16 = a.as_f32x16();
15236 let src: i16x16 = src.as_i16x16();
15237 let r: i16x16 = vcvtps2ph(a, SAE, src, mask:k);
15238 transmute(src:r)
15239 }
15240}
15241
15242/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15243/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15244///
15245/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtps_ph&expand=1780)
15246#[inline]
15247#[target_feature(enable = "avx512f")]
15248#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15249#[cfg_attr(test, assert_instr(vcvtps2ph, SAE = 8))]
15250#[rustc_legacy_const_generics(2)]
15251pub fn _mm512_maskz_cvtps_ph<const SAE: i32>(k: __mmask16, a: __m512) -> __m256i {
15252 unsafe {
15253 static_assert_sae!(SAE);
15254 let a: f32x16 = a.as_f32x16();
15255 let r: i16x16 = vcvtps2ph(a, SAE, src:i16x16::ZERO, mask:k);
15256 transmute(src:r)
15257 }
15258}
15259
15260/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15261/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15262/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
15263/// * [`_MM_FROUND_TO_NEG_INF`] : round down
15264/// * [`_MM_FROUND_TO_POS_INF`] : round up
15265/// * [`_MM_FROUND_TO_ZERO`] : truncate
15266/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15267///
15268/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtps_ph&expand=1776)
15269#[inline]
15270#[target_feature(enable = "avx512f,avx512vl")]
15271#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15272#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15273#[rustc_legacy_const_generics(3)]
15274pub fn _mm256_mask_cvtps_ph<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m256) -> __m128i {
15275 unsafe {
15276 static_assert_uimm_bits!(IMM8, 8);
15277 let a: f32x8 = a.as_f32x8();
15278 let src: i16x8 = src.as_i16x8();
15279 let r: i16x8 = vcvtps2ph256(a, IMM8, src, mask:k);
15280 transmute(src:r)
15281 }
15282}
15283
15284/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15285/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15286/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
15287/// * [`_MM_FROUND_TO_NEG_INF`] : round down
15288/// * [`_MM_FROUND_TO_POS_INF`] : round up
15289/// * [`_MM_FROUND_TO_ZERO`] : truncate
15290/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15291///
15292/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtps_ph&expand=1777)
15293#[inline]
15294#[target_feature(enable = "avx512f,avx512vl")]
15295#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15296#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15297#[rustc_legacy_const_generics(2)]
15298pub fn _mm256_maskz_cvtps_ph<const IMM8: i32>(k: __mmask8, a: __m256) -> __m128i {
15299 unsafe {
15300 static_assert_uimm_bits!(IMM8, 8);
15301 let a: f32x8 = a.as_f32x8();
15302 let r: i16x8 = vcvtps2ph256(a, IMM8, src:i16x8::ZERO, mask:k);
15303 transmute(src:r)
15304 }
15305}
15306
15307/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15308/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15309/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
15310/// * [`_MM_FROUND_TO_NEG_INF`] : round down
15311/// * [`_MM_FROUND_TO_POS_INF`] : round up
15312/// * [`_MM_FROUND_TO_ZERO`] : truncate
15313/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15314///
15315/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtps_ph&expand=1773)
15316#[inline]
15317#[target_feature(enable = "avx512f,avx512vl")]
15318#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15319#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15320#[rustc_legacy_const_generics(3)]
15321pub fn _mm_mask_cvtps_ph<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
15322 unsafe {
15323 static_assert_uimm_bits!(IMM8, 8);
15324 let a: f32x4 = a.as_f32x4();
15325 let src: i16x8 = src.as_i16x8();
15326 let r: i16x8 = vcvtps2ph128(a, IMM8, src, mask:k);
15327 transmute(src:r)
15328 }
15329}
15330
15331/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15332/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
15333/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
15334/// * [`_MM_FROUND_TO_NEG_INF`] : round down
15335/// * [`_MM_FROUND_TO_POS_INF`] : round up
15336/// * [`_MM_FROUND_TO_ZERO`] : truncate
15337/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
15338///
15339/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtps_ph&expand=1774)
15340#[inline]
15341#[target_feature(enable = "avx512f,avx512vl")]
15342#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15343#[cfg_attr(test, assert_instr(vcvtps2ph, IMM8 = 8))]
15344#[rustc_legacy_const_generics(2)]
15345pub fn _mm_maskz_cvtps_ph<const IMM8: i32>(k: __mmask8, a: __m128) -> __m128i {
15346 unsafe {
15347 static_assert_uimm_bits!(IMM8, 8);
15348 let a: f32x4 = a.as_f32x4();
15349 let r: i16x8 = vcvtps2ph128(a, IMM8, src:i16x8::ZERO, mask:k);
15350 transmute(src:r)
15351 }
15352}
15353
15354/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.\
15355/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15356///
15357/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvt_roundph_ps&expand=1332)
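///
/// A minimal usage sketch (hypothetical helper; assumes nightly
/// `stdarch_x86_avx512` and AVX-512F):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn f16_bits_to_f32(a: __m256i) -> __m512 {
///     // Illustrative wrapper: widens 16 packed half-precision bit patterns to
///     // f32. The conversion is exact, so only exception suppression is tunable.
///     _mm512_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(a)
/// }
/// ```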
15358#[inline]
15359#[target_feature(enable = "avx512f")]
15360#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15361#[cfg_attr(test, assert_instr(vcvtph2ps, SAE = 8))]
15362#[rustc_legacy_const_generics(1)]
15363pub fn _mm512_cvt_roundph_ps<const SAE: i32>(a: __m256i) -> __m512 {
15364 unsafe {
15365 static_assert_sae!(SAE);
15366 let a: i16x16 = a.as_i16x16();
15367 let r: f32x16 = vcvtph2ps(a, src:f32x16::ZERO, mask:0b11111111_11111111, SAE);
15368 transmute(src:r)
15369 }
15370}
15371
15372/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15373/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15374///
15375/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvt_roundph_ps&expand=1333)
15376#[inline]
15377#[target_feature(enable = "avx512f")]
15378#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15379#[cfg_attr(test, assert_instr(vcvtph2ps, SAE = 8))]
15380#[rustc_legacy_const_generics(3)]
15381pub fn _mm512_mask_cvt_roundph_ps<const SAE: i32>(src: __m512, k: __mmask16, a: __m256i) -> __m512 {
15382 unsafe {
15383 static_assert_sae!(SAE);
15384 let a: i16x16 = a.as_i16x16();
15385 let src: f32x16 = src.as_f32x16();
15386 let r: f32x16 = vcvtph2ps(a, src, mask:k, SAE);
15387 transmute(src:r)
15388 }
15389}
15390
15391/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15392/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15393///
15394/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvt_roundph_ps&expand=1334)
15395#[inline]
15396#[target_feature(enable = "avx512f")]
15397#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15398#[cfg_attr(test, assert_instr(vcvtph2ps, SAE = 8))]
15399#[rustc_legacy_const_generics(2)]
15400pub fn _mm512_maskz_cvt_roundph_ps<const SAE: i32>(k: __mmask16, a: __m256i) -> __m512 {
15401 unsafe {
15402 static_assert_sae!(SAE);
15403 let a: i16x16 = a.as_i16x16();
15404 let r: f32x16 = vcvtph2ps(a, src:f32x16::ZERO, mask:k, SAE);
15405 transmute(src:r)
15406 }
15407}
15408
15409/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.
15410///
15411/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtph_ps&expand=1723)
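///
/// A brief round-trip sketch pairing this with [`_mm512_cvtps_ph`] (hypothetical
/// helper; assumes nightly `stdarch_x86_avx512` and AVX-512F):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn through_f16(a: __m512) -> __m512 {
///     // Illustrative only: f32 -> f16 loses precision, so converting back yields
///     // the nearest representable half-precision values, not the original inputs.
///     let half_bits = _mm512_cvtps_ph::<_MM_FROUND_NO_EXC>(a);
///     _mm512_cvtph_ps(half_bits)
/// }
/// ```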
15412#[inline]
15413#[target_feature(enable = "avx512f")]
15414#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15415#[cfg_attr(test, assert_instr(vcvtph2ps))]
15416pub fn _mm512_cvtph_ps(a: __m256i) -> __m512 {
15417 unsafe {
15418 transmute(src:vcvtph2ps(
15419 a.as_i16x16(),
15420 src:f32x16::ZERO,
15421 mask:0b11111111_11111111,
15422 _MM_FROUND_NO_EXC,
15423 ))
15424 }
15425}
15426
15427/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15428///
15429/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtph_ps&expand=1724)
15430#[inline]
15431#[target_feature(enable = "avx512f")]
15432#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15433#[cfg_attr(test, assert_instr(vcvtph2ps))]
15434pub fn _mm512_mask_cvtph_ps(src: __m512, k: __mmask16, a: __m256i) -> __m512 {
15435 unsafe {
15436 transmute(src:vcvtph2ps(
15437 a.as_i16x16(),
15438 src.as_f32x16(),
15439 mask:k,
15440 _MM_FROUND_NO_EXC,
15441 ))
15442 }
15443}
15444
15445/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15446///
15447/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtph_ps&expand=1725)
15448#[inline]
15449#[target_feature(enable = "avx512f")]
15450#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15451#[cfg_attr(test, assert_instr(vcvtph2ps))]
15452pub fn _mm512_maskz_cvtph_ps(k: __mmask16, a: __m256i) -> __m512 {
15453 unsafe { transmute(src:vcvtph2ps(a.as_i16x16(), src:f32x16::ZERO, mask:k, _MM_FROUND_NO_EXC)) }
15454}
15455
15456/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15457///
15458/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtph_ps&expand=1721)
15459#[inline]
15460#[target_feature(enable = "avx512f,avx512vl")]
15461#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15462#[cfg_attr(test, assert_instr(vcvtph2ps))]
15463pub fn _mm256_mask_cvtph_ps(src: __m256, k: __mmask8, a: __m128i) -> __m256 {
15464 unsafe {
15465 let convert: __m256 = _mm256_cvtph_ps(a);
15466 transmute(src:simd_select_bitmask(m:k, yes:convert.as_f32x8(), no:src.as_f32x8()))
15467 }
15468}
15469
15470/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15471///
15472/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvtph_ps&expand=1722)
15473#[inline]
15474#[target_feature(enable = "avx512f,avx512vl")]
15475#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15476#[cfg_attr(test, assert_instr(vcvtph2ps))]
15477pub fn _mm256_maskz_cvtph_ps(k: __mmask8, a: __m128i) -> __m256 {
15478 unsafe {
15479 let convert: __m256 = _mm256_cvtph_ps(a);
15480 transmute(src:simd_select_bitmask(m:k, yes:convert.as_f32x8(), no:f32x8::ZERO))
15481 }
15482}
15483
15484/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15485///
15486/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtph_ps&expand=1718)
15487#[inline]
15488#[target_feature(enable = "avx512f,avx512vl")]
15489#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15490#[cfg_attr(test, assert_instr(vcvtph2ps))]
15491pub fn _mm_mask_cvtph_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 {
15492 unsafe {
15493 let convert: __m128 = _mm_cvtph_ps(a);
15494 transmute(src:simd_select_bitmask(m:k, yes:convert.as_f32x4(), no:src.as_f32x4()))
15495 }
15496}
15497
15498/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15499///
15500/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvtph_ps&expand=1719)
15501#[inline]
15502#[target_feature(enable = "avx512f,avx512vl")]
15503#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15504#[cfg_attr(test, assert_instr(vcvtph2ps))]
15505pub fn _mm_maskz_cvtph_ps(k: __mmask8, a: __m128i) -> __m128 {
15506 unsafe {
15507 let convert: __m128 = _mm_cvtph_ps(a);
15508 transmute(src:simd_select_bitmask(m:k, yes:convert.as_f32x4(), no:f32x4::ZERO))
15509 }
15510}
15511
15512/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.\
15513/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15514///
15515/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundps_epi32&expand=1916)
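///
/// A minimal usage sketch (hypothetical helper; assumes nightly
/// `stdarch_x86_avx512` and AVX-512F):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn truncate_to_i32(a: __m512) -> __m512i {
///     // Illustrative wrapper: `cvtt` always truncates toward zero (e.g. -1.9 -> -1);
///     // the SAE parameter only controls whether exceptions are suppressed.
///     _mm512_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(a)
/// }
/// ```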
15516#[inline]
15517#[target_feature(enable = "avx512f")]
15518#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15519#[cfg_attr(test, assert_instr(vcvttps2dq, SAE = 8))]
15520#[rustc_legacy_const_generics(1)]
15521pub fn _mm512_cvtt_roundps_epi32<const SAE: i32>(a: __m512) -> __m512i {
15522 unsafe {
15523 static_assert_sae!(SAE);
15524 let a: f32x16 = a.as_f32x16();
15525 let r: i32x16 = vcvttps2dq(a, src:i32x16::ZERO, mask:0b11111111_11111111, SAE);
15526 transmute(src:r)
15527 }
15528}
15529
15530/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15531/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15532///
15533/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundps_epi32&expand=1917)
15534#[inline]
15535#[target_feature(enable = "avx512f")]
15536#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15537#[cfg_attr(test, assert_instr(vcvttps2dq, SAE = 8))]
15538#[rustc_legacy_const_generics(3)]
15539pub fn _mm512_mask_cvtt_roundps_epi32<const SAE: i32>(
15540 src: __m512i,
15541 k: __mmask16,
15542 a: __m512,
15543) -> __m512i {
15544 unsafe {
15545 static_assert_sae!(SAE);
15546 let a: f32x16 = a.as_f32x16();
15547 let src: i32x16 = src.as_i32x16();
15548 let r: i32x16 = vcvttps2dq(a, src, mask:k, SAE);
15549 transmute(src:r)
15550 }
15551}
15552
15553/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15554/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15555///
15556/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundps_epi32&expand=1918)
15557#[inline]
15558#[target_feature(enable = "avx512f")]
15559#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15560#[cfg_attr(test, assert_instr(vcvttps2dq, SAE = 8))]
15561#[rustc_legacy_const_generics(2)]
15562pub fn _mm512_maskz_cvtt_roundps_epi32<const SAE: i32>(k: __mmask16, a: __m512) -> __m512i {
15563 unsafe {
15564 static_assert_sae!(SAE);
15565 let a: f32x16 = a.as_f32x16();
15566 let r: i32x16 = vcvttps2dq(a, src:i32x16::ZERO, mask:k, SAE);
15567 transmute(src:r)
15568 }
15569}
15570
15571/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.\
15572/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15573///
15574/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundps_epu32&expand=1922)
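///
/// A minimal usage sketch of the unsigned truncation (hypothetical helper; assumes
/// nightly `stdarch_x86_avx512` and AVX-512F):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn truncate_to_u32(a: __m512) -> __m512i {
///     // Illustrative wrapper: negative or out-of-range lanes cannot be
///     // represented as u32 and yield the unsigned indefinite value (all bits set).
///     _mm512_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(a)
/// }
/// ```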
15575#[inline]
15576#[target_feature(enable = "avx512f")]
15577#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15578#[cfg_attr(test, assert_instr(vcvttps2udq, SAE = 8))]
15579#[rustc_legacy_const_generics(1)]
15580pub fn _mm512_cvtt_roundps_epu32<const SAE: i32>(a: __m512) -> __m512i {
15581 unsafe {
15582 static_assert_sae!(SAE);
15583 let a: f32x16 = a.as_f32x16();
15584 let r: u32x16 = vcvttps2udq(a, src:u32x16::ZERO, mask:0b11111111_11111111, SAE);
15585 transmute(src:r)
15586 }
15587}
15588
15589/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15590/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15591///
15592/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundps_epu32&expand=1923)
15593#[inline]
15594#[target_feature(enable = "avx512f")]
15595#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15596#[cfg_attr(test, assert_instr(vcvttps2udq, SAE = 8))]
15597#[rustc_legacy_const_generics(3)]
15598pub fn _mm512_mask_cvtt_roundps_epu32<const SAE: i32>(
15599 src: __m512i,
15600 k: __mmask16,
15601 a: __m512,
15602) -> __m512i {
15603 unsafe {
15604 static_assert_sae!(SAE);
15605 let a: f32x16 = a.as_f32x16();
15606 let src: u32x16 = src.as_u32x16();
15607 let r: u32x16 = vcvttps2udq(a, src, mask:k, SAE);
15608 transmute(src:r)
15609 }
15610}
15611
15612/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15613/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15614///
15615/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundps_epu32&expand=1924)
15616#[inline]
15617#[target_feature(enable = "avx512f")]
15618#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15619#[cfg_attr(test, assert_instr(vcvttps2udq, SAE = 8))]
15620#[rustc_legacy_const_generics(2)]
15621pub fn _mm512_maskz_cvtt_roundps_epu32<const SAE: i32>(k: __mmask16, a: __m512) -> __m512i {
15622 unsafe {
15623 static_assert_sae!(SAE);
15624 let a: f32x16 = a.as_f32x16();
15625 let r: u32x16 = vcvttps2udq(a, src:u32x16::ZERO, mask:k, SAE);
15626 transmute(src:r)
15627 }
15628}
15629
15630/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.\
15631/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15632///
15633/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundpd_epi32&expand=1904)
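///
/// A minimal usage sketch (hypothetical helper; assumes nightly
/// `stdarch_x86_avx512` and AVX-512F):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn truncate_f64_to_i32(a: __m512d) -> __m256i {
///     // Illustrative wrapper: eight f64 lanes narrow to eight truncated i32
///     // lanes, so the result fits in a 256-bit vector rather than a 512-bit one.
///     _mm512_cvtt_roundpd_epi32::<_MM_FROUND_NO_EXC>(a)
/// }
/// ```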
15634#[inline]
15635#[target_feature(enable = "avx512f")]
15636#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15637#[cfg_attr(test, assert_instr(vcvttpd2dq, SAE = 8))]
15638#[rustc_legacy_const_generics(1)]
15639pub fn _mm512_cvtt_roundpd_epi32<const SAE: i32>(a: __m512d) -> __m256i {
15640 unsafe {
15641 static_assert_sae!(SAE);
15642 let a: f64x8 = a.as_f64x8();
15643 let r: i32x8 = vcvttpd2dq(a, src:i32x8::ZERO, mask:0b11111111, SAE);
15644 transmute(src:r)
15645 }
15646}
15647
15648/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15649/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15650///
15651/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundpd_epi32&expand=1905)
15652#[inline]
15653#[target_feature(enable = "avx512f")]
15654#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15655#[cfg_attr(test, assert_instr(vcvttpd2dq, SAE = 8))]
15656#[rustc_legacy_const_generics(3)]
15657pub fn _mm512_mask_cvtt_roundpd_epi32<const SAE: i32>(
15658 src: __m256i,
15659 k: __mmask8,
15660 a: __m512d,
15661) -> __m256i {
15662 unsafe {
15663 static_assert_sae!(SAE);
15664 let a: f64x8 = a.as_f64x8();
15665 let src: i32x8 = src.as_i32x8();
15666 let r: i32x8 = vcvttpd2dq(a, src, mask:k, SAE);
15667 transmute(src:r)
15668 }
15669}
15670
15671/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15672/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15673///
15674/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundpd_epi32&expand=1918)
15675#[inline]
15676#[target_feature(enable = "avx512f")]
15677#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15678#[cfg_attr(test, assert_instr(vcvttpd2dq, SAE = 8))]
15679#[rustc_legacy_const_generics(2)]
15680pub fn _mm512_maskz_cvtt_roundpd_epi32<const SAE: i32>(k: __mmask8, a: __m512d) -> __m256i {
15681 unsafe {
15682 static_assert_sae!(SAE);
15683 let a: f64x8 = a.as_f64x8();
15684 let r: i32x8 = vcvttpd2dq(a, src:i32x8::ZERO, mask:k, SAE);
15685 transmute(src:r)
15686 }
15687}
15688
15689/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.\
15690/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15691///
15692/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtt_roundpd_epu32&expand=1910)
15693#[inline]
15694#[target_feature(enable = "avx512f")]
15695#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15696#[cfg_attr(test, assert_instr(vcvttpd2udq, SAE = 8))]
15697#[rustc_legacy_const_generics(1)]
15698pub fn _mm512_cvtt_roundpd_epu32<const SAE: i32>(a: __m512d) -> __m256i {
15699 unsafe {
15700 static_assert_sae!(SAE);
15701 let a: f64x8 = a.as_f64x8();
15702 let r: u32x8 = vcvttpd2udq(a, src:i32x8::ZERO, mask:0b11111111, SAE);
15703 transmute(src:r)
15704 }
15705}
15706
15707/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).\
15708/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15709///
15710/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtt_roundpd_epu32&expand=1911)
15711#[inline]
15712#[target_feature(enable = "avx512f")]
15713#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15714#[cfg_attr(test, assert_instr(vcvttpd2udq, SAE = 8))]
15715#[rustc_legacy_const_generics(3)]
15716pub fn _mm512_mask_cvtt_roundpd_epu32<const SAE: i32>(
15717 src: __m256i,
15718 k: __mmask8,
15719 a: __m512d,
15720) -> __m256i {
15721 unsafe {
15722 static_assert_sae!(SAE);
15723 let a: f64x8 = a.as_f64x8();
15724 let src: i32x8 = src.as_i32x8();
15725 let r: u32x8 = vcvttpd2udq(a, src, mask:k, SAE);
15726 transmute(src:r)
15727 }
15728}
15729
15730/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.
15731///
15732/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttps_epi32&expand=1984)
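///
/// A minimal usage sketch (the scalar helper is hypothetical; assumes nightly
/// `stdarch_x86_avx512` and AVX-512F):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn truncate_scalar(x: f32) -> i32 {
///     // Illustrative only: truncation drops the fractional part toward zero in
///     // every lane, e.g. 2.7 -> 2 and -2.7 -> -2.
///     let v = _mm512_cvttps_epi32(_mm512_set1_ps(x));
///     _mm512_cvtsi512_si32(v)
/// }
/// ```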
15733#[inline]
15734#[target_feature(enable = "avx512f")]
15735#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15736#[cfg_attr(test, assert_instr(vcvttps2dq))]
15737pub fn _mm512_cvttps_epi32(a: __m512) -> __m512i {
15738 unsafe {
15739 transmute(src:vcvttps2dq(
15740 a.as_f32x16(),
15741 src:i32x16::ZERO,
15742 mask:0b11111111_11111111,
15743 _MM_FROUND_CUR_DIRECTION,
15744 ))
15745 }
15746}
15747
15748/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15749///
15750/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttps_epi32&expand=1985)
15751#[inline]
15752#[target_feature(enable = "avx512f")]
15753#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15754#[cfg_attr(test, assert_instr(vcvttps2dq))]
15755pub fn _mm512_mask_cvttps_epi32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
15756 unsafe {
15757 transmute(src:vcvttps2dq(
15758 a.as_f32x16(),
15759 src.as_i32x16(),
15760 mask:k,
15761 _MM_FROUND_CUR_DIRECTION,
15762 ))
15763 }
15764}
15765
15766/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15767///
15768/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttps_epi32&expand=1986)
15769#[inline]
15770#[target_feature(enable = "avx512f")]
15771#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15772#[cfg_attr(test, assert_instr(vcvttps2dq))]
15773pub fn _mm512_maskz_cvttps_epi32(k: __mmask16, a: __m512) -> __m512i {
15774 unsafe {
15775 transmute(src:vcvttps2dq(
15776 a.as_f32x16(),
15777 src:i32x16::ZERO,
15778 mask:k,
15779 _MM_FROUND_CUR_DIRECTION,
15780 ))
15781 }
15782}
15783
15784/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15785///
15786/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttps_epi32&expand=1982)
15787#[inline]
15788#[target_feature(enable = "avx512f,avx512vl")]
15789#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15790#[cfg_attr(test, assert_instr(vcvttps2dq))]
15791pub fn _mm256_mask_cvttps_epi32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
15792 unsafe { transmute(src:vcvttps2dq256(a.as_f32x8(), src.as_i32x8(), mask:k)) }
15793}
15794
15795/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15796///
15797/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttps_epi32&expand=1983)
15798#[inline]
15799#[target_feature(enable = "avx512f,avx512vl")]
15800#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15801#[cfg_attr(test, assert_instr(vcvttps2dq))]
15802pub fn _mm256_maskz_cvttps_epi32(k: __mmask8, a: __m256) -> __m256i {
15803 unsafe { transmute(src:vcvttps2dq256(a.as_f32x8(), src:i32x8::ZERO, mask:k)) }
15804}
15805
15806/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15807///
15808/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttps_epi32&expand=1979)
15809#[inline]
15810#[target_feature(enable = "avx512f,avx512vl")]
15811#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15812#[cfg_attr(test, assert_instr(vcvttps2dq))]
15813pub fn _mm_mask_cvttps_epi32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
15814 unsafe { transmute(src:vcvttps2dq128(a.as_f32x4(), src.as_i32x4(), mask:k)) }
15815}
15816
15817/// Convert packed single-precision (32-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15818///
15819/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttps_epi32&expand=1980)
15820#[inline]
15821#[target_feature(enable = "avx512f,avx512vl")]
15822#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15823#[cfg_attr(test, assert_instr(vcvttps2dq))]
15824pub fn _mm_maskz_cvttps_epi32(k: __mmask8, a: __m128) -> __m128i {
15825 unsafe { transmute(src:vcvttps2dq128(a.as_f32x4(), src:i32x4::ZERO, mask:k)) }
15826}
15827
15828/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
15829///
15830/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttps_epu32&expand=2002)
15831#[inline]
15832#[target_feature(enable = "avx512f")]
15833#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15834#[cfg_attr(test, assert_instr(vcvttps2udq))]
15835pub fn _mm512_cvttps_epu32(a: __m512) -> __m512i {
15836 unsafe {
15837 transmute(src:vcvttps2udq(
15838 a.as_f32x16(),
15839 src:u32x16::ZERO,
15840 mask:0b11111111_11111111,
15841 _MM_FROUND_CUR_DIRECTION,
15842 ))
15843 }
15844}
15845
15846/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15847///
15848/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttps_epu32&expand=2003)
15849#[inline]
15850#[target_feature(enable = "avx512f")]
15851#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15852#[cfg_attr(test, assert_instr(vcvttps2udq))]
15853pub fn _mm512_mask_cvttps_epu32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
15854 unsafe {
15855 transmute(src:vcvttps2udq(
15856 a.as_f32x16(),
15857 src.as_u32x16(),
15858 mask:k,
15859 _MM_FROUND_CUR_DIRECTION,
15860 ))
15861 }
15862}
15863
15864/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15865///
15866/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttps_epu32&expand=2004)
15867#[inline]
15868#[target_feature(enable = "avx512f")]
15869#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15870#[cfg_attr(test, assert_instr(vcvttps2udq))]
15871pub fn _mm512_maskz_cvttps_epu32(k: __mmask16, a: __m512) -> __m512i {
15872 unsafe {
15873 transmute(src:vcvttps2udq(
15874 a.as_f32x16(),
15875 src:u32x16::ZERO,
15876 mask:k,
15877 _MM_FROUND_CUR_DIRECTION,
15878 ))
15879 }
15880}
15881
15882/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
15883///
15884/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttps_epu32&expand=1999)
15885#[inline]
15886#[target_feature(enable = "avx512f,avx512vl")]
15887#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15888#[cfg_attr(test, assert_instr(vcvttps2udq))]
15889pub fn _mm256_cvttps_epu32(a: __m256) -> __m256i {
15890 unsafe { transmute(src:vcvttps2udq256(a.as_f32x8(), src:u32x8::ZERO, mask:0b11111111)) }
15891}
15892
15893/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15894///
15895/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttps_epu32&expand=2000)
15896#[inline]
15897#[target_feature(enable = "avx512f,avx512vl")]
15898#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15899#[cfg_attr(test, assert_instr(vcvttps2udq))]
15900pub fn _mm256_mask_cvttps_epu32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
15901 unsafe { transmute(src:vcvttps2udq256(a.as_f32x8(), src.as_u32x8(), mask:k)) }
15902}
15903
15904/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15905///
15906/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttps_epu32&expand=2001)
15907#[inline]
15908#[target_feature(enable = "avx512f,avx512vl")]
15909#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15910#[cfg_attr(test, assert_instr(vcvttps2udq))]
15911pub fn _mm256_maskz_cvttps_epu32(k: __mmask8, a: __m256) -> __m256i {
15912 unsafe { transmute(src:vcvttps2udq256(a.as_f32x8(), src:u32x8::ZERO, mask:k)) }
15913}
15914
15915/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
15916///
15917/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttps_epu32&expand=1996)
15918#[inline]
15919#[target_feature(enable = "avx512f,avx512vl")]
15920#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15921#[cfg_attr(test, assert_instr(vcvttps2udq))]
15922pub fn _mm_cvttps_epu32(a: __m128) -> __m128i {
15923 unsafe { transmute(src:vcvttps2udq128(a.as_f32x4(), src:u32x4::ZERO, mask:0b11111111)) }
15924}
15925
15926/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15927///
15928/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttps_epu32&expand=1997)
15929#[inline]
15930#[target_feature(enable = "avx512f,avx512vl")]
15931#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15932#[cfg_attr(test, assert_instr(vcvttps2udq))]
15933pub fn _mm_mask_cvttps_epu32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
15934 unsafe { transmute(src:vcvttps2udq128(a.as_f32x4(), src.as_u32x4(), mask:k)) }
15935}
15936
15937/// Convert packed single-precision (32-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
15938///
15939/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttps_epu32&expand=1998)
15940#[inline]
15941#[target_feature(enable = "avx512f,avx512vl")]
15942#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15943#[cfg_attr(test, assert_instr(vcvttps2udq))]
15944pub fn _mm_maskz_cvttps_epu32(k: __mmask8, a: __m128) -> __m128i {
15945 unsafe { transmute(src:vcvttps2udq128(a.as_f32x4(), src:u32x4::ZERO, mask:k)) }
15946}
15947
15948/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).\
15949/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
15950///
15951/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvtt_roundpd_epu32&expand=1912)
15952#[inline]
15953#[target_feature(enable = "avx512f")]
15954#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15955#[cfg_attr(test, assert_instr(vcvttpd2udq, SAE = 8))]
15956#[rustc_legacy_const_generics(2)]
15957pub fn _mm512_maskz_cvtt_roundpd_epu32<const SAE: i32>(k: __mmask8, a: __m512d) -> __m256i {
15958 unsafe {
15959 static_assert_sae!(SAE);
15960 let a: f64x8 = a.as_f64x8();
15961 let r: u32x8 = vcvttpd2udq(a, src:i32x8::ZERO, mask:k, SAE);
15962 transmute(src:r)
15963 }
15964}
15965
15966/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst.
15967///
15968/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttpd_epi32&expand=1947)
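///
/// A minimal usage sketch (hypothetical helper; assumes nightly
/// `stdarch_x86_avx512` and AVX-512F):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn truncate_doubles(a: __m512d) -> __m256i {
///     // Illustrative wrapper: like the `_round` variant but without an explicit
///     // SAE parameter, so the current MXCSR exception behavior applies while the
///     // values themselves still truncate toward zero.
///     _mm512_cvttpd_epi32(a)
/// }
/// ```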
15969#[inline]
15970#[target_feature(enable = "avx512f")]
15971#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15972#[cfg_attr(test, assert_instr(vcvttpd2dq))]
15973pub fn _mm512_cvttpd_epi32(a: __m512d) -> __m256i {
15974 unsafe {
15975 transmute(src:vcvttpd2dq(
15976 a.as_f64x8(),
15977 src:i32x8::ZERO,
15978 mask:0b11111111,
15979 _MM_FROUND_CUR_DIRECTION,
15980 ))
15981 }
15982}
15983
15984/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
15985///
15986/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttpd_epi32&expand=1948)
15987#[inline]
15988#[target_feature(enable = "avx512f")]
15989#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
15990#[cfg_attr(test, assert_instr(vcvttpd2dq))]
15991pub fn _mm512_mask_cvttpd_epi32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
15992 unsafe {
        transmute(vcvttpd2dq(
            a.as_f64x8(),
            src.as_i32x8(),
            k,
15997 _MM_FROUND_CUR_DIRECTION,
15998 ))
15999 }
16000}
16001
16002/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16003///
16004/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttpd_epi32&expand=1949)
16005#[inline]
16006#[target_feature(enable = "avx512f")]
16007#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16008#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16009pub fn _mm512_maskz_cvttpd_epi32(k: __mmask8, a: __m512d) -> __m256i {
16010 unsafe {
        transmute(vcvttpd2dq(
            a.as_f64x8(),
            i32x8::ZERO,
            k,
16015 _MM_FROUND_CUR_DIRECTION,
16016 ))
16017 }
16018}
16019
16020/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16021///
16022/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttpd_epi32&expand=1945)
16023#[inline]
16024#[target_feature(enable = "avx512f,avx512vl")]
16025#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16026#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16027pub fn _mm256_mask_cvttpd_epi32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
    unsafe { transmute(vcvttpd2dq256(a.as_f64x4(), src.as_i32x4(), k)) }
16029}
16030
16031/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16032///
16033/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttpd_epi32&expand=1946)
16034#[inline]
16035#[target_feature(enable = "avx512f,avx512vl")]
16036#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16037#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16038pub fn _mm256_maskz_cvttpd_epi32(k: __mmask8, a: __m256d) -> __m128i {
    unsafe { transmute(vcvttpd2dq256(a.as_f64x4(), i32x4::ZERO, k)) }
16040}
16041
16042/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16043///
16044/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttpd_epi32&expand=1942)
16045#[inline]
16046#[target_feature(enable = "avx512f,avx512vl")]
16047#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16048#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16049pub fn _mm_mask_cvttpd_epi32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
    unsafe { transmute(vcvttpd2dq128(a.as_f64x2(), src.as_i32x4(), k)) }
16051}
16052
16053/// Convert packed double-precision (64-bit) floating-point elements in a to packed 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16054///
16055/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttpd_epi32&expand=1943)
16056#[inline]
16057#[target_feature(enable = "avx512f,avx512vl")]
16058#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16059#[cfg_attr(test, assert_instr(vcvttpd2dq))]
16060pub fn _mm_maskz_cvttpd_epi32(k: __mmask8, a: __m128d) -> __m128i {
    unsafe { transmute(vcvttpd2dq128(a.as_f64x2(), i32x4::ZERO, k)) }
16062}
16063
16064/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
16065///
16066/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvttpd_epu32&expand=1965)
16067#[inline]
16068#[target_feature(enable = "avx512f")]
16069#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16070#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16071pub fn _mm512_cvttpd_epu32(a: __m512d) -> __m256i {
16072 unsafe {
        transmute(vcvttpd2udq(
            a.as_f64x8(),
            i32x8::ZERO,
            0b11111111,
16077 _MM_FROUND_CUR_DIRECTION,
16078 ))
16079 }
16080}
16081
16082/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16083///
16084/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvttpd_epu32&expand=1966)
16085#[inline]
16086#[target_feature(enable = "avx512f")]
16087#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16088#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16089pub fn _mm512_mask_cvttpd_epu32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
16090 unsafe {
        transmute(vcvttpd2udq(
            a.as_f64x8(),
            src.as_i32x8(),
            k,
16095 _MM_FROUND_CUR_DIRECTION,
16096 ))
16097 }
16098}
16099
16100/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16101///
16102/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_cvttpd_epu32&expand=1967)
16103#[inline]
16104#[target_feature(enable = "avx512f")]
16105#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16106#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16107pub fn _mm512_maskz_cvttpd_epu32(k: __mmask8, a: __m512d) -> __m256i {
16108 unsafe {
        transmute(vcvttpd2udq(
            a.as_f64x8(),
            i32x8::ZERO,
            k,
16113 _MM_FROUND_CUR_DIRECTION,
16114 ))
16115 }
16116}
16117
16118/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
16119///
16120/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvttpd_epu32&expand=1962)
16121#[inline]
16122#[target_feature(enable = "avx512f,avx512vl")]
16123#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16124#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16125pub fn _mm256_cvttpd_epu32(a: __m256d) -> __m128i {
    unsafe { transmute(vcvttpd2udq256(a.as_f64x4(), i32x4::ZERO, 0b11111111)) }
16127}
16128
16129/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16130///
16131/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvttpd_epu32&expand=1963)
16132#[inline]
16133#[target_feature(enable = "avx512f,avx512vl")]
16134#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16135#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16136pub fn _mm256_mask_cvttpd_epu32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
    unsafe { transmute(vcvttpd2udq256(a.as_f64x4(), src.as_i32x4(), k)) }
16138}
16139
16140/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16141///
16142/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_cvttpd_epu32&expand=1964)
16143#[inline]
16144#[target_feature(enable = "avx512f,avx512vl")]
16145#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16146#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16147pub fn _mm256_maskz_cvttpd_epu32(k: __mmask8, a: __m256d) -> __m128i {
    unsafe { transmute(vcvttpd2udq256(a.as_f64x4(), i32x4::ZERO, k)) }
16149}
16150
16151/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst.
16152///
16153/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttpd_epu32&expand=1959)
16154#[inline]
16155#[target_feature(enable = "avx512f,avx512vl")]
16156#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16157#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16158pub fn _mm_cvttpd_epu32(a: __m128d) -> __m128i {
    unsafe { transmute(vcvttpd2udq128(a.as_f64x2(), i32x4::ZERO, 0b11111111)) }
16160}
16161
16162/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
16163///
16164/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvttpd_epu32&expand=1960)
16165#[inline]
16166#[target_feature(enable = "avx512f,avx512vl")]
16167#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16168#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16169pub fn _mm_mask_cvttpd_epu32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
    unsafe { transmute(vcvttpd2udq128(a.as_f64x2(), src.as_i32x4(), k)) }
16171}
16172
16173/// Convert packed double-precision (64-bit) floating-point elements in a to packed unsigned 32-bit integers with truncation, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
16174///
16175/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_cvttpd_epu32&expand=1961)
16176#[inline]
16177#[target_feature(enable = "avx512f,avx512vl")]
16178#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16179#[cfg_attr(test, assert_instr(vcvttpd2udq))]
16180pub fn _mm_maskz_cvttpd_epu32(k: __mmask8, a: __m128d) -> __m128i {
    unsafe { transmute(vcvttpd2udq128(a.as_f64x2(), i32x4::ZERO, k)) }
16182}
16183
16184/// Returns vector of type `__m512d` with all elements set to zero.
16185///
16186/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_pd&expand=5018)
16187#[inline]
16188#[target_feature(enable = "avx512f")]
16189#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16190#[cfg_attr(test, assert_instr(vxorps))]
16191pub fn _mm512_setzero_pd() -> __m512d {
16192 // All-0 is a properly initialized __m512d
16193 unsafe { const { mem::zeroed() } }
16194}
16195
16196/// Returns vector of type `__m512` with all elements set to zero.
16197///
16198/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_ps&expand=5021)
16199#[inline]
16200#[target_feature(enable = "avx512f")]
16201#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16202#[cfg_attr(test, assert_instr(vxorps))]
16203pub fn _mm512_setzero_ps() -> __m512 {
16204 // All-0 is a properly initialized __m512
16205 unsafe { const { mem::zeroed() } }
16206}
16207
16208/// Return vector of type `__m512` with all elements set to zero.
16209///
16210/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero&expand=5014)
16211#[inline]
16212#[target_feature(enable = "avx512f")]
16213#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16214#[cfg_attr(test, assert_instr(vxorps))]
16215pub fn _mm512_setzero() -> __m512 {
16216 // All-0 is a properly initialized __m512
16217 unsafe { const { mem::zeroed() } }
16218}
16219
16220/// Returns vector of type `__m512i` with all elements set to zero.
16221///
16222/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_si512&expand=5024)
16223#[inline]
16224#[target_feature(enable = "avx512f")]
16225#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16226#[cfg_attr(test, assert_instr(vxorps))]
16227pub fn _mm512_setzero_si512() -> __m512i {
16228 // All-0 is a properly initialized __m512i
16229 unsafe { const { mem::zeroed() } }
16230}
16231
16232/// Return vector of type `__m512i` with all elements set to zero.
16233///
16234/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setzero_epi32&expand=5015)
16235#[inline]
16236#[target_feature(enable = "avx512f")]
16237#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16238#[cfg_attr(test, assert_instr(vxorps))]
16239pub fn _mm512_setzero_epi32() -> __m512i {
16240 // All-0 is a properly initialized __m512i
16241 unsafe { const { mem::zeroed() } }
16242}
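
// Illustrative sketch, not part of the upstream API (the helper name is an
// assumption): the `_mm512_setzero*` intrinsics differ only in the vector type
// they return; each produces an all-zero 512-bit value and is expected to
// compile down to a single register-clearing `vxorps`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_setzero_variants() -> (__m512, __m512d, __m512i) {
    (_mm512_setzero_ps(), _mm512_setzero_pd(), _mm512_setzero_si512())
}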
16243
16244/// Sets packed 32-bit integers in `dst` with the supplied values in reverse
16245/// order.
16246///
16247/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_epi32&expand=4991)
16248#[inline]
16249#[target_feature(enable = "avx512f")]
16250#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16251pub fn _mm512_setr_epi32(
16252 e15: i32,
16253 e14: i32,
16254 e13: i32,
16255 e12: i32,
16256 e11: i32,
16257 e10: i32,
16258 e9: i32,
16259 e8: i32,
16260 e7: i32,
16261 e6: i32,
16262 e5: i32,
16263 e4: i32,
16264 e3: i32,
16265 e2: i32,
16266 e1: i32,
16267 e0: i32,
16268) -> __m512i {
16269 unsafe {
16270 let r: i32x16 = i32x16::new(
            e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0,
        );
        transmute(r)
16274 }
16275}
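
// Illustrative sketch, not part of the upstream API (the helper name is an
// assumption): `_mm512_setr_epi32` takes its arguments in memory order (the
// first argument becomes element 0), the reverse of `_mm512_set_epi32`, so the
// two calls below build the same vector.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_set_vs_setr_epi32() -> (__m512i, __m512i) {
    let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
    let b = _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
    (a, b) // element i of both vectors is i
}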
16276
16277/// Set packed 8-bit integers in dst with the supplied values.
16278///
16279/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi8&expand=4915)
16280#[inline]
16281#[target_feature(enable = "avx512f")]
16282#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16283pub fn _mm512_set_epi8(
16284 e63: i8,
16285 e62: i8,
16286 e61: i8,
16287 e60: i8,
16288 e59: i8,
16289 e58: i8,
16290 e57: i8,
16291 e56: i8,
16292 e55: i8,
16293 e54: i8,
16294 e53: i8,
16295 e52: i8,
16296 e51: i8,
16297 e50: i8,
16298 e49: i8,
16299 e48: i8,
16300 e47: i8,
16301 e46: i8,
16302 e45: i8,
16303 e44: i8,
16304 e43: i8,
16305 e42: i8,
16306 e41: i8,
16307 e40: i8,
16308 e39: i8,
16309 e38: i8,
16310 e37: i8,
16311 e36: i8,
16312 e35: i8,
16313 e34: i8,
16314 e33: i8,
16315 e32: i8,
16316 e31: i8,
16317 e30: i8,
16318 e29: i8,
16319 e28: i8,
16320 e27: i8,
16321 e26: i8,
16322 e25: i8,
16323 e24: i8,
16324 e23: i8,
16325 e22: i8,
16326 e21: i8,
16327 e20: i8,
16328 e19: i8,
16329 e18: i8,
16330 e17: i8,
16331 e16: i8,
16332 e15: i8,
16333 e14: i8,
16334 e13: i8,
16335 e12: i8,
16336 e11: i8,
16337 e10: i8,
16338 e9: i8,
16339 e8: i8,
16340 e7: i8,
16341 e6: i8,
16342 e5: i8,
16343 e4: i8,
16344 e3: i8,
16345 e2: i8,
16346 e1: i8,
16347 e0: i8,
16348) -> __m512i {
16349 unsafe {
16350 let r: i8x64 = i8x64::new(
            e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18,
            e19, e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31, e32, e33, e34, e35,
            e36, e37, e38, e39, e40, e41, e42, e43, e44, e45, e46, e47, e48, e49, e50, e51, e52,
            e53, e54, e55, e56, e57, e58, e59, e60, e61, e62, e63,
        );
        transmute(r)
16357 }
16358}
16359
16360/// Set packed 16-bit integers in dst with the supplied values.
16361///
16362/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi16&expand=4905)
16363#[inline]
16364#[target_feature(enable = "avx512f")]
16365#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16366pub fn _mm512_set_epi16(
16367 e31: i16,
16368 e30: i16,
16369 e29: i16,
16370 e28: i16,
16371 e27: i16,
16372 e26: i16,
16373 e25: i16,
16374 e24: i16,
16375 e23: i16,
16376 e22: i16,
16377 e21: i16,
16378 e20: i16,
16379 e19: i16,
16380 e18: i16,
16381 e17: i16,
16382 e16: i16,
16383 e15: i16,
16384 e14: i16,
16385 e13: i16,
16386 e12: i16,
16387 e11: i16,
16388 e10: i16,
16389 e9: i16,
16390 e8: i16,
16391 e7: i16,
16392 e6: i16,
16393 e5: i16,
16394 e4: i16,
16395 e3: i16,
16396 e2: i16,
16397 e1: i16,
16398 e0: i16,
16399) -> __m512i {
16400 unsafe {
16401 let r: i16x32 = i16x32::new(
            e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18,
            e19, e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31,
        );
        transmute(r)
16406 }
16407}
16408
16409/// Set packed 32-bit integers in dst with the repeated 4 element sequence.
16410///
16411/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_epi32&expand=4982)
16412#[inline]
16413#[target_feature(enable = "avx512f")]
16414#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16415pub fn _mm512_set4_epi32(d: i32, c: i32, b: i32, a: i32) -> __m512i {
    _mm512_set_epi32(d, c, b, a, d, c, b, a, d, c, b, a, d, c, b, a)
16417}
16418
16419/// Set packed single-precision (32-bit) floating-point elements in dst with the repeated 4 element sequence.
16420///
16421/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_ps&expand=4985)
16422#[inline]
16423#[target_feature(enable = "avx512f")]
16424#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16425pub fn _mm512_set4_ps(d: f32, c: f32, b: f32, a: f32) -> __m512 {
    _mm512_set_ps(d, c, b, a, d, c, b, a, d, c, b, a, d, c, b, a)
16427}
16428
16429/// Set packed double-precision (64-bit) floating-point elements in dst with the repeated 4 element sequence.
16430///
16431/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_pd&expand=4984)
16432#[inline]
16433#[target_feature(enable = "avx512f")]
16434#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16435pub fn _mm512_set4_pd(d: f64, c: f64, b: f64, a: f64) -> __m512d {
    _mm512_set_pd(d, c, b, a, d, c, b, a)
16437}
16438
16439/// Set packed 32-bit integers in dst with the repeated 4 element sequence in reverse order.
16440///
16441/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_epi32&expand=5009)
16442#[inline]
16443#[target_feature(enable = "avx512f")]
16444#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16445pub fn _mm512_setr4_epi32(d: i32, c: i32, b: i32, a: i32) -> __m512i {
    _mm512_set_epi32(a, b, c, d, a, b, c, d, a, b, c, d, a, b, c, d)
16447}
16448
16449/// Set packed single-precision (32-bit) floating-point elements in dst with the repeated 4 element sequence in reverse order.
16450///
16451/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_ps&expand=5012)
16452#[inline]
16453#[target_feature(enable = "avx512f")]
16454#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16455pub fn _mm512_setr4_ps(d: f32, c: f32, b: f32, a: f32) -> __m512 {
    _mm512_set_ps(a, b, c, d, a, b, c, d, a, b, c, d, a, b, c, d)
16457}
16458
16459/// Set packed double-precision (64-bit) floating-point elements in dst with the repeated 4 element sequence in reverse order.
16460///
16461/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_pd&expand=5011)
16462#[inline]
16463#[target_feature(enable = "avx512f")]
16464#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16465pub fn _mm512_setr4_pd(d: f64, c: f64, b: f64, a: f64) -> __m512d {
    _mm512_set_pd(a, b, c, d, a, b, c, d)
16467}
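
// Illustrative sketch, not part of the upstream API (the helper name is an
// assumption): `_mm512_set4_epi32(d, c, b, a)` places `a` in the lowest
// element of each group of four, while `_mm512_setr4_epi32(d, c, b, a)` places
// `d` there, so the two calls below produce mirrored four-element patterns.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_set4_vs_setr4() -> (__m512i, __m512i) {
    // Elements 0..3 are 1, 2, 3, 4 and the pattern repeats four times.
    let fwd = _mm512_setr4_epi32(1, 2, 3, 4);
    // Elements 0..3 are 4, 3, 2, 1 and the pattern repeats four times.
    let rev = _mm512_set4_epi32(1, 2, 3, 4);
    (fwd, rev)
}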
16468
16469/// Set packed 64-bit integers in dst with the supplied values.
16470///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi64&expand=4910)
16472#[inline]
16473#[target_feature(enable = "avx512f")]
16474#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16475pub fn _mm512_set_epi64(
16476 e0: i64,
16477 e1: i64,
16478 e2: i64,
16479 e3: i64,
16480 e4: i64,
16481 e5: i64,
16482 e6: i64,
16483 e7: i64,
16484) -> __m512i {
    _mm512_setr_epi64(e7, e6, e5, e4, e3, e2, e1, e0)
16486}
16487
16488/// Set packed 64-bit integers in dst with the supplied values in reverse order.
16489///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_epi64&expand=4993)
16491#[inline]
16492#[target_feature(enable = "avx512f")]
16493#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16494pub fn _mm512_setr_epi64(
16495 e0: i64,
16496 e1: i64,
16497 e2: i64,
16498 e3: i64,
16499 e4: i64,
16500 e5: i64,
16501 e6: i64,
16502 e7: i64,
16503) -> __m512i {
16504 unsafe {
        let r: i64x8 = i64x8::new(e0, e1, e2, e3, e4, e5, e6, e7);
        transmute(r)
16507 }
16508}
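
// Illustrative sketch, not part of the upstream API (the helper name is an
// assumption): `_mm512_set_epi64` takes the highest element first and simply
// forwards the reversed argument list to `_mm512_setr_epi64`, which takes
// element 0 first, so the two calls below build the same vector.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_set_vs_setr_epi64() -> (__m512i, __m512i) {
    let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
    let b = _mm512_set_epi64(7, 6, 5, 4, 3, 2, 1, 0);
    (a, b) // element i of both vectors is i
}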
16509
16510/// Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16511///
16512/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_pd&expand=3002)
16513#[inline]
16514#[target_feature(enable = "avx512f")]
16515#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16516#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
16517#[rustc_legacy_const_generics(2)]
16518pub unsafe fn _mm512_i32gather_pd<const SCALE: i32>(offsets: __m256i, slice: *const u8) -> __m512d {
16519 static_assert_imm8_scale!(SCALE);
16520 let zero: f64x8 = f64x8::ZERO;
16521 let neg_one: i8 = -1;
16522 let slice: *const i8 = slice as *const i8;
16523 let offsets: i32x8 = offsets.as_i32x8();
    let r: f64x8 = vgatherdpd(zero, slice, offsets, neg_one, SCALE);
    transmute(r)
16526}
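
// Illustrative sketch, not part of the upstream API (the helper name is an
// assumption): SCALE is a byte multiplier applied to each index, so gathering
// f64 values addressed by element index uses SCALE = 8, the size of one f64.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_i32gather_pd(table: &[f64; 16]) -> __m512d {
    // Gather every second element: table[0], table[2], ..., table[14].
    let idx = _mm256_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14);
    _mm512_i32gather_pd::<8>(idx, table.as_ptr() as *const u8)
}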
16527
16528/// Gather double-precision (64-bit) floating-point elements from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16529///
16530/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_pd&expand=3003)
16531#[inline]
16532#[target_feature(enable = "avx512f")]
16533#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16534#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
16535#[rustc_legacy_const_generics(4)]
16536pub unsafe fn _mm512_mask_i32gather_pd<const SCALE: i32>(
16537 src: __m512d,
16538 mask: __mmask8,
16539 offsets: __m256i,
16540 slice: *const u8,
16541) -> __m512d {
16542 static_assert_imm8_scale!(SCALE);
16543 let src: f64x8 = src.as_f64x8();
16544 let slice: *const i8 = slice as *const i8;
16545 let offsets: i32x8 = offsets.as_i32x8();
16546 let r: f64x8 = vgatherdpd(src, slice, offsets, mask as i8, SCALE);
    transmute(r)
16548}
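
// Illustrative sketch, not part of the upstream API (the helper name is an
// assumption): in the masked gather, lanes whose mask bit is clear are not
// gathered; they are copied from `src`, so a sentinel such as NaN can mark the
// lanes that were left untouched.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_mask_i32gather_pd(table: &[f64; 8]) -> __m512d {
    let idx = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
    let fallback = _mm512_set1_pd(f64::NAN);
    // Only the low four lanes are gathered; the high four keep NaN.
    _mm512_mask_i32gather_pd::<8>(fallback, 0b0000_1111, idx, table.as_ptr() as *const u8)
}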
16549
16550/// Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16551///
16552/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_pd&expand=3092)
16553#[inline]
16554#[target_feature(enable = "avx512f")]
16555#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16556#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
16557#[rustc_legacy_const_generics(2)]
16558pub unsafe fn _mm512_i64gather_pd<const SCALE: i32>(offsets: __m512i, slice: *const u8) -> __m512d {
16559 static_assert_imm8_scale!(SCALE);
16560 let zero: f64x8 = f64x8::ZERO;
16561 let neg_one: i8 = -1;
16562 let slice: *const i8 = slice as *const i8;
16563 let offsets: i64x8 = offsets.as_i64x8();
    let r: f64x8 = vgatherqpd(zero, slice, offsets, neg_one, SCALE);
    transmute(r)
16566}
16567
16568/// Gather double-precision (64-bit) floating-point elements from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16569///
16570/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_pd&expand=3093)
16571#[inline]
16572#[target_feature(enable = "avx512f")]
16573#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16574#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
16575#[rustc_legacy_const_generics(4)]
16576pub unsafe fn _mm512_mask_i64gather_pd<const SCALE: i32>(
16577 src: __m512d,
16578 mask: __mmask8,
16579 offsets: __m512i,
16580 slice: *const u8,
16581) -> __m512d {
16582 static_assert_imm8_scale!(SCALE);
16583 let src: f64x8 = src.as_f64x8();
16584 let slice: *const i8 = slice as *const i8;
16585 let offsets: i64x8 = offsets.as_i64x8();
16586 let r: f64x8 = vgatherqpd(src, slice, offsets, mask as i8, SCALE);
    transmute(r)
16588}
16589
16590/// Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16591///
16592/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_ps&expand=3100)
16593#[inline]
16594#[target_feature(enable = "avx512f")]
16595#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16596#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
16597#[rustc_legacy_const_generics(2)]
16598pub unsafe fn _mm512_i64gather_ps<const SCALE: i32>(offsets: __m512i, slice: *const u8) -> __m256 {
16599 static_assert_imm8_scale!(SCALE);
16600 let zero: f32x8 = f32x8::ZERO;
16601 let neg_one: i8 = -1;
16602 let slice: *const i8 = slice as *const i8;
16603 let offsets: i64x8 = offsets.as_i64x8();
    let r: f32x8 = vgatherqps(zero, slice, offsets, neg_one, SCALE);
    transmute(r)
16606}
16607
16608/// Gather single-precision (32-bit) floating-point elements from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16609///
16610/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_ps&expand=3101)
16611#[inline]
16612#[target_feature(enable = "avx512f")]
16613#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16614#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
16615#[rustc_legacy_const_generics(4)]
16616pub unsafe fn _mm512_mask_i64gather_ps<const SCALE: i32>(
16617 src: __m256,
16618 mask: __mmask8,
16619 offsets: __m512i,
16620 slice: *const u8,
16621) -> __m256 {
16622 static_assert_imm8_scale!(SCALE);
16623 let src: f32x8 = src.as_f32x8();
16624 let slice: *const i8 = slice as *const i8;
16625 let offsets: i64x8 = offsets.as_i64x8();
16626 let r: f32x8 = vgatherqps(src, slice, offsets, mask as i8, SCALE);
    transmute(r)
16628}
16629
16630/// Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16631///
16632/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_ps&expand=3010)
16633#[inline]
16634#[target_feature(enable = "avx512f")]
16635#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16636#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
16637#[rustc_legacy_const_generics(2)]
16638pub unsafe fn _mm512_i32gather_ps<const SCALE: i32>(offsets: __m512i, slice: *const u8) -> __m512 {
16639 static_assert_imm8_scale!(SCALE);
16640 let zero: f32x16 = f32x16::ZERO;
16641 let neg_one: i16 = -1;
16642 let slice: *const i8 = slice as *const i8;
16643 let offsets: i32x16 = offsets.as_i32x16();
    let r: f32x16 = vgatherdps(zero, slice, offsets, neg_one, SCALE);
    transmute(r)
16646}
16647
16648/// Gather single-precision (32-bit) floating-point elements from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16649///
16650/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_ps&expand=3011)
16651#[inline]
16652#[target_feature(enable = "avx512f")]
16653#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16654#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
16655#[rustc_legacy_const_generics(4)]
16656pub unsafe fn _mm512_mask_i32gather_ps<const SCALE: i32>(
16657 src: __m512,
16658 mask: __mmask16,
16659 offsets: __m512i,
16660 slice: *const u8,
16661) -> __m512 {
16662 static_assert_imm8_scale!(SCALE);
16663 let src: f32x16 = src.as_f32x16();
16664 let slice: *const i8 = slice as *const i8;
16665 let offsets: i32x16 = offsets.as_i32x16();
16666 let r: f32x16 = vgatherdps(src, slice, offsets, mask as i16, SCALE);
    transmute(r)
16668}
16669
16670/// Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16671///
16672/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_epi32&expand=2986)
16673#[inline]
16674#[target_feature(enable = "avx512f")]
16675#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16676#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
16677#[rustc_legacy_const_generics(2)]
16678pub unsafe fn _mm512_i32gather_epi32<const SCALE: i32>(
16679 offsets: __m512i,
16680 slice: *const u8,
16681) -> __m512i {
16682 static_assert_imm8_scale!(SCALE);
16683 let zero: i32x16 = i32x16::ZERO;
16684 let neg_one: i16 = -1;
16685 let slice: *const i8 = slice as *const i8;
16686 let offsets: i32x16 = offsets.as_i32x16();
    let r: i32x16 = vpgatherdd(zero, slice, offsets, neg_one, SCALE);
    transmute(r)
16689}
16690
16691/// Gather 32-bit integers from memory using 32-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16692///
16693/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_epi32&expand=2987)
16694#[inline]
16695#[target_feature(enable = "avx512f")]
16696#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16697#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
16698#[rustc_legacy_const_generics(4)]
16699pub unsafe fn _mm512_mask_i32gather_epi32<const SCALE: i32>(
16700 src: __m512i,
16701 mask: __mmask16,
16702 offsets: __m512i,
16703 slice: *const u8,
16704) -> __m512i {
16705 static_assert_imm8_scale!(SCALE);
16706 let src: i32x16 = src.as_i32x16();
16707 let mask: i16 = mask as i16;
16708 let slice: *const i8 = slice as *const i8;
16709 let offsets: i32x16 = offsets.as_i32x16();
16710 let r: i32x16 = vpgatherdd(src, slice, offsets, mask, SCALE);
    transmute(r)
16712}
16713
16714/// Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16715///
16716/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32gather_epi64&expand=2994)
16717#[inline]
16718#[target_feature(enable = "avx512f")]
16719#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16720#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
16721#[rustc_legacy_const_generics(2)]
16722pub unsafe fn _mm512_i32gather_epi64<const SCALE: i32>(
16723 offsets: __m256i,
16724 slice: *const u8,
16725) -> __m512i {
16726 static_assert_imm8_scale!(SCALE);
16727 let zero: i64x8 = i64x8::ZERO;
16728 let neg_one: i8 = -1;
16729 let slice: *const i8 = slice as *const i8;
16730 let offsets: i32x8 = offsets.as_i32x8();
    let r: i64x8 = vpgatherdq(zero, slice, offsets, neg_one, SCALE);
    transmute(r)
16733}
16734
16735/// Gather 64-bit integers from memory using 32-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16736///
16737/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32gather_epi64&expand=2995)
16738#[inline]
16739#[target_feature(enable = "avx512f")]
16740#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16741#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
16742#[rustc_legacy_const_generics(4)]
16743pub unsafe fn _mm512_mask_i32gather_epi64<const SCALE: i32>(
16744 src: __m512i,
16745 mask: __mmask8,
16746 offsets: __m256i,
16747 slice: *const u8,
16748) -> __m512i {
16749 static_assert_imm8_scale!(SCALE);
16750 let src: i64x8 = src.as_i64x8();
16751 let mask: i8 = mask as i8;
16752 let slice: *const i8 = slice as *const i8;
16753 let offsets: i32x8 = offsets.as_i32x8();
16754 let r: i64x8 = vpgatherdq(src, slice, offsets, mask, SCALE);
    transmute(r)
16756}
16757
16758/// Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16759///
16760/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_epi64&expand=3084)
16761#[inline]
16762#[target_feature(enable = "avx512f")]
16763#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16764#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
16765#[rustc_legacy_const_generics(2)]
16766pub unsafe fn _mm512_i64gather_epi64<const SCALE: i32>(
16767 offsets: __m512i,
16768 slice: *const u8,
16769) -> __m512i {
16770 static_assert_imm8_scale!(SCALE);
16771 let zero: i64x8 = i64x8::ZERO;
16772 let neg_one: i8 = -1;
16773 let slice: *const i8 = slice as *const i8;
16774 let offsets: i64x8 = offsets.as_i64x8();
    let r: i64x8 = vpgatherqq(zero, slice, offsets, neg_one, SCALE);
    transmute(r)
16777}
16778
16779/// Gather 64-bit integers from memory using 64-bit indices. 64-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16780///
16781/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_epi64&expand=3085)
16782#[inline]
16783#[target_feature(enable = "avx512f")]
16784#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16785#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
16786#[rustc_legacy_const_generics(4)]
16787pub unsafe fn _mm512_mask_i64gather_epi64<const SCALE: i32>(
16788 src: __m512i,
16789 mask: __mmask8,
16790 offsets: __m512i,
16791 slice: *const u8,
16792) -> __m512i {
16793 static_assert_imm8_scale!(SCALE);
16794 let src: i64x8 = src.as_i64x8();
16795 let mask: i8 = mask as i8;
16796 let slice: *const i8 = slice as *const i8;
16797 let offsets: i64x8 = offsets.as_i64x8();
16798 let r: i64x8 = vpgatherqq(src, slice, offsets, mask, SCALE);
    transmute(r)
16800}
16801
16802/// Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst. scale should be 1, 2, 4 or 8.
16803///
16804/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64gather_epi32&expand=3074)
16805#[inline]
16806#[target_feature(enable = "avx512f")]
16807#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16808#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
16809#[rustc_legacy_const_generics(2)]
16810pub unsafe fn _mm512_i64gather_epi32<const SCALE: i32>(
16811 offsets: __m512i,
16812 slice: *const u8,
16813) -> __m256i {
16814 static_assert_imm8_scale!(SCALE);
16815 let zeros: i32x8 = i32x8::ZERO;
16816 let neg_one: i8 = -1;
16817 let slice: *const i8 = slice as *const i8;
16818 let offsets: i64x8 = offsets.as_i64x8();
    let r: i32x8 = vpgatherqd(zeros, slice, offsets, neg_one, SCALE);
    transmute(r)
16821}
16822
16823/// Gather 32-bit integers from memory using 64-bit indices. 32-bit elements are loaded from addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). Gathered elements are merged into dst using writemask k (elements are copied from src when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16824///
16825/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64gather_epi32&expand=3075)
16826#[inline]
16827#[target_feature(enable = "avx512f")]
16828#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16829#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
16830#[rustc_legacy_const_generics(4)]
16831pub unsafe fn _mm512_mask_i64gather_epi32<const SCALE: i32>(
16832 src: __m256i,
16833 mask: __mmask8,
16834 offsets: __m512i,
16835 slice: *const u8,
16836) -> __m256i {
16837 static_assert_imm8_scale!(SCALE);
16838 let src: i32x8 = src.as_i32x8();
16839 let mask: i8 = mask as i8;
16840 let slice: *const i8 = slice as *const i8;
16841 let offsets: i64x8 = offsets.as_i64x8();
16842 let r: i32x8 = vpgatherqd(src, slice, offsets, mask, SCALE);
    transmute(r)
16844}
16845
16846/// Scatter double-precision (64-bit) floating-point elements from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
16847///
16848/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_pd&expand=3044)
16849#[inline]
16850#[target_feature(enable = "avx512f")]
16851#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16852#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
16853#[rustc_legacy_const_generics(3)]
16854pub unsafe fn _mm512_i32scatter_pd<const SCALE: i32>(
16855 slice: *mut u8,
16856 offsets: __m256i,
16857 src: __m512d,
16858) {
16859 static_assert_imm8_scale!(SCALE);
16860 let src: f64x8 = src.as_f64x8();
16861 let neg_one: i8 = -1;
16862 let slice: *mut i8 = slice as *mut i8;
16863 let offsets: i32x8 = offsets.as_i32x8();
    vscatterdpd(slice, neg_one, offsets, src, SCALE);
16865}
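
// Illustrative sketch, not part of the upstream API (the helper name is an
// assumption): scatter is the store-side counterpart of gather; each f64 lane
// of `src` is written to `slice + index * SCALE` bytes.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_i32scatter_pd(table: &mut [f64; 16], src: __m512d) {
    // Write the eight lanes of `src` to table[0], table[2], ..., table[14].
    let idx = _mm256_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14);
    _mm512_i32scatter_pd::<8>(table.as_mut_ptr() as *mut u8, idx, src);
}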
16866
16867/// Scatter double-precision (64-bit) floating-point elements from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16868///
16869/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_pd&expand=3045)
16870#[inline]
16871#[target_feature(enable = "avx512f")]
16872#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16873#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
16874#[rustc_legacy_const_generics(4)]
16875pub unsafe fn _mm512_mask_i32scatter_pd<const SCALE: i32>(
16876 slice: *mut u8,
16877 mask: __mmask8,
16878 offsets: __m256i,
16879 src: __m512d,
16880) {
16881 static_assert_imm8_scale!(SCALE);
16882 let src: f64x8 = src.as_f64x8();
16883 let slice: *mut i8 = slice as *mut i8;
16884 let offsets: i32x8 = offsets.as_i32x8();
16885 vscatterdpd(slice, mask as i8, offsets, src, SCALE);
16886}
16887
16888/// Scatter double-precision (64-bit) floating-point elements from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
16889///
16890/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_pd&expand=3122)
16891#[inline]
16892#[target_feature(enable = "avx512f")]
16893#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16894#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
16895#[rustc_legacy_const_generics(3)]
16896pub unsafe fn _mm512_i64scatter_pd<const SCALE: i32>(
16897 slice: *mut u8,
16898 offsets: __m512i,
16899 src: __m512d,
16900) {
16901 static_assert_imm8_scale!(SCALE);
16902 let src: f64x8 = src.as_f64x8();
16903 let neg_one: i8 = -1;
16904 let slice: *mut i8 = slice as *mut i8;
16905 let offsets: i64x8 = offsets.as_i64x8();
    vscatterqpd(slice, neg_one, offsets, src, SCALE);
16907}
16908
16909/// Scatter double-precision (64-bit) floating-point elements from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16910///
16911/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_pd&expand=3123)
16912#[inline]
16913#[target_feature(enable = "avx512f")]
16914#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16915#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
16916#[rustc_legacy_const_generics(4)]
16917pub unsafe fn _mm512_mask_i64scatter_pd<const SCALE: i32>(
16918 slice: *mut u8,
16919 mask: __mmask8,
16920 offsets: __m512i,
16921 src: __m512d,
16922) {
16923 static_assert_imm8_scale!(SCALE);
16924 let src: f64x8 = src.as_f64x8();
16925 let slice: *mut i8 = slice as *mut i8;
16926 let offsets: i64x8 = offsets.as_i64x8();
16927 vscatterqpd(slice, mask as i8, offsets, src, SCALE);
16928}
16929
16930/// Scatter single-precision (32-bit) floating-point elements from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
16931///
16932/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_ps&expand=3050)
16933#[inline]
16934#[target_feature(enable = "avx512f")]
16935#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16936#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
16937#[rustc_legacy_const_generics(3)]
16938pub unsafe fn _mm512_i32scatter_ps<const SCALE: i32>(
16939 slice: *mut u8,
16940 offsets: __m512i,
16941 src: __m512,
16942) {
16943 static_assert_imm8_scale!(SCALE);
16944 let src: f32x16 = src.as_f32x16();
16945 let neg_one: i16 = -1;
16946 let slice: *mut i8 = slice as *mut i8;
16947 let offsets: i32x16 = offsets.as_i32x16();
    vscatterdps(slice, neg_one, offsets, src, SCALE);
16949}
16950
16951/// Scatter single-precision (32-bit) floating-point elements from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16952///
16953/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_ps&expand=3051)
16954#[inline]
16955#[target_feature(enable = "avx512f")]
16956#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16957#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
16958#[rustc_legacy_const_generics(4)]
16959pub unsafe fn _mm512_mask_i32scatter_ps<const SCALE: i32>(
16960 slice: *mut u8,
16961 mask: __mmask16,
16962 offsets: __m512i,
16963 src: __m512,
16964) {
16965 static_assert_imm8_scale!(SCALE);
16966 let src: f32x16 = src.as_f32x16();
16967 let slice: *mut i8 = slice as *mut i8;
16968 let offsets: i32x16 = offsets.as_i32x16();
16969 vscatterdps(slice, mask as i16, offsets, src, SCALE);
16970}
16971
/// Scatter single-precision (32-bit) floating-point elements from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
16973///
16974/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_ps&expand=3128)
16975#[inline]
16976#[target_feature(enable = "avx512f")]
16977#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16978#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
16979#[rustc_legacy_const_generics(3)]
16980pub unsafe fn _mm512_i64scatter_ps<const SCALE: i32>(
16981 slice: *mut u8,
16982 offsets: __m512i,
16983 src: __m256,
16984) {
16985 static_assert_imm8_scale!(SCALE);
16986 let src: f32x8 = src.as_f32x8();
16987 let neg_one: i8 = -1;
16988 let slice: *mut i8 = slice as *mut i8;
16989 let offsets: i64x8 = offsets.as_i64x8();
    vscatterqps(slice, neg_one, offsets, src, SCALE);
16991}
16992
16993/// Scatter single-precision (32-bit) floating-point elements from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
16994///
16995/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_ps&expand=3129)
16996#[inline]
16997#[target_feature(enable = "avx512f")]
16998#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
16999#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
17000#[rustc_legacy_const_generics(4)]
17001pub unsafe fn _mm512_mask_i64scatter_ps<const SCALE: i32>(
17002 slice: *mut u8,
17003 mask: __mmask8,
17004 offsets: __m512i,
17005 src: __m256,
17006) {
17007 static_assert_imm8_scale!(SCALE);
17008 let src: f32x8 = src.as_f32x8();
17009 let slice: *mut i8 = slice as *mut i8;
17010 let offsets: i64x8 = offsets.as_i64x8();
17011 vscatterqps(slice, mask as i8, offsets, src, SCALE);
17012}
17013
17014/// Scatter 64-bit integers from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17015///
17016/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_epi64&expand=3038)
17017#[inline]
17018#[target_feature(enable = "avx512f")]
17019#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17020#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17021#[rustc_legacy_const_generics(3)]
17022pub unsafe fn _mm512_i32scatter_epi64<const SCALE: i32>(
17023 slice: *mut u8,
17024 offsets: __m256i,
17025 src: __m512i,
17026) {
17027 static_assert_imm8_scale!(SCALE);
17028 let src: i64x8 = src.as_i64x8();
17029 let neg_one: i8 = -1;
17030 let slice: *mut i8 = slice as *mut i8;
17031 let offsets: i32x8 = offsets.as_i32x8();
    vpscatterdq(slice, neg_one, offsets, src, SCALE);
17033}
17034
17035/// Scatter 64-bit integers from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17036///
17037/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_epi64&expand=3039)
17038#[inline]
17039#[target_feature(enable = "avx512f")]
17040#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17041#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
17042#[rustc_legacy_const_generics(4)]
17043pub unsafe fn _mm512_mask_i32scatter_epi64<const SCALE: i32>(
17044 slice: *mut u8,
17045 mask: __mmask8,
17046 offsets: __m256i,
17047 src: __m512i,
17048) {
17049 static_assert_imm8_scale!(SCALE);
17050 let src: i64x8 = src.as_i64x8();
17051 let mask: i8 = mask as i8;
17052 let slice: *mut i8 = slice as *mut i8;
17053 let offsets: i32x8 = offsets.as_i32x8();
17054 vpscatterdq(slice, mask, offsets, src, SCALE);
17055}
17056
17057/// Scatter 64-bit integers from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17058///
17059/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_epi64&expand=3116)
17060#[inline]
17061#[target_feature(enable = "avx512f")]
17062#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17063#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
17064#[rustc_legacy_const_generics(3)]
17065pub unsafe fn _mm512_i64scatter_epi64<const SCALE: i32>(
17066 slice: *mut u8,
17067 offsets: __m512i,
17068 src: __m512i,
17069) {
17070 static_assert_imm8_scale!(SCALE);
17071 let src: i64x8 = src.as_i64x8();
17072 let neg_one: i8 = -1;
17073 let slice: *mut i8 = slice as *mut i8;
17074 let offsets: i64x8 = offsets.as_i64x8();
    vpscatterqq(slice, neg_one, offsets, src, SCALE);
17076}
17077
17078/// Scatter 64-bit integers from a into memory using 64-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17079///
17080/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_epi64&expand=3117)
17081#[inline]
17082#[target_feature(enable = "avx512f")]
17083#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17084#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
17085#[rustc_legacy_const_generics(4)]
17086pub unsafe fn _mm512_mask_i64scatter_epi64<const SCALE: i32>(
17087 slice: *mut u8,
17088 mask: __mmask8,
17089 offsets: __m512i,
17090 src: __m512i,
17091) {
17092 static_assert_imm8_scale!(SCALE);
17093 let src: i64x8 = src.as_i64x8();
17094 let mask: i8 = mask as i8;
17095 let slice: *mut i8 = slice as *mut i8;
17096 let offsets: i64x8 = offsets.as_i64x8();
17097 vpscatterqq(slice, mask, offsets, src, SCALE);
17098}
17099
17100/// Scatter 32-bit integers from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
17101///
17102/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i32scatter_epi32&expand=3032)
17103#[inline]
17104#[target_feature(enable = "avx512f")]
17105#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
17106#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
17107#[rustc_legacy_const_generics(3)]
17108pub unsafe fn _mm512_i32scatter_epi32<const SCALE: i32>(
17109 slice: *mut u8,
17110 offsets: __m512i,
17111 src: __m512i,
17112) {
17113 static_assert_imm8_scale!(SCALE);
17114 let src: i32x16 = src.as_i32x16();
17115 let neg_one: i16 = -1;
17116 let slice: *mut i8 = slice as *mut i8;
17117 let offsets: i32x16 = offsets.as_i32x16();
    vpscatterdd(slice, neg_one, offsets, src, SCALE);
17119}
17120
17121/// Scatter 32-bit integers from a into memory using 32-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
17122///
17123/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i32scatter_epi32&expand=3033)
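///
/// # Example
///
/// A minimal sketch of the mask semantics (illustration only; marked `ignore`
/// because AVX-512F support cannot be assumed when doctests run):
///
/// ```ignore
/// let mut buf = [0i32; 16];
/// let vindex = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
/// let a = _mm512_set1_epi32(-1);
/// // Only lanes whose mask bit is set are written: here lanes 0 and 15.
/// let k: __mmask16 = 0b1000_0000_0000_0001;
/// unsafe { _mm512_mask_i32scatter_epi32::<4>(buf.as_mut_ptr().cast(), k, vindex, a) };
/// // buf[0] and buf[15] are now -1; every other element is still 0.
/// ```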
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_i32scatter_epi32<const SCALE: i32>(
    slice: *mut u8,
    mask: __mmask16,
    offsets: __m512i,
    src: __m512i,
) {
    static_assert_imm8_scale!(SCALE);
    let src: i32x16 = src.as_i32x16();
    let mask: i16 = mask as i16;
    let slice: *mut i8 = slice as *mut i8;
    let offsets: i32x16 = offsets.as_i32x16();
    vpscatterdd(slice, mask, offsets, src, SCALE);
}

/// Scatter 32-bit integers from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_i64scatter_epi32&expand=3108)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_i64scatter_epi32<const SCALE: i32>(
    slice: *mut u8,
    offsets: __m512i,
    src: __m256i,
) {
    static_assert_imm8_scale!(SCALE);
    let src: i32x8 = src.as_i32x8();
    let neg_one: i8 = -1;
    let slice: *mut i8 = slice as *mut i8;
    let offsets: i64x8 = offsets.as_i64x8();
    vpscatterqd(slice, neg_one, offsets, src, SCALE);
}

/// Scatter 32-bit integers from a into memory using 64-bit indices. 32-bit elements are stored at addresses starting at base_addr and offset by each 64-bit element in vindex (each index is scaled by the factor in scale) subject to mask k (elements are not stored when the corresponding mask bit is not set). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_i64scatter_epi32&expand=3109)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_i64scatter_epi32<const SCALE: i32>(
    slice: *mut u8,
    mask: __mmask8,
    offsets: __m512i,
    src: __m256i,
) {
    static_assert_imm8_scale!(SCALE);
    let src: i32x8 = src.as_i32x8();
    let mask: i8 = mask as i8;
    let slice: *mut i8 = slice as *mut i8;
    let offsets: i64x8 = offsets.as_i64x8();
    vpscatterqd(slice, mask, offsets, src, SCALE);
}

/// Loads 8 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
/// indices stored in the lower half of vindex scaled by scale and stores them in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32logather_epi64)
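///
/// # Example
///
/// A minimal sketch showing that only the lower 256 bits of vindex (eight
/// 32-bit indices) are used (illustration only; marked `ignore` because
/// AVX-512F support cannot be assumed when doctests run):
///
/// ```ignore
/// let table = [100i64, 101, 102, 103, 104, 105, 106, 107];
/// // The upper eight 32-bit lanes of `vindex` are ignored by the gather.
/// let vindex = _mm512_setr_epi32(7, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0);
/// let r = unsafe { _mm512_i32logather_epi64::<8>(vindex, table.as_ptr().cast()) };
/// // r holds [107, 106, 105, 104, 103, 102, 101, 100].
/// ```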
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_i32logather_epi64<const SCALE: i32>(
    vindex: __m512i,
    base_addr: *const u8,
) -> __m512i {
    _mm512_i32gather_epi64::<SCALE>(_mm512_castsi512_si256(vindex), base_addr as _)
}

/// Loads 8 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
/// indices stored in the lower half of vindex scaled by scale and stores them in dst using writemask k
/// (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32logather_epi64)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_mask_i32logather_epi64<const SCALE: i32>(
    src: __m512i,
    k: __mmask8,
    vindex: __m512i,
    base_addr: *const u8,
) -> __m512i {
    _mm512_mask_i32gather_epi64::<SCALE>(src, k, _mm512_castsi512_si256(vindex), base_addr as _)
}

/// Loads 8 double-precision (64-bit) floating-point elements from memory starting at location base_addr
/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale and stores them in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32logather_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
#[rustc_legacy_const_generics(2)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_i32logather_pd<const SCALE: i32>(
    vindex: __m512i,
    base_addr: *const u8,
) -> __m512d {
    _mm512_i32gather_pd::<SCALE>(_mm512_castsi512_si256(vindex), base_addr as _)
}

/// Loads 8 double-precision (64-bit) floating-point elements from memory starting at location base_addr
/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale and stores them in dst
/// using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32logather_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_mask_i32logather_pd<const SCALE: i32>(
    src: __m512d,
    k: __mmask8,
    vindex: __m512i,
    base_addr: *const u8,
) -> __m512d {
    _mm512_mask_i32gather_pd::<SCALE>(src, k, _mm512_castsi512_si256(vindex), base_addr as _)
}

/// Stores 8 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
/// indices stored in the lower half of vindex scaled by scale.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32loscatter_epi64)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_i32loscatter_epi64<const SCALE: i32>(
    base_addr: *mut u8,
    vindex: __m512i,
    a: __m512i,
) {
    _mm512_i32scatter_epi64::<SCALE>(base_addr as _, _mm512_castsi512_si256(vindex), a)
}

/// Stores 8 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
/// indices stored in the lower half of vindex scaled by scale using writemask k (elements whose corresponding
/// mask bit is not set are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32loscatter_epi64)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_mask_i32loscatter_epi64<const SCALE: i32>(
    base_addr: *mut u8,
    k: __mmask8,
    vindex: __m512i,
    a: __m512i,
) {
    _mm512_mask_i32scatter_epi64::<SCALE>(base_addr as _, k, _mm512_castsi512_si256(vindex), a)
}

/// Stores 8 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_i32loscatter_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_i32loscatter_pd<const SCALE: i32>(
    base_addr: *mut u8,
    vindex: __m512i,
    a: __m512d,
) {
    _mm512_i32scatter_pd::<SCALE>(base_addr as _, _mm512_castsi512_si256(vindex), a)
}

/// Stores 8 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 32-bit integer indices stored in the lower half of vindex scaled by scale using writemask k
/// (elements whose corresponding mask bit is not set are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32loscatter_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_mask_i32loscatter_pd<const SCALE: i32>(
    base_addr: *mut u8,
    k: __mmask8,
    vindex: __m512i,
    a: __m512d,
) {
    _mm512_mask_i32scatter_pd::<SCALE>(base_addr as _, k, _mm512_castsi512_si256(vindex), a)
}

/// Stores 8 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
/// indices stored in vindex scaled by scale
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32scatter_epi32)
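///
/// # Example
///
/// A minimal usage sketch (illustration only; marked `ignore` because
/// AVX-512F/AVX-512VL support cannot be assumed when doctests run):
///
/// ```ignore
/// let mut buf = [0i32; 16];
/// // Scatter eight values to every other slot: byte offset = index * SCALE.
/// let vindex = _mm256_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14);
/// let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
/// unsafe { _mm256_i32scatter_epi32::<4>(buf.as_mut_ptr().cast(), vindex, a) };
/// // buf now holds [1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0, 8, 0].
/// ```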
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_i32scatter_epi32<const SCALE: i32>(
    base_addr: *mut u8,
    vindex: __m256i,
    a: __m256i,
) {
    static_assert_imm8_scale!(SCALE);
    vpscatterdd_256(base_addr as _, 0xff, vindex.as_i32x8(), a.as_i32x8(), SCALE)
}

/// Stores 8 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
/// are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mask_i32scatter_epi32<const SCALE: i32>(
    base_addr: *mut u8,
    k: __mmask8,
    vindex: __m256i,
    a: __m256i,
) {
    static_assert_imm8_scale!(SCALE);
    vpscatterdd_256(base_addr as _, k, vindex.as_i32x8(), a.as_i32x8(), SCALE)
}

/// Scatter 64-bit integers from a into memory using 32-bit indices. 64-bit elements are stored at addresses starting at base_addr and offset by each 32-bit element in vindex (each index is scaled by the factor in scale). scale should be 1, 2, 4 or 8.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_i32scatter_epi64&expand=4099)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm256_i32scatter_epi64<const SCALE: i32>(
    slice: *mut u8,
    offsets: __m128i,
    src: __m256i,
) {
    static_assert_imm8_scale!(SCALE);
    let src: i64x4 = src.as_i64x4();
    let slice: *mut i8 = slice as *mut i8;
    let offsets: i32x4 = offsets.as_i32x4();
    vpscatterdq_256(slice, 0xff, offsets, src, SCALE);
}

/// Stores 4 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
/// are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mask_i32scatter_epi64<const SCALE: i32>(
    base_addr: *mut u8,
    k: __mmask8,
    vindex: __m128i,
    a: __m256i,
) {
    static_assert_imm8_scale!(SCALE);
    vpscatterdq_256(base_addr as _, k, vindex.as_i32x4(), a.as_i64x4(), SCALE)
}

/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 32-bit integer indices stored in vindex scaled by scale
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32scatter_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_i32scatter_pd<const SCALE: i32>(
    base_addr: *mut u8,
    vindex: __m128i,
    a: __m256d,
) {
    static_assert_imm8_scale!(SCALE);
    vscatterdpd_256(base_addr as _, 0xff, vindex.as_i32x4(), a.as_f64x4(), SCALE)
}

/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
/// mask bit is not set are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mask_i32scatter_pd<const SCALE: i32>(
    base_addr: *mut u8,
    k: __mmask8,
    vindex: __m128i,
    a: __m256d,
) {
    static_assert_imm8_scale!(SCALE);
    vscatterdpd_256(base_addr as _, k, vindex.as_i32x4(), a.as_f64x4(), SCALE)
}

/// Stores 8 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 32-bit integer indices stored in vindex scaled by scale
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i32scatter_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_i32scatter_ps<const SCALE: i32>(
    base_addr: *mut u8,
    vindex: __m256i,
    a: __m256,
) {
    static_assert_imm8_scale!(SCALE);
    vscatterdps_256(base_addr as _, 0xff, vindex.as_i32x8(), a.as_f32x8(), SCALE)
}

/// Stores 8 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
/// mask bit is not set are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i32scatter_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mask_i32scatter_ps<const SCALE: i32>(
    base_addr: *mut u8,
    k: __mmask8,
    vindex: __m256i,
    a: __m256,
) {
    static_assert_imm8_scale!(SCALE);
    vscatterdps_256(base_addr as _, k, vindex.as_i32x8(), a.as_f32x8(), SCALE)
}

/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
/// indices stored in vindex scaled by scale
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_i64scatter_epi32<const SCALE: i32>(
    base_addr: *mut u8,
    vindex: __m256i,
    a: __m128i,
) {
    static_assert_imm8_scale!(SCALE);
    vpscatterqd_256(base_addr as _, 0xff, vindex.as_i64x4(), a.as_i32x4(), SCALE)
}

/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
/// are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mask_i64scatter_epi32<const SCALE: i32>(
    base_addr: *mut u8,
    k: __mmask8,
    vindex: __m256i,
    a: __m128i,
) {
    static_assert_imm8_scale!(SCALE);
    vpscatterqd_256(base_addr as _, k, vindex.as_i64x4(), a.as_i32x4(), SCALE)
}

/// Stores 4 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
/// indices stored in vindex scaled by scale
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_i64scatter_epi64<const SCALE: i32>(
    base_addr: *mut u8,
    vindex: __m256i,
    a: __m256i,
) {
    static_assert_imm8_scale!(SCALE);
    vpscatterqq_256(base_addr as _, 0xff, vindex.as_i64x4(), a.as_i64x4(), SCALE)
}

/// Stores 4 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
/// are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mask_i64scatter_epi64<const SCALE: i32>(
    base_addr: *mut u8,
    k: __mmask8,
    vindex: __m256i,
    a: __m256i,
) {
    static_assert_imm8_scale!(SCALE);
    vpscatterqq_256(base_addr as _, k, vindex.as_i64x4(), a.as_i64x4(), SCALE)
}

/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 64-bit integer indices stored in vindex scaled by scale
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_i64scatter_pd<const SCALE: i32>(
    base_addr: *mut u8,
    vindex: __m256i,
    a: __m256d,
) {
    static_assert_imm8_scale!(SCALE);
    vscatterqpd_256(base_addr as _, 0xff, vindex.as_i64x4(), a.as_f64x4(), SCALE)
}

/// Stores 4 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
/// mask bit is not set are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mask_i64scatter_pd<const SCALE: i32>(
    base_addr: *mut u8,
    k: __mmask8,
    vindex: __m256i,
    a: __m256d,
) {
    static_assert_imm8_scale!(SCALE);
    vscatterqpd_256(base_addr as _, k, vindex.as_i64x4(), a.as_f64x4(), SCALE)
}

/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 64-bit integer indices stored in vindex scaled by scale
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_i64scatter_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_i64scatter_ps<const SCALE: i32>(
    base_addr: *mut u8,
    vindex: __m256i,
    a: __m128,
) {
    static_assert_imm8_scale!(SCALE);
    vscatterqps_256(base_addr as _, 0xff, vindex.as_i64x4(), a.as_f32x4(), SCALE)
}

/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
/// mask bit is not set are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_i64scatter_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mask_i64scatter_ps<const SCALE: i32>(
    base_addr: *mut u8,
    k: __mmask8,
    vindex: __m256i,
    a: __m128,
) {
    static_assert_imm8_scale!(SCALE);
    vscatterqps_256(base_addr as _, k, vindex.as_i64x4(), a.as_f32x4(), SCALE)
}

/// Loads 8 32-bit integer elements from memory starting at location base_addr at packed 32-bit integer
/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
/// mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_epi32)
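///
/// # Example
///
/// A minimal sketch of the writemask fallback (illustration only; marked
/// `ignore` because AVX-512F/AVX-512VL support cannot be assumed when
/// doctests run):
///
/// ```ignore
/// let table = [10i32, 11, 12, 13, 14, 15, 16, 17];
/// let src = _mm256_set1_epi32(-1);
/// let vindex = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
/// // Only the low four mask bits are set, so lanes 4..8 keep the value from `src`.
/// let r = unsafe { _mm256_mmask_i32gather_epi32::<4>(src, 0b0000_1111, vindex, table.as_ptr().cast()) };
/// // r holds [10, 11, 12, 13, -1, -1, -1, -1].
/// ```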
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mmask_i32gather_epi32<const SCALE: i32>(
    src: __m256i,
    k: __mmask8,
    vindex: __m256i,
    base_addr: *const u8,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    transmute(vpgatherdd_256(
        src.as_i32x8(),
        base_addr as _,
        vindex.as_i32x8(),
        k,
        SCALE,
    ))
}

/// Loads 4 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
/// mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mmask_i32gather_epi64<const SCALE: i32>(
    src: __m256i,
    k: __mmask8,
    vindex: __m128i,
    base_addr: *const u8,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    transmute(vpgatherdq_256(
        src.as_i64x4(),
        base_addr as _,
        vindex.as_i32x4(),
        k,
        SCALE,
    ))
}

/// Loads 4 double-precision (64-bit) floating-point elements from memory starting at location base_addr
/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
/// from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mmask_i32gather_pd<const SCALE: i32>(
    src: __m256d,
    k: __mmask8,
    vindex: __m128i,
    base_addr: *const u8,
) -> __m256d {
    static_assert_imm8_scale!(SCALE);
    transmute(vgatherdpd_256(
        src.as_f64x4(),
        base_addr as _,
        vindex.as_i32x4(),
        k,
        SCALE,
    ))
}

/// Loads 8 single-precision (32-bit) floating-point elements from memory starting at location base_addr
/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
/// from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i32gather_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mmask_i32gather_ps<const SCALE: i32>(
    src: __m256,
    k: __mmask8,
    vindex: __m256i,
    base_addr: *const u8,
) -> __m256 {
    static_assert_imm8_scale!(SCALE);
    transmute(vgatherdps_256(
        src.as_f32x8(),
        base_addr as _,
        vindex.as_i32x8(),
        k,
        SCALE,
    ))
}

/// Loads 4 32-bit integer elements from memory starting at location base_addr at packed 64-bit integer
/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
/// mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mmask_i64gather_epi32<const SCALE: i32>(
    src: __m128i,
    k: __mmask8,
    vindex: __m256i,
    base_addr: *const u8,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    transmute(vpgatherqd_256(
        src.as_i32x4(),
        base_addr as _,
        vindex.as_i64x4(),
        k,
        SCALE,
    ))
}

/// Loads 4 64-bit integer elements from memory starting at location base_addr at packed 64-bit integer
/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
/// mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mmask_i64gather_epi64<const SCALE: i32>(
    src: __m256i,
    k: __mmask8,
    vindex: __m256i,
    base_addr: *const u8,
) -> __m256i {
    static_assert_imm8_scale!(SCALE);
    transmute(vpgatherqq_256(
        src.as_i64x4(),
        base_addr as _,
        vindex.as_i64x4(),
        k,
        SCALE,
    ))
}

/// Loads 4 double-precision (64-bit) floating-point elements from memory starting at location base_addr
/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
/// from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_pd)
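///
/// # Example
///
/// A minimal usage sketch (illustration only; marked `ignore` because
/// AVX-512F/AVX-512VL support cannot be assumed when doctests run):
///
/// ```ignore
/// let table = [0.5f64, 1.5, 2.5, 3.5];
/// let src = _mm256_set1_pd(f64::NAN);
/// let vindex = _mm256_setr_epi64x(3, 2, 1, 0);
/// // All four mask bits are set, so every lane is gathered; with SCALE = 8 the
/// // byte offset of lane i is vindex[i] * 8.
/// let r = unsafe { _mm256_mmask_i64gather_pd::<8>(src, 0b1111, vindex, table.as_ptr().cast()) };
/// // r holds [3.5, 2.5, 1.5, 0.5].
/// ```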
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mmask_i64gather_pd<const SCALE: i32>(
    src: __m256d,
    k: __mmask8,
    vindex: __m256i,
    base_addr: *const u8,
) -> __m256d {
    static_assert_imm8_scale!(SCALE);
    transmute(vgatherqpd_256(
        src.as_f64x4(),
        base_addr as _,
        vindex.as_i64x4(),
        k,
        SCALE,
    ))
}

/// Loads 4 single-precision (32-bit) floating-point elements from memory starting at location base_addr
/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
/// from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mmask_i64gather_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mmask_i64gather_ps<const SCALE: i32>(
    src: __m128,
    k: __mmask8,
    vindex: __m256i,
    base_addr: *const u8,
) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    transmute(vgatherqps_256(
        src.as_f32x4(),
        base_addr as _,
        vindex.as_i64x4(),
        k,
        SCALE,
    ))
}

/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
/// indices stored in vindex scaled by scale
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_epi32)
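///
/// # Example
///
/// A minimal usage sketch (illustration only; marked `ignore` because
/// AVX-512F/AVX-512VL support cannot be assumed when doctests run):
///
/// ```ignore
/// let mut buf = [0i32; 8];
/// let vindex = _mm_setr_epi32(1, 3, 5, 7);
/// let a = _mm_setr_epi32(11, 33, 55, 77);
/// unsafe { _mm_i32scatter_epi32::<4>(buf.as_mut_ptr().cast(), vindex, a) };
/// // buf now holds [0, 11, 0, 33, 0, 55, 0, 77].
/// ```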
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_i32scatter_epi32<const SCALE: i32>(
    base_addr: *mut u8,
    vindex: __m128i,
    a: __m128i,
) {
    static_assert_imm8_scale!(SCALE);
    vpscatterdd_128(base_addr as _, 0xff, vindex.as_i32x4(), a.as_i32x4(), SCALE)
}

/// Stores 4 32-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
/// are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpscatterdd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mask_i32scatter_epi32<const SCALE: i32>(
    base_addr: *mut u8,
    k: __mmask8,
    vindex: __m128i,
    a: __m128i,
) {
    static_assert_imm8_scale!(SCALE);
    vpscatterdd_128(base_addr as _, k, vindex.as_i32x4(), a.as_i32x4(), SCALE)
}

/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
/// indices stored in vindex scaled by scale
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_i32scatter_epi64<const SCALE: i32>(
    base_addr: *mut u8,
    vindex: __m128i,
    a: __m128i,
) {
    static_assert_imm8_scale!(SCALE);
    vpscatterdq_128(base_addr as _, 0xff, vindex.as_i32x4(), a.as_i64x2(), SCALE)
}

/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 32-bit integer
/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
/// are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpscatterdq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mask_i32scatter_epi64<const SCALE: i32>(
    base_addr: *mut u8,
    k: __mmask8,
    vindex: __m128i,
    a: __m128i,
) {
    static_assert_imm8_scale!(SCALE);
    vpscatterdq_128(base_addr as _, k, vindex.as_i32x4(), a.as_i64x2(), SCALE)
}

/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 32-bit integer indices stored in vindex scaled by scale
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_i32scatter_pd<const SCALE: i32>(base_addr: *mut u8, vindex: __m128i, a: __m128d) {
    static_assert_imm8_scale!(SCALE);
    vscatterdpd_128(base_addr as _, 0xff, vindex.as_i32x4(), a.as_f64x2(), SCALE)
}

/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
/// mask bit is not set are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscatterdpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mask_i32scatter_pd<const SCALE: i32>(
    base_addr: *mut u8,
    k: __mmask8,
    vindex: __m128i,
    a: __m128d,
) {
    static_assert_imm8_scale!(SCALE);
    vscatterdpd_128(base_addr as _, k, vindex.as_i32x4(), a.as_f64x2(), SCALE)
}

/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 32-bit integer indices stored in vindex scaled by scale
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i32scatter_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_i32scatter_ps<const SCALE: i32>(base_addr: *mut u8, vindex: __m128i, a: __m128) {
    static_assert_imm8_scale!(SCALE);
    vscatterdps_128(base_addr as _, 0xff, vindex.as_i32x4(), a.as_f32x4(), SCALE)
}

/// Stores 4 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
/// mask bit is not set are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i32scatter_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscatterdps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mask_i32scatter_ps<const SCALE: i32>(
    base_addr: *mut u8,
    k: __mmask8,
    vindex: __m128i,
    a: __m128,
) {
    static_assert_imm8_scale!(SCALE);
    vscatterdps_128(base_addr as _, k, vindex.as_i32x4(), a.as_f32x4(), SCALE)
}

/// Stores 2 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
/// indices stored in vindex scaled by scale
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_i64scatter_epi32<const SCALE: i32>(
    base_addr: *mut u8,
    vindex: __m128i,
    a: __m128i,
) {
    static_assert_imm8_scale!(SCALE);
    vpscatterqd_128(base_addr as _, 0xff, vindex.as_i64x2(), a.as_i32x4(), SCALE)
}

/// Stores 2 32-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
/// are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpscatterqd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mask_i64scatter_epi32<const SCALE: i32>(
    base_addr: *mut u8,
    k: __mmask8,
    vindex: __m128i,
    a: __m128i,
) {
    static_assert_imm8_scale!(SCALE);
    vpscatterqd_128(base_addr as _, k, vindex.as_i64x2(), a.as_i32x4(), SCALE)
}

/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
/// indices stored in vindex scaled by scale
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_i64scatter_epi64<const SCALE: i32>(
    base_addr: *mut u8,
    vindex: __m128i,
    a: __m128i,
) {
    static_assert_imm8_scale!(SCALE);
    vpscatterqq_128(base_addr as _, 0xff, vindex.as_i64x2(), a.as_i64x2(), SCALE)
}

/// Stores 2 64-bit integer elements from a to memory starting at location base_addr at packed 64-bit integer
/// indices stored in vindex scaled by scale using writemask k (elements whose corresponding mask bit is not set
/// are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpscatterqq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mask_i64scatter_epi64<const SCALE: i32>(
    base_addr: *mut u8,
    k: __mmask8,
    vindex: __m128i,
    a: __m128i,
) {
    static_assert_imm8_scale!(SCALE);
    vpscatterqq_128(base_addr as _, k, vindex.as_i64x2(), a.as_i64x2(), SCALE)
}

/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 64-bit integer indices stored in vindex scaled by scale
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_i64scatter_pd<const SCALE: i32>(base_addr: *mut u8, vindex: __m128i, a: __m128d) {
    static_assert_imm8_scale!(SCALE);
    vscatterqpd_128(base_addr as _, 0xff, vindex.as_i64x2(), a.as_f64x2(), SCALE)
}

/// Stores 2 double-precision (64-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
/// mask bit is not set are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscatterqpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mask_i64scatter_pd<const SCALE: i32>(
    base_addr: *mut u8,
    k: __mmask8,
    vindex: __m128i,
    a: __m128d,
) {
    static_assert_imm8_scale!(SCALE);
    vscatterqpd_128(base_addr as _, k, vindex.as_i64x2(), a.as_f64x2(), SCALE)
}

/// Stores 2 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 64-bit integer indices stored in vindex scaled by scale
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_i64scatter_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
#[rustc_legacy_const_generics(3)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_i64scatter_ps<const SCALE: i32>(base_addr: *mut u8, vindex: __m128i, a: __m128) {
    static_assert_imm8_scale!(SCALE);
    vscatterqps_128(base_addr as _, 0xff, vindex.as_i64x2(), a.as_f32x4(), SCALE)
}

/// Stores 2 single-precision (32-bit) floating-point elements from a to memory starting at location base_addr
/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements whose corresponding
/// mask bit is not set are not written to memory).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_i64scatter_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscatterqps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mask_i64scatter_ps<const SCALE: i32>(
    base_addr: *mut u8,
    k: __mmask8,
    vindex: __m128i,
    a: __m128,
) {
    static_assert_imm8_scale!(SCALE);
    vscatterqps_128(base_addr as _, k, vindex.as_i64x2(), a.as_f32x4(), SCALE)
}

/// Loads 4 32-bit integer elements from memory starting at location base_addr at packed 32-bit integer
/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
/// mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_epi32)
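///
/// # Example
///
/// A minimal usage sketch (illustration only; marked `ignore` because
/// AVX-512F/AVX-512VL support cannot be assumed when doctests run):
///
/// ```ignore
/// let table = [10i32, 20, 30, 40];
/// let src = _mm_set1_epi32(0);
/// let vindex = _mm_setr_epi32(3, 2, 1, 0);
/// // Mask 0b0101 gathers lanes 0 and 2; lanes 1 and 3 keep the value from `src`.
/// let r = unsafe { _mm_mmask_i32gather_epi32::<4>(src, 0b0101, vindex, table.as_ptr().cast()) };
/// // r holds [40, 0, 20, 0].
/// ```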
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpgatherdd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mmask_i32gather_epi32<const SCALE: i32>(
    src: __m128i,
    k: __mmask8,
    vindex: __m128i,
    base_addr: *const u8,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    transmute(vpgatherdd_128(
        src.as_i32x4(),
        base_addr as _,
        vindex.as_i32x4(),
        k,
        SCALE,
    ))
}

/// Loads 2 64-bit integer elements from memory starting at location base_addr at packed 32-bit integer
/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
/// mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpgatherdq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mmask_i32gather_epi64<const SCALE: i32>(
    src: __m128i,
    k: __mmask8,
    vindex: __m128i,
    base_addr: *const u8,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    transmute(vpgatherdq_128(
        src.as_i64x2(),
        base_addr as _,
        vindex.as_i32x4(),
        k,
        SCALE,
    ))
}

/// Loads 2 double-precision (64-bit) floating-point elements from memory starting at location base_addr
/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
/// from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vgatherdpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mmask_i32gather_pd<const SCALE: i32>(
    src: __m128d,
    k: __mmask8,
    vindex: __m128i,
    base_addr: *const u8,
) -> __m128d {
    static_assert_imm8_scale!(SCALE);
    transmute(vgatherdpd_128(
        src.as_f64x2(),
        base_addr as _,
        vindex.as_i32x4(),
        k,
        SCALE,
    ))
}

/// Loads 4 single-precision (32-bit) floating-point elements from memory starting at location base_addr
/// at packed 32-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
/// from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i32gather_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vgatherdps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mmask_i32gather_ps<const SCALE: i32>(
    src: __m128,
    k: __mmask8,
    vindex: __m128i,
    base_addr: *const u8,
) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    transmute(vgatherdps_128(
        src.as_f32x4(),
        base_addr as _,
        vindex.as_i32x4(),
        k,
        SCALE,
    ))
}

/// Loads 2 32-bit integer elements from memory starting at location base_addr at packed 64-bit integer
/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
/// mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_epi32)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpgatherqd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mmask_i64gather_epi32<const SCALE: i32>(
    src: __m128i,
    k: __mmask8,
    vindex: __m128i,
    base_addr: *const u8,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    transmute(vpgatherqd_128(
        src.as_i32x4(),
        base_addr as _,
        vindex.as_i64x2(),
        k,
        SCALE,
    ))
}

/// Loads 2 64-bit integer elements from memory starting at location base_addr at packed 64-bit integer
/// indices stored in vindex scaled by scale using writemask k (elements are copied from src when the corresponding
/// mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_epi64)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpgatherqq, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mmask_i64gather_epi64<const SCALE: i32>(
    src: __m128i,
    k: __mmask8,
    vindex: __m128i,
    base_addr: *const u8,
) -> __m128i {
    static_assert_imm8_scale!(SCALE);
    transmute(vpgatherqq_128(
        src.as_i64x2(),
        base_addr as _,
        vindex.as_i64x2(),
        k,
        SCALE,
    ))
}

/// Loads 2 double-precision (64-bit) floating-point elements from memory starting at location base_addr
/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
/// from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_pd)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vgatherqpd, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mmask_i64gather_pd<const SCALE: i32>(
    src: __m128d,
    k: __mmask8,
    vindex: __m128i,
    base_addr: *const u8,
) -> __m128d {
    static_assert_imm8_scale!(SCALE);
    transmute(vgatherqpd_128(
        src.as_f64x2(),
        base_addr as _,
        vindex.as_i64x2(),
        k,
        SCALE,
    ))
}

/// Loads 2 single-precision (32-bit) floating-point elements from memory starting at location base_addr
/// at packed 64-bit integer indices stored in vindex scaled by scale using writemask k (elements are copied
/// from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mmask_i64gather_ps)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vgatherqps, SCALE = 1))]
#[rustc_legacy_const_generics(4)]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mmask_i64gather_ps<const SCALE: i32>(
    src: __m128,
    k: __mmask8,
    vindex: __m128i,
    base_addr: *const u8,
) -> __m128 {
    static_assert_imm8_scale!(SCALE);
    transmute(vgatherqps_128(
        src.as_f32x4(),
        base_addr as _,
        vindex.as_i64x2(),
        k,
        SCALE,
    ))
}

/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_epi32&expand=1198)
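///
/// # Example
///
/// A minimal usage sketch (illustration only; marked `ignore` because
/// AVX-512F support cannot be assumed when doctests run):
///
/// ```ignore
/// let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
/// let src = _mm512_set1_epi32(-1);
/// // Keep only the even-indexed lanes: they are packed contiguously into the
/// // low half of the result, and the upper lanes are taken from `src`.
/// let r = _mm512_mask_compress_epi32(src, 0b0101_0101_0101_0101, a);
/// // r holds [0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1, -1].
/// ```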
18340#[inline]
18341#[target_feature(enable = "avx512f")]
18342#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18343#[cfg_attr(test, assert_instr(vpcompressd))]
18344pub fn _mm512_mask_compress_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
18345 unsafe { transmute(src:vpcompressd(a.as_i32x16(), src.as_i32x16(), mask:k)) }
18346}
18347
18348/// Contiguously store the active 32-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18349///
18350/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_epi32&expand=1199)
18351#[inline]
18352#[target_feature(enable = "avx512f")]
18353#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18354#[cfg_attr(test, assert_instr(vpcompressd))]
18355pub fn _mm512_maskz_compress_epi32(k: __mmask16, a: __m512i) -> __m512i {
18356 unsafe { transmute(src:vpcompressd(a.as_i32x16(), src:i32x16::ZERO, mask:k)) }
18357}
18358
18359/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18360///
18361/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_epi32&expand=1196)
18362#[inline]
18363#[target_feature(enable = "avx512f,avx512vl")]
18364#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18365#[cfg_attr(test, assert_instr(vpcompressd))]
18366pub fn _mm256_mask_compress_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe { transmute(vpcompressd256(a.as_i32x8(), src.as_i32x8(), k)) }
18368}
18369
18370/// Contiguously store the active 32-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18371///
18372/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_epi32&expand=1197)
18373#[inline]
18374#[target_feature(enable = "avx512f,avx512vl")]
18375#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18376#[cfg_attr(test, assert_instr(vpcompressd))]
18377pub fn _mm256_maskz_compress_epi32(k: __mmask8, a: __m256i) -> __m256i {
    unsafe { transmute(vpcompressd256(a.as_i32x8(), i32x8::ZERO, k)) }
18379}
18380
18381/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18382///
18383/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_epi32&expand=1194)
18384#[inline]
18385#[target_feature(enable = "avx512f,avx512vl")]
18386#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18387#[cfg_attr(test, assert_instr(vpcompressd))]
18388pub fn _mm_mask_compress_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpcompressd128(a.as_i32x4(), src.as_i32x4(), k)) }
18390}
18391
18392/// Contiguously store the active 32-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18393///
18394/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_epi32&expand=1195)
18395#[inline]
18396#[target_feature(enable = "avx512f,avx512vl")]
18397#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18398#[cfg_attr(test, assert_instr(vpcompressd))]
18399pub fn _mm_maskz_compress_epi32(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpcompressd128(a.as_i32x4(), i32x4::ZERO, k)) }
18401}
18402
18403/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18404///
18405/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_epi64&expand=1204)
18406#[inline]
18407#[target_feature(enable = "avx512f")]
18408#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18409#[cfg_attr(test, assert_instr(vpcompressq))]
18410pub fn _mm512_mask_compress_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
    unsafe { transmute(vpcompressq(a.as_i64x8(), src.as_i64x8(), k)) }
18412}
18413
18414/// Contiguously store the active 64-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18415///
18416/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_epi64&expand=1205)
18417#[inline]
18418#[target_feature(enable = "avx512f")]
18419#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18420#[cfg_attr(test, assert_instr(vpcompressq))]
18421pub fn _mm512_maskz_compress_epi64(k: __mmask8, a: __m512i) -> __m512i {
    unsafe { transmute(vpcompressq(a.as_i64x8(), i64x8::ZERO, k)) }
18423}
18424
18425/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18426///
18427/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_epi64&expand=1202)
18428#[inline]
18429#[target_feature(enable = "avx512f,avx512vl")]
18430#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18431#[cfg_attr(test, assert_instr(vpcompressq))]
18432pub fn _mm256_mask_compress_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe { transmute(vpcompressq256(a.as_i64x4(), src.as_i64x4(), k)) }
18434}
18435
18436/// Contiguously store the active 64-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18437///
18438/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_epi64&expand=1203)
18439#[inline]
18440#[target_feature(enable = "avx512f,avx512vl")]
18441#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18442#[cfg_attr(test, assert_instr(vpcompressq))]
18443pub fn _mm256_maskz_compress_epi64(k: __mmask8, a: __m256i) -> __m256i {
    unsafe { transmute(vpcompressq256(a.as_i64x4(), i64x4::ZERO, k)) }
18445}
18446
18447/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18448///
18449/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_epi64&expand=1200)
18450#[inline]
18451#[target_feature(enable = "avx512f,avx512vl")]
18452#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18453#[cfg_attr(test, assert_instr(vpcompressq))]
18454pub fn _mm_mask_compress_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpcompressq128(a.as_i64x2(), src.as_i64x2(), k)) }
18456}
18457
18458/// Contiguously store the active 64-bit integers in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18459///
18460/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_epi64&expand=1201)
18461#[inline]
18462#[target_feature(enable = "avx512f,avx512vl")]
18463#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18464#[cfg_attr(test, assert_instr(vpcompressq))]
18465pub fn _mm_maskz_compress_epi64(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpcompressq128(a.as_i64x2(), i64x2::ZERO, k)) }
18467}
18468
18469/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18470///
18471/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_ps&expand=1222)
18472#[inline]
18473#[target_feature(enable = "avx512f")]
18474#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18475#[cfg_attr(test, assert_instr(vcompressps))]
18476pub fn _mm512_mask_compress_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    unsafe { transmute(vcompressps(a.as_f32x16(), src.as_f32x16(), k)) }
18478}
18479
18480/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18481///
18482/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_ps&expand=1223)
18483#[inline]
18484#[target_feature(enable = "avx512f")]
18485#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18486#[cfg_attr(test, assert_instr(vcompressps))]
18487pub fn _mm512_maskz_compress_ps(k: __mmask16, a: __m512) -> __m512 {
    unsafe { transmute(vcompressps(a.as_f32x16(), f32x16::ZERO, k)) }
18489}
18490
18491/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18492///
18493/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_ps&expand=1220)
18494#[inline]
18495#[target_feature(enable = "avx512f,avx512vl")]
18496#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18497#[cfg_attr(test, assert_instr(vcompressps))]
18498pub fn _mm256_mask_compress_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
    unsafe { transmute(vcompressps256(a.as_f32x8(), src.as_f32x8(), k)) }
18500}
18501
18502/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18503///
18504/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_ps&expand=1221)
18505#[inline]
18506#[target_feature(enable = "avx512f,avx512vl")]
18507#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18508#[cfg_attr(test, assert_instr(vcompressps))]
18509pub fn _mm256_maskz_compress_ps(k: __mmask8, a: __m256) -> __m256 {
    unsafe { transmute(vcompressps256(a.as_f32x8(), f32x8::ZERO, k)) }
18511}
18512
18513/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18514///
18515/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_ps&expand=1218)
18516#[inline]
18517#[target_feature(enable = "avx512f,avx512vl")]
18518#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18519#[cfg_attr(test, assert_instr(vcompressps))]
18520pub fn _mm_mask_compress_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    unsafe { transmute(vcompressps128(a.as_f32x4(), src.as_f32x4(), k)) }
18522}
18523
18524/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18525///
18526/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_ps&expand=1219)
18527#[inline]
18528#[target_feature(enable = "avx512f,avx512vl")]
18529#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18530#[cfg_attr(test, assert_instr(vcompressps))]
18531pub fn _mm_maskz_compress_ps(k: __mmask8, a: __m128) -> __m128 {
    unsafe { transmute(vcompressps128(a.as_f32x4(), f32x4::ZERO, k)) }
18533}
18534
18535/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18536///
18537/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compress_pd&expand=1216)
18538#[inline]
18539#[target_feature(enable = "avx512f")]
18540#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18541#[cfg_attr(test, assert_instr(vcompresspd))]
18542pub fn _mm512_mask_compress_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    unsafe { transmute(vcompresspd(a.as_f64x8(), src.as_f64x8(), k)) }
18544}
18545
18546/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18547///
18548/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_compress_pd&expand=1217)
18549#[inline]
18550#[target_feature(enable = "avx512f")]
18551#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18552#[cfg_attr(test, assert_instr(vcompresspd))]
18553pub fn _mm512_maskz_compress_pd(k: __mmask8, a: __m512d) -> __m512d {
    unsafe { transmute(vcompresspd(a.as_f64x8(), f64x8::ZERO, k)) }
18555}
18556
18557/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18558///
18559/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compress_pd&expand=1214)
18560#[inline]
18561#[target_feature(enable = "avx512f,avx512vl")]
18562#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18563#[cfg_attr(test, assert_instr(vcompresspd))]
18564pub fn _mm256_mask_compress_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
    unsafe { transmute(vcompresspd256(a.as_f64x4(), src.as_f64x4(), k)) }
18566}
18567
18568/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18569///
18570/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_compress_pd&expand=1215)
18571#[inline]
18572#[target_feature(enable = "avx512f,avx512vl")]
18573#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18574#[cfg_attr(test, assert_instr(vcompresspd))]
18575pub fn _mm256_maskz_compress_pd(k: __mmask8, a: __m256d) -> __m256d {
    unsafe { transmute(vcompresspd256(a.as_f64x4(), f64x4::ZERO, k)) }
18577}
18578
18579/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to dst, and pass through the remaining elements from src.
18580///
18581/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compress_pd&expand=1212)
18582#[inline]
18583#[target_feature(enable = "avx512f,avx512vl")]
18584#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18585#[cfg_attr(test, assert_instr(vcompresspd))]
18586pub fn _mm_mask_compress_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
    unsafe { transmute(vcompresspd128(a.as_f64x2(), src.as_f64x2(), k)) }
18588}
18589
18590/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in zeromask k) to dst, and set the remaining elements to zero.
18591///
18592/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_compress_pd&expand=1213)
18593#[inline]
18594#[target_feature(enable = "avx512f,avx512vl")]
18595#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18596#[cfg_attr(test, assert_instr(vcompresspd))]
18597pub fn _mm_maskz_compress_pd(k: __mmask8, a: __m128d) -> __m128d {
    unsafe { transmute(vcompresspd128(a.as_f64x2(), f64x2::ZERO, k)) }
18599}
18600
18601/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18602///
18603/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_epi32)
18604#[inline]
18605#[target_feature(enable = "avx512f")]
18606#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18607#[cfg_attr(test, assert_instr(vpcompressd))]
18608pub unsafe fn _mm512_mask_compressstoreu_epi32(base_addr: *mut u8, k: __mmask16, a: __m512i) {
    vcompressstored(base_addr as *mut _, a.as_i32x16(), k)
18610}
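
// A minimal stream-compaction sketch (not part of the stdarch API surface): writing only
// the selected lanes of `a` contiguously into `out` and reporting how many were written.
// The helper name is an assumption for illustration; the caller must guarantee `out` has
// room for up to 16 `i32` values, since the unaligned store writes `k.count_ones()`
// elements starting at `out`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn compress_store_example(out: *mut i32, k: __mmask16, a: __m512i) -> usize {
    _mm512_mask_compressstoreu_epi32(out as *mut u8, k, a);
    k.count_ones() as usize
}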
18611
18612/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18613///
18614/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_epi32)
18615#[inline]
18616#[target_feature(enable = "avx512f,avx512vl")]
18617#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18618#[cfg_attr(test, assert_instr(vpcompressd))]
18619pub unsafe fn _mm256_mask_compressstoreu_epi32(base_addr: *mut u8, k: __mmask8, a: __m256i) {
    vcompressstored256(base_addr as *mut _, a.as_i32x8(), k)
18621}
18622
18623/// Contiguously store the active 32-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18624///
18625/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_epi32)
18626#[inline]
18627#[target_feature(enable = "avx512f,avx512vl")]
18628#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18629#[cfg_attr(test, assert_instr(vpcompressd))]
18630pub unsafe fn _mm_mask_compressstoreu_epi32(base_addr: *mut u8, k: __mmask8, a: __m128i) {
    vcompressstored128(base_addr as *mut _, a.as_i32x4(), k)
18632}
18633
18634/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18635///
18636/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_epi64)
18637#[inline]
18638#[target_feature(enable = "avx512f")]
18639#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18640#[cfg_attr(test, assert_instr(vpcompressq))]
18641pub unsafe fn _mm512_mask_compressstoreu_epi64(base_addr: *mut u8, k: __mmask8, a: __m512i) {
    vcompressstoreq(base_addr as *mut _, a.as_i64x8(), k)
18643}
18644
18645/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18646///
18647/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_epi64)
18648#[inline]
18649#[target_feature(enable = "avx512f,avx512vl")]
18650#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18651#[cfg_attr(test, assert_instr(vpcompressq))]
18652pub unsafe fn _mm256_mask_compressstoreu_epi64(base_addr: *mut u8, k: __mmask8, a: __m256i) {
    vcompressstoreq256(base_addr as *mut _, a.as_i64x4(), k)
18654}
18655
18656/// Contiguously store the active 64-bit integers in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18657///
18658/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_epi64)
18659#[inline]
18660#[target_feature(enable = "avx512f,avx512vl")]
18661#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18662#[cfg_attr(test, assert_instr(vpcompressq))]
18663pub unsafe fn _mm_mask_compressstoreu_epi64(base_addr: *mut u8, k: __mmask8, a: __m128i) {
    vcompressstoreq128(base_addr as *mut _, a.as_i64x2(), k)
18665}
18666
18667/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18668///
18669/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_ps)
18670#[inline]
18671#[target_feature(enable = "avx512f")]
18672#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18673#[cfg_attr(test, assert_instr(vcompressps))]
18674pub unsafe fn _mm512_mask_compressstoreu_ps(base_addr: *mut u8, k: __mmask16, a: __m512) {
    vcompressstoreps(base_addr as *mut _, a.as_f32x16(), k)
18676}
18677
18678/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18679///
18680/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_ps)
18681#[inline]
18682#[target_feature(enable = "avx512f,avx512vl")]
18683#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18684#[cfg_attr(test, assert_instr(vcompressps))]
18685pub unsafe fn _mm256_mask_compressstoreu_ps(base_addr: *mut u8, k: __mmask8, a: __m256) {
    vcompressstoreps256(base_addr as *mut _, a.as_f32x8(), k)
18687}
18688
18689/// Contiguously store the active single-precision (32-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18690///
18691/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_ps)
18692#[inline]
18693#[target_feature(enable = "avx512f,avx512vl")]
18694#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18695#[cfg_attr(test, assert_instr(vcompressps))]
18696pub unsafe fn _mm_mask_compressstoreu_ps(base_addr: *mut u8, k: __mmask8, a: __m128) {
    vcompressstoreps128(base_addr as *mut _, a.as_f32x4(), k)
18698}
18699
18700/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18701///
18702/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_compressstoreu_pd)
18703#[inline]
18704#[target_feature(enable = "avx512f")]
18705#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18706#[cfg_attr(test, assert_instr(vcompresspd))]
18707pub unsafe fn _mm512_mask_compressstoreu_pd(base_addr: *mut u8, k: __mmask8, a: __m512d) {
    vcompressstorepd(base_addr as *mut _, a.as_f64x8(), k)
18709}
18710
18711/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18712///
18713/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_compressstoreu_pd)
18714#[inline]
18715#[target_feature(enable = "avx512f,avx512vl")]
18716#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18717#[cfg_attr(test, assert_instr(vcompresspd))]
18718pub unsafe fn _mm256_mask_compressstoreu_pd(base_addr: *mut u8, k: __mmask8, a: __m256d) {
    vcompressstorepd256(base_addr as *mut _, a.as_f64x4(), k)
18720}
18721
18722/// Contiguously store the active double-precision (64-bit) floating-point elements in a (those with their respective bit set in writemask k) to unaligned memory at base_addr.
18723///
18724/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_compressstoreu_pd)
18725#[inline]
18726#[target_feature(enable = "avx512f,avx512vl")]
18727#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18728#[cfg_attr(test, assert_instr(vcompresspd))]
18729pub unsafe fn _mm_mask_compressstoreu_pd(base_addr: *mut u8, k: __mmask8, a: __m128d) {
    vcompressstorepd128(base_addr as *mut _, a.as_f64x2(), k)
18731}
18732
18733/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18734///
18735/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_epi32&expand=2316)
18736#[inline]
18737#[target_feature(enable = "avx512f")]
18738#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18739#[cfg_attr(test, assert_instr(vpexpandd))]
18740pub fn _mm512_mask_expand_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
    unsafe { transmute(vpexpandd(a.as_i32x16(), src.as_i32x16(), k)) }
18742}
18743
18744/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18745///
18746/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_epi32&expand=2317)
18747#[inline]
18748#[target_feature(enable = "avx512f")]
18749#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18750#[cfg_attr(test, assert_instr(vpexpandd))]
18751pub fn _mm512_maskz_expand_epi32(k: __mmask16, a: __m512i) -> __m512i {
    unsafe { transmute(vpexpandd(a.as_i32x16(), i32x16::ZERO, k)) }
18753}
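
// A minimal usage sketch (not part of the stdarch API surface): `expand` is the inverse of
// `compress`. The low elements of `a` are read in order and scattered to the lane positions
// whose mask bit is set; with the zeroing form the remaining lanes become zero. The helper
// name and mask value are assumptions for illustration only.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn expand_to_odd_lanes_example(a: __m512i) -> __m512i {
    // a[0], a[1], ... are placed into lanes 1, 3, 5, ...; the even lanes are zeroed.
    _mm512_maskz_expand_epi32(0b1010_1010_1010_1010, a)
}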
18754
18755/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18756///
18757/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_epi32&expand=2314)
18758#[inline]
18759#[target_feature(enable = "avx512f,avx512vl")]
18760#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18761#[cfg_attr(test, assert_instr(vpexpandd))]
18762pub fn _mm256_mask_expand_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe { transmute(vpexpandd256(a.as_i32x8(), src.as_i32x8(), k)) }
18764}
18765
18766/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18767///
18768/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_epi32&expand=2315)
18769#[inline]
18770#[target_feature(enable = "avx512f,avx512vl")]
18771#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18772#[cfg_attr(test, assert_instr(vpexpandd))]
18773pub fn _mm256_maskz_expand_epi32(k: __mmask8, a: __m256i) -> __m256i {
    unsafe { transmute(vpexpandd256(a.as_i32x8(), i32x8::ZERO, k)) }
18775}
18776
18777/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18778///
18779/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_epi32&expand=2312)
18780#[inline]
18781#[target_feature(enable = "avx512f,avx512vl")]
18782#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18783#[cfg_attr(test, assert_instr(vpexpandd))]
18784pub fn _mm_mask_expand_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpexpandd128(a.as_i32x4(), src.as_i32x4(), k)) }
18786}
18787
18788/// Load contiguous active 32-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18789///
18790/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_epi32&expand=2313)
18791#[inline]
18792#[target_feature(enable = "avx512f,avx512vl")]
18793#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18794#[cfg_attr(test, assert_instr(vpexpandd))]
18795pub fn _mm_maskz_expand_epi32(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpexpandd128(a.as_i32x4(), i32x4::ZERO, k)) }
18797}
18798
18799/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18800///
18801/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_epi64&expand=2322)
18802#[inline]
18803#[target_feature(enable = "avx512f")]
18804#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18805#[cfg_attr(test, assert_instr(vpexpandq))]
18806pub fn _mm512_mask_expand_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
    unsafe { transmute(vpexpandq(a.as_i64x8(), src.as_i64x8(), k)) }
18808}
18809
18810/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18811///
18812/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_epi64&expand=2323)
18813#[inline]
18814#[target_feature(enable = "avx512f")]
18815#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18816#[cfg_attr(test, assert_instr(vpexpandq))]
18817pub fn _mm512_maskz_expand_epi64(k: __mmask8, a: __m512i) -> __m512i {
    unsafe { transmute(vpexpandq(a.as_i64x8(), i64x8::ZERO, k)) }
18819}
18820
18821/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18822///
18823/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_epi64&expand=2320)
18824#[inline]
18825#[target_feature(enable = "avx512f,avx512vl")]
18826#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18827#[cfg_attr(test, assert_instr(vpexpandq))]
18828pub fn _mm256_mask_expand_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
    unsafe { transmute(vpexpandq256(a.as_i64x4(), src.as_i64x4(), k)) }
18830}
18831
18832/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18833///
18834/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_epi64&expand=2321)
18835#[inline]
18836#[target_feature(enable = "avx512f,avx512vl")]
18837#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18838#[cfg_attr(test, assert_instr(vpexpandq))]
18839pub fn _mm256_maskz_expand_epi64(k: __mmask8, a: __m256i) -> __m256i {
    unsafe { transmute(vpexpandq256(a.as_i64x4(), i64x4::ZERO, k)) }
18841}
18842
18843/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18844///
18845/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_epi64&expand=2318)
18846#[inline]
18847#[target_feature(enable = "avx512f,avx512vl")]
18848#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18849#[cfg_attr(test, assert_instr(vpexpandq))]
18850pub fn _mm_mask_expand_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpexpandq128(a.as_i64x2(), src.as_i64x2(), k)) }
18852}
18853
18854/// Load contiguous active 64-bit integers from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18855///
18856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_epi64&expand=2319)
18857#[inline]
18858#[target_feature(enable = "avx512f,avx512vl")]
18859#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18860#[cfg_attr(test, assert_instr(vpexpandq))]
18861pub fn _mm_maskz_expand_epi64(k: __mmask8, a: __m128i) -> __m128i {
    unsafe { transmute(vpexpandq128(a.as_i64x2(), i64x2::ZERO, k)) }
18863}
18864
18865/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18866///
18867/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_ps&expand=2340)
18868#[inline]
18869#[target_feature(enable = "avx512f")]
18870#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18871#[cfg_attr(test, assert_instr(vexpandps))]
18872pub fn _mm512_mask_expand_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
    unsafe { transmute(vexpandps(a.as_f32x16(), src.as_f32x16(), k)) }
18874}
18875
18876/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18877///
18878/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_ps&expand=2341)
18879#[inline]
18880#[target_feature(enable = "avx512f")]
18881#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18882#[cfg_attr(test, assert_instr(vexpandps))]
18883pub fn _mm512_maskz_expand_ps(k: __mmask16, a: __m512) -> __m512 {
    unsafe { transmute(vexpandps(a.as_f32x16(), f32x16::ZERO, k)) }
18885}
18886
18887/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18888///
18889/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_ps&expand=2338)
18890#[inline]
18891#[target_feature(enable = "avx512f,avx512vl")]
18892#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18893#[cfg_attr(test, assert_instr(vexpandps))]
18894pub fn _mm256_mask_expand_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
    unsafe { transmute(vexpandps256(a.as_f32x8(), src.as_f32x8(), k)) }
18896}
18897
18898/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18899///
18900/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_ps&expand=2339)
18901#[inline]
18902#[target_feature(enable = "avx512f,avx512vl")]
18903#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18904#[cfg_attr(test, assert_instr(vexpandps))]
18905pub fn _mm256_maskz_expand_ps(k: __mmask8, a: __m256) -> __m256 {
    unsafe { transmute(vexpandps256(a.as_f32x8(), f32x8::ZERO, k)) }
18907}
18908
18909/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18910///
18911/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_ps&expand=2336)
18912#[inline]
18913#[target_feature(enable = "avx512f,avx512vl")]
18914#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18915#[cfg_attr(test, assert_instr(vexpandps))]
18916pub fn _mm_mask_expand_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
    unsafe { transmute(vexpandps128(a.as_f32x4(), src.as_f32x4(), k)) }
18918}
18919
18920/// Load contiguous active single-precision (32-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18921///
18922/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_ps&expand=2337)
18923#[inline]
18924#[target_feature(enable = "avx512f,avx512vl")]
18925#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18926#[cfg_attr(test, assert_instr(vexpandps))]
18927pub fn _mm_maskz_expand_ps(k: __mmask8, a: __m128) -> __m128 {
    unsafe { transmute(vexpandps128(a.as_f32x4(), f32x4::ZERO, k)) }
18929}
18930
18931/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18932///
18933/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expand_pd&expand=2334)
18934#[inline]
18935#[target_feature(enable = "avx512f")]
18936#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18937#[cfg_attr(test, assert_instr(vexpandpd))]
18938pub fn _mm512_mask_expand_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
    unsafe { transmute(vexpandpd(a.as_f64x8(), src.as_f64x8(), k)) }
18940}
18941
18942/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18943///
18944/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expand_pd&expand=2335)
18945#[inline]
18946#[target_feature(enable = "avx512f")]
18947#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18948#[cfg_attr(test, assert_instr(vexpandpd))]
18949pub fn _mm512_maskz_expand_pd(k: __mmask8, a: __m512d) -> __m512d {
    unsafe { transmute(vexpandpd(a.as_f64x8(), f64x8::ZERO, k)) }
18951}
18952
18953/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18954///
18955/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expand_pd&expand=2332)
18956#[inline]
18957#[target_feature(enable = "avx512f,avx512vl")]
18958#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18959#[cfg_attr(test, assert_instr(vexpandpd))]
18960pub fn _mm256_mask_expand_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
    unsafe { transmute(vexpandpd256(a.as_f64x4(), src.as_f64x4(), k)) }
18962}
18963
18964/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18965///
18966/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expand_pd&expand=2333)
18967#[inline]
18968#[target_feature(enable = "avx512f,avx512vl")]
18969#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18970#[cfg_attr(test, assert_instr(vexpandpd))]
18971pub fn _mm256_maskz_expand_pd(k: __mmask8, a: __m256d) -> __m256d {
    unsafe { transmute(vexpandpd256(a.as_f64x4(), i64x4::ZERO, k)) }
18973}
18974
18975/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
18976///
18977/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expand_pd&expand=2330)
18978#[inline]
18979#[target_feature(enable = "avx512f,avx512vl")]
18980#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18981#[cfg_attr(test, assert_instr(vexpandpd))]
18982pub fn _mm_mask_expand_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
    unsafe { transmute(vexpandpd128(a.as_f64x2(), src.as_f64x2(), k)) }
18984}
18985
18986/// Load contiguous active double-precision (64-bit) floating-point elements from a (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
18987///
18988/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expand_pd&expand=2331)
18989#[inline]
18990#[target_feature(enable = "avx512f,avx512vl")]
18991#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
18992#[cfg_attr(test, assert_instr(vexpandpd))]
18993pub fn _mm_maskz_expand_pd(k: __mmask8, a: __m128d) -> __m128d {
    unsafe { transmute(vexpandpd128(a.as_f64x2(), f64x2::ZERO, k)) }
18995}
18996
18997/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
18998///
18999/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rol_epi32&expand=4685)
19000#[inline]
19001#[target_feature(enable = "avx512f")]
19002#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19003#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19004#[rustc_legacy_const_generics(1)]
19005pub fn _mm512_rol_epi32<const IMM8: i32>(a: __m512i) -> __m512i {
19006 unsafe {
19007 static_assert_uimm_bits!(IMM8, 8);
19008 let a: i32x16 = a.as_i32x16();
19009 let r: i32x16 = vprold(a, IMM8);
        transmute(r)
19011 }
19012}
19013
19014/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19015///
19016/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rol_epi32&expand=4683)
19017#[inline]
19018#[target_feature(enable = "avx512f")]
19019#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19020#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19021#[rustc_legacy_const_generics(3)]
19022pub fn _mm512_mask_rol_epi32<const IMM8: i32>(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
19023 unsafe {
19024 static_assert_uimm_bits!(IMM8, 8);
19025 let a: i32x16 = a.as_i32x16();
19026 let r: i32x16 = vprold(a, IMM8);
        transmute(simd_select_bitmask(k, r, src.as_i32x16()))
19028 }
19029}
19030
19031/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19032///
19033/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rol_epi32&expand=4684)
19034#[inline]
19035#[target_feature(enable = "avx512f")]
19036#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19037#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19038#[rustc_legacy_const_generics(2)]
19039pub fn _mm512_maskz_rol_epi32<const IMM8: i32>(k: __mmask16, a: __m512i) -> __m512i {
19040 unsafe {
19041 static_assert_uimm_bits!(IMM8, 8);
19042 let a: i32x16 = a.as_i32x16();
19043 let r: i32x16 = vprold(a, IMM8);
        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
19045 }
19046}
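
// A minimal usage sketch (not part of the stdarch API surface), assuming an
// `avx512f`-enabled target: rotating every 32-bit lane left by a compile-time constant.
// The rotate count is a const generic, so it must be known at compile time; the helper
// name is an assumption for illustration only.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn rotate_left_by_7_example(a: __m512i) -> __m512i {
    // Bits shifted out of the top of each lane re-enter at the bottom of the same lane.
    _mm512_rol_epi32::<7>(a)
}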
19047
19048/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19049///
19050/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rol_epi32&expand=4682)
19051#[inline]
19052#[target_feature(enable = "avx512f,avx512vl")]
19053#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19054#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19055#[rustc_legacy_const_generics(1)]
19056pub fn _mm256_rol_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
19057 unsafe {
19058 static_assert_uimm_bits!(IMM8, 8);
19059 let a: i32x8 = a.as_i32x8();
19060 let r: i32x8 = vprold256(a, IMM8);
        transmute(r)
19062 }
19063}
19064
19065/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19066///
19067/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rol_epi32&expand=4680)
19068#[inline]
19069#[target_feature(enable = "avx512f,avx512vl")]
19070#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19071#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19072#[rustc_legacy_const_generics(3)]
19073pub fn _mm256_mask_rol_epi32<const IMM8: i32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19074 unsafe {
19075 static_assert_uimm_bits!(IMM8, 8);
19076 let a: i32x8 = a.as_i32x8();
19077 let r: i32x8 = vprold256(a, IMM8);
        transmute(simd_select_bitmask(k, r, src.as_i32x8()))
19079 }
19080}
19081
19082/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19083///
19084/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rol_epi32&expand=4681)
19085#[inline]
19086#[target_feature(enable = "avx512f,avx512vl")]
19087#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19088#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19089#[rustc_legacy_const_generics(2)]
19090pub fn _mm256_maskz_rol_epi32<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
19091 unsafe {
19092 static_assert_uimm_bits!(IMM8, 8);
19093 let a: i32x8 = a.as_i32x8();
19094 let r: i32x8 = vprold256(a, IMM8);
        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
19096 }
19097}
19098
19099/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19100///
19101/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rol_epi32&expand=4679)
19102#[inline]
19103#[target_feature(enable = "avx512f,avx512vl")]
19104#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19105#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19106#[rustc_legacy_const_generics(1)]
19107pub fn _mm_rol_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
19108 unsafe {
19109 static_assert_uimm_bits!(IMM8, 8);
19110 let a: i32x4 = a.as_i32x4();
19111 let r: i32x4 = vprold128(a, IMM8);
        transmute(r)
19113 }
19114}
19115
19116/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19117///
19118/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rol_epi32&expand=4677)
19119#[inline]
19120#[target_feature(enable = "avx512f,avx512vl")]
19121#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19122#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19123#[rustc_legacy_const_generics(3)]
19124pub fn _mm_mask_rol_epi32<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19125 unsafe {
19126 static_assert_uimm_bits!(IMM8, 8);
19127 let a: i32x4 = a.as_i32x4();
19128 let r: i32x4 = vprold128(a, IMM8);
        transmute(simd_select_bitmask(k, r, src.as_i32x4()))
19130 }
19131}
19132
19133/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19134///
19135/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rol_epi32&expand=4678)
19136#[inline]
19137#[target_feature(enable = "avx512f,avx512vl")]
19138#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19139#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19140#[rustc_legacy_const_generics(2)]
19141pub fn _mm_maskz_rol_epi32<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
19142 unsafe {
19143 static_assert_uimm_bits!(IMM8, 8);
19144 let a: i32x4 = a.as_i32x4();
19145 let r: i32x4 = vprold128(a, IMM8);
        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
19147 }
19148}
19149
19150/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19151///
19152/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ror_epi32&expand=4721)
19153#[inline]
19154#[target_feature(enable = "avx512f")]
19155#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19156#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19157#[rustc_legacy_const_generics(1)]
19158pub fn _mm512_ror_epi32<const IMM8: i32>(a: __m512i) -> __m512i {
19159 unsafe {
19160 static_assert_uimm_bits!(IMM8, 8);
19161 let a: i32x16 = a.as_i32x16();
19162 let r: i32x16 = vprord(a, IMM8);
        transmute(r)
19164 }
19165}
19166
19167/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19168///
19169/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ror_epi32&expand=4719)
19170#[inline]
19171#[target_feature(enable = "avx512f")]
19172#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19173#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19174#[rustc_legacy_const_generics(3)]
19175pub fn _mm512_mask_ror_epi32<const IMM8: i32>(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
19176 unsafe {
19177 static_assert_uimm_bits!(IMM8, 8);
19178 let a: i32x16 = a.as_i32x16();
19179 let r: i32x16 = vprord(a, IMM8);
        transmute(simd_select_bitmask(k, r, src.as_i32x16()))
19181 }
19182}
19183
19184/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19185///
19186/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ror_epi32&expand=4720)
19187#[inline]
19188#[target_feature(enable = "avx512f")]
19189#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19190#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19191#[rustc_legacy_const_generics(2)]
19192pub fn _mm512_maskz_ror_epi32<const IMM8: i32>(k: __mmask16, a: __m512i) -> __m512i {
19193 unsafe {
19194 static_assert_uimm_bits!(IMM8, 8);
19195 let a: i32x16 = a.as_i32x16();
19196 let r: i32x16 = vprord(a, IMM8);
        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
19198 }
19199}
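
// A minimal usage sketch (not part of the stdarch API surface): for 32-bit lanes, a right
// rotation by N is the same permutation of bits as a left rotation by 32 - N, so either
// intrinsic family can express the other. The helper name is an assumption for
// illustration only.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn rotate_right_equivalence_example(a: __m512i) -> (__m512i, __m512i) {
    // Both results hold the same bit pattern in every lane.
    (_mm512_ror_epi32::<3>(a), _mm512_rol_epi32::<29>(a))
}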
19200
19201/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19202///
19203/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ror_epi32&expand=4718)
19204#[inline]
19205#[target_feature(enable = "avx512f,avx512vl")]
19206#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19207#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19208#[rustc_legacy_const_generics(1)]
19209pub fn _mm256_ror_epi32<const IMM8: i32>(a: __m256i) -> __m256i {
19210 unsafe {
19211 static_assert_uimm_bits!(IMM8, 8);
19212 let a: i32x8 = a.as_i32x8();
19213 let r: i32x8 = vprord256(a, IMM8);
        transmute(r)
19215 }
19216}
19217
19218/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19219///
19220/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ror_epi32&expand=4716)
19221#[inline]
19222#[target_feature(enable = "avx512f,avx512vl")]
19223#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19224#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19225#[rustc_legacy_const_generics(3)]
19226pub fn _mm256_mask_ror_epi32<const IMM8: i32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19227 unsafe {
19228 static_assert_uimm_bits!(IMM8, 8);
19229 let a: i32x8 = a.as_i32x8();
19230 let r: i32x8 = vprord256(a, IMM8);
        transmute(simd_select_bitmask(k, r, src.as_i32x8()))
19232 }
19233}
19234
19235/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19236///
19237/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ror_epi32&expand=4717)
19238#[inline]
19239#[target_feature(enable = "avx512f,avx512vl")]
19240#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19241#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19242#[rustc_legacy_const_generics(2)]
19243pub fn _mm256_maskz_ror_epi32<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
19244 unsafe {
19245 static_assert_uimm_bits!(IMM8, 8);
19246 let a: i32x8 = a.as_i32x8();
19247 let r: i32x8 = vprord256(a, IMM8);
        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
19249 }
19250}
19251
19252/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19253///
19254/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ror_epi32&expand=4715)
19255#[inline]
19256#[target_feature(enable = "avx512f,avx512vl")]
19257#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19258#[cfg_attr(test, assert_instr(vprold, IMM8 = 1))]
19259#[rustc_legacy_const_generics(1)]
19260pub fn _mm_ror_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
19261 unsafe {
19262 static_assert_uimm_bits!(IMM8, 8);
19263 let a: i32x4 = a.as_i32x4();
19264 let r: i32x4 = vprord128(a, IMM8);
        transmute(r)
19266 }
19267}
19268
19269/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19270///
19271/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ror_epi32&expand=4713)
19272#[inline]
19273#[target_feature(enable = "avx512f,avx512vl")]
19274#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19275#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19276#[rustc_legacy_const_generics(3)]
19277pub fn _mm_mask_ror_epi32<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19278 unsafe {
19279 static_assert_uimm_bits!(IMM8, 8);
19280 let a: i32x4 = a.as_i32x4();
19281 let r: i32x4 = vprord128(a, IMM8);
        transmute(simd_select_bitmask(k, r, src.as_i32x4()))
19283 }
19284}
19285
19286/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19287///
19288/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ror_epi32&expand=4714)
19289#[inline]
19290#[target_feature(enable = "avx512f,avx512vl")]
19291#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19292#[cfg_attr(test, assert_instr(vprold, IMM8 = 123))]
19293#[rustc_legacy_const_generics(2)]
19294pub fn _mm_maskz_ror_epi32<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
19295 unsafe {
19296 static_assert_uimm_bits!(IMM8, 8);
19297 let a: i32x4 = a.as_i32x4();
19298 let r: i32x4 = vprord128(a, IMM8);
        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
19300 }
19301}
19302
19303/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19304///
19305/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rol_epi64&expand=4694)
19306#[inline]
19307#[target_feature(enable = "avx512f")]
19308#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19309#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19310#[rustc_legacy_const_generics(1)]
19311pub fn _mm512_rol_epi64<const IMM8: i32>(a: __m512i) -> __m512i {
19312 unsafe {
19313 static_assert_uimm_bits!(IMM8, 8);
19314 let a: i64x8 = a.as_i64x8();
19315 let r: i64x8 = vprolq(a, IMM8);
        transmute(r)
19317 }
19318}
19319
19320/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19321///
19322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rol_epi64&expand=4692)
19323#[inline]
19324#[target_feature(enable = "avx512f")]
19325#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19326#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19327#[rustc_legacy_const_generics(3)]
19328pub fn _mm512_mask_rol_epi64<const IMM8: i32>(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
19329 unsafe {
19330 static_assert_uimm_bits!(IMM8, 8);
19331 let a: i64x8 = a.as_i64x8();
19332 let r: i64x8 = vprolq(a, IMM8);
        transmute(simd_select_bitmask(k, r, src.as_i64x8()))
19334 }
19335}
19336
19337/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19338///
19339/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rol_epi64&expand=4693)
19340#[inline]
19341#[target_feature(enable = "avx512f")]
19342#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19343#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19344#[rustc_legacy_const_generics(2)]
19345pub fn _mm512_maskz_rol_epi64<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m512i {
19346 unsafe {
19347 static_assert_uimm_bits!(IMM8, 8);
19348 let a: i64x8 = a.as_i64x8();
19349 let r: i64x8 = vprolq(a, IMM8);
        transmute(simd_select_bitmask(k, r, i64x8::ZERO))
19351 }
19352}
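
// Hypothetical scalar model of the masking convention used by the `_mask_` and
// `_maskz_` variants above: a set bit in `k` keeps the freshly computed lane,
// while a clear bit falls back to the matching lane of `src` (writemask) or to
// zero (zeromask). Shown purely as an illustration of what `simd_select_bitmask`
// does; it is not called anywhere.
#[cfg(test)]
#[allow(dead_code)]
fn mask_select_epi64(k: u8, computed: [u64; 8], fallback: [u64; 8]) -> [u64; 8] {
    let mut out = fallback;
    for i in 0..8 {
        // Lane i is taken from `computed` only when bit i of the mask is set.
        if (k >> i) & 1 == 1 {
            out[i] = computed[i];
        }
    }
    out
}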
19353
19354/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19355///
19356/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rol_epi64&expand=4691)
19357#[inline]
19358#[target_feature(enable = "avx512f,avx512vl")]
19359#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19360#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19361#[rustc_legacy_const_generics(1)]
19362pub fn _mm256_rol_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
19363 unsafe {
19364 static_assert_uimm_bits!(IMM8, 8);
19365 let a: i64x4 = a.as_i64x4();
19366 let r: i64x4 = vprolq256(a, IMM8);
        transmute(r)
19368 }
19369}
19370
19371/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19372///
19373/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rol_epi64&expand=4689)
19374#[inline]
19375#[target_feature(enable = "avx512f,avx512vl")]
19376#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19377#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19378#[rustc_legacy_const_generics(3)]
19379pub fn _mm256_mask_rol_epi64<const IMM8: i32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19380 unsafe {
19381 static_assert_uimm_bits!(IMM8, 8);
19382 let a: i64x4 = a.as_i64x4();
19383 let r: i64x4 = vprolq256(a, IMM8);
        transmute(simd_select_bitmask(k, r, src.as_i64x4()))
19385 }
19386}
19387
19388/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19389///
19390/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rol_epi64&expand=4690)
19391#[inline]
19392#[target_feature(enable = "avx512f,avx512vl")]
19393#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19394#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19395#[rustc_legacy_const_generics(2)]
19396pub fn _mm256_maskz_rol_epi64<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
19397 unsafe {
19398 static_assert_uimm_bits!(IMM8, 8);
19399 let a: i64x4 = a.as_i64x4();
19400 let r: i64x4 = vprolq256(a, IMM8);
        transmute(simd_select_bitmask(k, r, i64x4::ZERO))
19402 }
19403}
19404
19405/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst.
19406///
19407/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rol_epi64&expand=4688)
19408#[inline]
19409#[target_feature(enable = "avx512f,avx512vl")]
19410#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19411#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19412#[rustc_legacy_const_generics(1)]
19413pub fn _mm_rol_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
19414 unsafe {
19415 static_assert_uimm_bits!(IMM8, 8);
19416 let a: i64x2 = a.as_i64x2();
19417 let r: i64x2 = vprolq128(a, IMM8);
        transmute(r)
19419 }
19420}
19421
19422/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19423///
19424/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rol_epi64&expand=4686)
19425#[inline]
19426#[target_feature(enable = "avx512f,avx512vl")]
19427#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19428#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19429#[rustc_legacy_const_generics(3)]
19430pub fn _mm_mask_rol_epi64<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19431 unsafe {
19432 static_assert_uimm_bits!(IMM8, 8);
19433 let a: i64x2 = a.as_i64x2();
19434 let r: i64x2 = vprolq128(a, IMM8);
        transmute(simd_select_bitmask(k, r, src.as_i64x2()))
19436 }
19437}
19438
19439/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19440///
19441/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rol_epi64&expand=4687)
19442#[inline]
19443#[target_feature(enable = "avx512f,avx512vl")]
19444#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19445#[cfg_attr(test, assert_instr(vprolq, IMM8 = 1))]
19446#[rustc_legacy_const_generics(2)]
19447pub fn _mm_maskz_rol_epi64<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
19448 unsafe {
19449 static_assert_uimm_bits!(IMM8, 8);
19450 let a: i64x2 = a.as_i64x2();
19451 let r: i64x2 = vprolq128(a, IMM8);
        transmute(simd_select_bitmask(k, r, i64x2::ZERO))
19453 }
19454}
19455
19456/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19457///
19458/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_ror_epi64&expand=4730)
19459#[inline]
19460#[target_feature(enable = "avx512f")]
19461#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19462#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19463#[rustc_legacy_const_generics(1)]
19464pub fn _mm512_ror_epi64<const IMM8: i32>(a: __m512i) -> __m512i {
19465 unsafe {
19466 static_assert_uimm_bits!(IMM8, 8);
19467 let a: i64x8 = a.as_i64x8();
19468 let r: i64x8 = vprorq(a, IMM8);
        transmute(r)
19470 }
19471}
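
// Scalar sketch of why a right rotation can always be re-expressed as a left
// rotation (which may be why the test assertions for the `ror_epi64` intrinsics
// in this module name `vprolq`): rotating a 64-bit lane right by `n` equals
// rotating it left by `(64 - n % 64) % 64`. This identity is illustrative only.
#[cfg(test)]
#[allow(dead_code)]
fn ror_matches_complementary_rol(x: u64, n: u32) -> bool {
    x.rotate_right(n % 64) == x.rotate_left((64 - n % 64) % 64)
}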
19472
19473/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19474///
19475/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_ror_epi64&expand=4728)
19476#[inline]
19477#[target_feature(enable = "avx512f")]
19478#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19479#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19480#[rustc_legacy_const_generics(3)]
19481pub fn _mm512_mask_ror_epi64<const IMM8: i32>(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
19482 unsafe {
19483 static_assert_uimm_bits!(IMM8, 8);
19484 let a: i64x8 = a.as_i64x8();
19485 let r: i64x8 = vprorq(a, IMM8);
        transmute(simd_select_bitmask(k, r, src.as_i64x8()))
19487 }
19488}
19489
19490/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19491///
19492/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_ror_epi64&expand=4729)
19493#[inline]
19494#[target_feature(enable = "avx512f")]
19495#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19496#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19497#[rustc_legacy_const_generics(2)]
19498pub fn _mm512_maskz_ror_epi64<const IMM8: i32>(k: __mmask8, a: __m512i) -> __m512i {
19499 unsafe {
19500 static_assert_uimm_bits!(IMM8, 8);
19501 let a: i64x8 = a.as_i64x8();
19502 let r: i64x8 = vprorq(a, IMM8);
        transmute(simd_select_bitmask(k, r, i64x8::ZERO))
19504 }
19505}
19506
19507/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19508///
19509/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_ror_epi64&expand=4727)
19510#[inline]
19511#[target_feature(enable = "avx512f,avx512vl")]
19512#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19513#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19514#[rustc_legacy_const_generics(1)]
19515pub fn _mm256_ror_epi64<const IMM8: i32>(a: __m256i) -> __m256i {
19516 unsafe {
19517 static_assert_uimm_bits!(IMM8, 8);
19518 let a: i64x4 = a.as_i64x4();
19519 let r: i64x4 = vprorq256(a, IMM8);
        transmute(r)
19521 }
19522}
19523
19524/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19525///
19526/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_ror_epi64&expand=4725)
19527#[inline]
19528#[target_feature(enable = "avx512f,avx512vl")]
19529#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19530#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19531#[rustc_legacy_const_generics(3)]
19532pub fn _mm256_mask_ror_epi64<const IMM8: i32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19533 unsafe {
19534 static_assert_uimm_bits!(IMM8, 8);
19535 let a: i64x4 = a.as_i64x4();
19536 let r: i64x4 = vprorq256(a, IMM8);
        transmute(simd_select_bitmask(k, r, src.as_i64x4()))
19538 }
19539}
19540
19541/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19542///
19543/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_ror_epi64&expand=4726)
19544#[inline]
19545#[target_feature(enable = "avx512f,avx512vl")]
19546#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19547#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19548#[rustc_legacy_const_generics(2)]
19549pub fn _mm256_maskz_ror_epi64<const IMM8: i32>(k: __mmask8, a: __m256i) -> __m256i {
19550 unsafe {
19551 static_assert_uimm_bits!(IMM8, 8);
19552 let a: i64x4 = a.as_i64x4();
19553 let r: i64x4 = vprorq256(a, IMM8);
        transmute(simd_select_bitmask(k, r, i64x4::ZERO))
19555 }
19556}
19557
19558/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst.
19559///
19560/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_ror_epi64&expand=4724)
19561#[inline]
19562#[target_feature(enable = "avx512f,avx512vl")]
19563#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19564#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19565#[rustc_legacy_const_generics(1)]
19566pub fn _mm_ror_epi64<const IMM8: i32>(a: __m128i) -> __m128i {
19567 unsafe {
19568 static_assert_uimm_bits!(IMM8, 8);
19569 let a: i64x2 = a.as_i64x2();
19570 let r: i64x2 = vprorq128(a, IMM8);
        transmute(r)
19572 }
19573}
19574
19575/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19576///
19577/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_ror_epi64&expand=4722)
19578#[inline]
19579#[target_feature(enable = "avx512f,avx512vl")]
19580#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19581#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19582#[rustc_legacy_const_generics(3)]
19583pub fn _mm_mask_ror_epi64<const IMM8: i32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19584 unsafe {
19585 static_assert_uimm_bits!(IMM8, 8);
19586 let a: i64x2 = a.as_i64x2();
19587 let r: i64x2 = vprorq128(a, IMM8);
        transmute(simd_select_bitmask(k, r, src.as_i64x2()))
19589 }
19590}
19591
19592/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19593///
19594/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_ror_epi64&expand=4723)
19595#[inline]
19596#[target_feature(enable = "avx512f,avx512vl")]
19597#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19598#[cfg_attr(test, assert_instr(vprolq, IMM8 = 15))]
19599#[rustc_legacy_const_generics(2)]
19600pub fn _mm_maskz_ror_epi64<const IMM8: i32>(k: __mmask8, a: __m128i) -> __m128i {
19601 unsafe {
19602 static_assert_uimm_bits!(IMM8, 8);
19603 let a: i64x2 = a.as_i64x2();
19604 let r: i64x2 = vprorq128(a, IMM8);
        transmute(simd_select_bitmask(k, r, i64x2::ZERO))
19606 }
19607}
19608
19609/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst.
19610///
19611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_slli_epi32&expand=5310)
19612#[inline]
19613#[target_feature(enable = "avx512f")]
19614#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19615#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
19616#[rustc_legacy_const_generics(1)]
19617pub fn _mm512_slli_epi32<const IMM8: u32>(a: __m512i) -> __m512i {
19618 unsafe {
19619 static_assert_uimm_bits!(IMM8, 8);
19620 if IMM8 >= 32 {
19621 _mm512_setzero_si512()
19622 } else {
            transmute(simd_shl(a.as_u32x16(), u32x16::splat(IMM8)))
19624 }
19625 }
19626}
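
// Scalar sketch of the shift semantics implemented above: unlike a rotate, the
// shift count does not wrap, so any count of 32 or more clears the lane, which is
// exactly what the `IMM8 >= 32` branch encodes. Hypothetical reference only.
#[cfg(test)]
#[allow(dead_code)]
fn slli_epi32_scalar_reference(lanes: [u32; 16], imm8: u32) -> [u32; 16] {
    lanes.map(|x| if imm8 >= 32 { 0 } else { x << imm8 })
}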
19627
19628/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19629///
19630/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_slli_epi32&expand=5308)
19631#[inline]
19632#[target_feature(enable = "avx512f")]
19633#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19634#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
19635#[rustc_legacy_const_generics(3)]
19636pub fn _mm512_mask_slli_epi32<const IMM8: u32>(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
19637 unsafe {
19638 static_assert_uimm_bits!(IMM8, 8);
19639 let shf: u32x16 = if IMM8 >= 32 {
19640 u32x16::ZERO
19641 } else {
            simd_shl(a.as_u32x16(), u32x16::splat(IMM8))
        };
        transmute(simd_select_bitmask(k, shf, src.as_u32x16()))
19645 }
19646}
19647
19648/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19649///
19650/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_slli_epi32&expand=5309)
19651#[inline]
19652#[target_feature(enable = "avx512f")]
19653#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19654#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
19655#[rustc_legacy_const_generics(2)]
19656pub fn _mm512_maskz_slli_epi32<const IMM8: u32>(k: __mmask16, a: __m512i) -> __m512i {
19657 unsafe {
19658 static_assert_uimm_bits!(IMM8, 8);
19659 if IMM8 >= 32 {
19660 _mm512_setzero_si512()
19661 } else {
            let shf: u32x16 = simd_shl(a.as_u32x16(), u32x16::splat(IMM8));
            transmute(simd_select_bitmask(k, shf, u32x16::ZERO))
19664 }
19665 }
19666}
19667
19668/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19669///
19670/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_slli_epi32&expand=5305)
19671#[inline]
19672#[target_feature(enable = "avx512f,avx512vl")]
19673#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19674#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
19675#[rustc_legacy_const_generics(3)]
19676pub fn _mm256_mask_slli_epi32<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19677 unsafe {
19678 static_assert_uimm_bits!(IMM8, 8);
19679 let r: u32x8 = if IMM8 >= 32 {
19680 u32x8::ZERO
19681 } else {
            simd_shl(a.as_u32x8(), u32x8::splat(IMM8))
        };
        transmute(simd_select_bitmask(k, r, src.as_u32x8()))
19685 }
19686}
19687
19688/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19689///
19690/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_slli_epi32&expand=5306)
19691#[inline]
19692#[target_feature(enable = "avx512f,avx512vl")]
19693#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19694#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
19695#[rustc_legacy_const_generics(2)]
19696pub fn _mm256_maskz_slli_epi32<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
19697 unsafe {
19698 static_assert_uimm_bits!(IMM8, 8);
19699 if IMM8 >= 32 {
19700 _mm256_setzero_si256()
19701 } else {
            let r: u32x8 = simd_shl(a.as_u32x8(), u32x8::splat(IMM8));
            transmute(simd_select_bitmask(k, r, u32x8::ZERO))
19704 }
19705 }
19706}
19707
19708/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19709///
19710/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_slli_epi32&expand=5302)
19711#[inline]
19712#[target_feature(enable = "avx512f,avx512vl")]
19713#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19714#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
19715#[rustc_legacy_const_generics(3)]
19716pub fn _mm_mask_slli_epi32<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19717 unsafe {
19718 static_assert_uimm_bits!(IMM8, 8);
19719 let r: u32x4 = if IMM8 >= 32 {
19720 u32x4::ZERO
19721 } else {
            simd_shl(a.as_u32x4(), u32x4::splat(IMM8))
        };
        transmute(simd_select_bitmask(k, r, src.as_u32x4()))
19725 }
19726}
19727
19728/// Shift packed 32-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19729///
19730/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_slli_epi32&expand=5303)
19731#[inline]
19732#[target_feature(enable = "avx512f,avx512vl")]
19733#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19734#[cfg_attr(test, assert_instr(vpslld, IMM8 = 5))]
19735#[rustc_legacy_const_generics(2)]
19736pub fn _mm_maskz_slli_epi32<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
19737 unsafe {
19738 static_assert_uimm_bits!(IMM8, 8);
19739 if IMM8 >= 32 {
19740 _mm_setzero_si128()
19741 } else {
            let r: u32x4 = simd_shl(a.as_u32x4(), u32x4::splat(IMM8));
            transmute(simd_select_bitmask(k, r, u32x4::ZERO))
19744 }
19745 }
19746}
19747
19748/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst.
19749///
19750/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srli_epi32&expand=5522)
19751#[inline]
19752#[target_feature(enable = "avx512f")]
19753#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19754#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
19755#[rustc_legacy_const_generics(1)]
19756pub fn _mm512_srli_epi32<const IMM8: u32>(a: __m512i) -> __m512i {
19757 unsafe {
19758 static_assert_uimm_bits!(IMM8, 8);
19759 if IMM8 >= 32 {
19760 _mm512_setzero_si512()
19761 } else {
            transmute(simd_shr(a.as_u32x16(), u32x16::splat(IMM8)))
19763 }
19764 }
19765}
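
// Scalar sketch: `srli` is a logical (zero-filling) right shift, which is why the
// implementation above shifts the `u32x16` view of the vector rather than the
// signed lanes. Hypothetical reference only.
#[cfg(test)]
#[allow(dead_code)]
fn srli_epi32_scalar_reference(lanes: [i32; 16], imm8: u32) -> [i32; 16] {
    // Reinterpreting as unsigned keeps the shift from smearing the sign bit.
    lanes.map(|x| if imm8 >= 32 { 0 } else { ((x as u32) >> imm8) as i32 })
}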
19766
19767/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19768///
19769/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srli_epi32&expand=5520)
19770#[inline]
19771#[target_feature(enable = "avx512f")]
19772#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19773#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
19774#[rustc_legacy_const_generics(3)]
19775pub fn _mm512_mask_srli_epi32<const IMM8: u32>(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
19776 unsafe {
19777 static_assert_uimm_bits!(IMM8, 8);
19778 let shf: u32x16 = if IMM8 >= 32 {
19779 u32x16::ZERO
19780 } else {
            simd_shr(a.as_u32x16(), u32x16::splat(IMM8))
        };
        transmute(simd_select_bitmask(k, shf, src.as_u32x16()))
19784 }
19785}
19786
19787/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19788///
19789/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srli_epi32&expand=5521)
19790#[inline]
19791#[target_feature(enable = "avx512f")]
19792#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19793#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
19794#[rustc_legacy_const_generics(2)]
19795pub fn _mm512_maskz_srli_epi32<const IMM8: u32>(k: __mmask16, a: __m512i) -> __m512i {
19796 unsafe {
19797 static_assert_uimm_bits!(IMM8, 8);
19798 if IMM8 >= 32 {
19799 _mm512_setzero_si512()
19800 } else {
            let shf: u32x16 = simd_shr(a.as_u32x16(), u32x16::splat(IMM8));
            transmute(simd_select_bitmask(k, shf, u32x16::ZERO))
19803 }
19804 }
19805}
19806
19807/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19808///
19809/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srli_epi32&expand=5517)
19810#[inline]
19811#[target_feature(enable = "avx512f,avx512vl")]
19812#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19813#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
19814#[rustc_legacy_const_generics(3)]
19815pub fn _mm256_mask_srli_epi32<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19816 unsafe {
19817 static_assert_uimm_bits!(IMM8, 8);
19818 let r: u32x8 = if IMM8 >= 32 {
19819 u32x8::ZERO
19820 } else {
            simd_shr(a.as_u32x8(), u32x8::splat(IMM8))
        };
        transmute(simd_select_bitmask(k, r, src.as_u32x8()))
19824 }
19825}
19826
19827/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19828///
19829/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srli_epi32&expand=5518)
19830#[inline]
19831#[target_feature(enable = "avx512f,avx512vl")]
19832#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19833#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
19834#[rustc_legacy_const_generics(2)]
19835pub fn _mm256_maskz_srli_epi32<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
19836 unsafe {
19837 static_assert_uimm_bits!(IMM8, 8);
19838 if IMM8 >= 32 {
19839 _mm256_setzero_si256()
19840 } else {
            let r: u32x8 = simd_shr(a.as_u32x8(), u32x8::splat(IMM8));
            transmute(simd_select_bitmask(k, r, u32x8::ZERO))
19843 }
19844 }
19845}
19846
19847/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19848///
19849/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srli_epi32&expand=5514)
19850#[inline]
19851#[target_feature(enable = "avx512f,avx512vl")]
19852#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19853#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
19854#[rustc_legacy_const_generics(3)]
19855pub fn _mm_mask_srli_epi32<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19856 unsafe {
19857 static_assert_uimm_bits!(IMM8, 8);
19858 let r: u32x4 = if IMM8 >= 32 {
19859 u32x4::ZERO
19860 } else {
            simd_shr(a.as_u32x4(), u32x4::splat(IMM8))
        };
        transmute(simd_select_bitmask(k, r, src.as_u32x4()))
19864 }
19865}
19866
19867/// Shift packed 32-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19868///
19869/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srli_epi32&expand=5515)
19870#[inline]
19871#[target_feature(enable = "avx512f,avx512vl")]
19872#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19873#[cfg_attr(test, assert_instr(vpsrld, IMM8 = 1))]
19874#[rustc_legacy_const_generics(2)]
19875pub fn _mm_maskz_srli_epi32<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
19876 unsafe {
19877 static_assert_uimm_bits!(IMM8, 8);
19878 if IMM8 >= 32 {
19879 _mm_setzero_si128()
19880 } else {
            let r: u32x4 = simd_shr(a.as_u32x4(), u32x4::splat(IMM8));
            transmute(simd_select_bitmask(k, r, u32x4::ZERO))
19883 }
19884 }
19885}
19886
19887/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst.
19888///
19889/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_slli_epi64&expand=5319)
19890#[inline]
19891#[target_feature(enable = "avx512f")]
19892#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19893#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
19894#[rustc_legacy_const_generics(1)]
19895pub fn _mm512_slli_epi64<const IMM8: u32>(a: __m512i) -> __m512i {
19896 unsafe {
19897 static_assert_uimm_bits!(IMM8, 8);
19898 if IMM8 >= 64 {
19899 _mm512_setzero_si512()
19900 } else {
            transmute(simd_shl(a.as_u64x8(), u64x8::splat(IMM8 as u64)))
19902 }
19903 }
19904}
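
// Hypothetical usage sketch (assumes a nightly toolchain, the
// `stdarch_x86_avx512` feature, and an AVX-512F target): the shift amount is a
// const generic, so inside this crate it can be supplied with a turbofish, while
// `#[rustc_legacy_const_generics]` lets external callers pass it as a trailing
// argument in the C-style form.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn slli_epi64_usage_sketch(a: __m512i) -> __m512i {
    // Shift every 64-bit lane left by 3; bits shifted out are discarded.
    _mm512_slli_epi64::<3>(a)
}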
19905
19906/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19907///
19908/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_slli_epi64&expand=5317)
19909#[inline]
19910#[target_feature(enable = "avx512f")]
19911#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19912#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
19913#[rustc_legacy_const_generics(3)]
19914pub fn _mm512_mask_slli_epi64<const IMM8: u32>(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
19915 unsafe {
19916 static_assert_uimm_bits!(IMM8, 8);
19917 let shf: u64x8 = if IMM8 >= 64 {
19918 u64x8::ZERO
19919 } else {
            simd_shl(a.as_u64x8(), u64x8::splat(IMM8 as u64))
        };
        transmute(simd_select_bitmask(k, shf, src.as_u64x8()))
19923 }
19924}
19925
19926/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19927///
19928/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_slli_epi64&expand=5318)
19929#[inline]
19930#[target_feature(enable = "avx512f")]
19931#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19932#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
19933#[rustc_legacy_const_generics(2)]
19934pub fn _mm512_maskz_slli_epi64<const IMM8: u32>(k: __mmask8, a: __m512i) -> __m512i {
19935 unsafe {
19936 static_assert_uimm_bits!(IMM8, 8);
19937 if IMM8 >= 64 {
19938 _mm512_setzero_si512()
19939 } else {
            let shf: u64x8 = simd_shl(a.as_u64x8(), u64x8::splat(IMM8 as u64));
            transmute(simd_select_bitmask(k, shf, u64x8::ZERO))
19942 }
19943 }
19944}
19945
19946/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19947///
19948/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_slli_epi64&expand=5314)
19949#[inline]
19950#[target_feature(enable = "avx512f,avx512vl")]
19951#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19952#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
19953#[rustc_legacy_const_generics(3)]
19954pub fn _mm256_mask_slli_epi64<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
19955 unsafe {
19956 static_assert_uimm_bits!(IMM8, 8);
19957 let r: u64x4 = if IMM8 >= 64 {
19958 u64x4::ZERO
19959 } else {
            simd_shl(a.as_u64x4(), u64x4::splat(IMM8 as u64))
        };
        transmute(simd_select_bitmask(k, r, src.as_u64x4()))
19963 }
19964}
19965
19966/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
19967///
19968/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_slli_epi64&expand=5315)
19969#[inline]
19970#[target_feature(enable = "avx512f,avx512vl")]
19971#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19972#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
19973#[rustc_legacy_const_generics(2)]
19974pub fn _mm256_maskz_slli_epi64<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
19975 unsafe {
19976 static_assert_uimm_bits!(IMM8, 8);
19977 if IMM8 >= 64 {
19978 _mm256_setzero_si256()
19979 } else {
            let r: u64x4 = simd_shl(a.as_u64x4(), u64x4::splat(IMM8 as u64));
            transmute(simd_select_bitmask(k, r, u64x4::ZERO))
19982 }
19983 }
19984}
19985
19986/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
19987///
19988/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_slli_epi64&expand=5311)
19989#[inline]
19990#[target_feature(enable = "avx512f,avx512vl")]
19991#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
19992#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
19993#[rustc_legacy_const_generics(3)]
19994pub fn _mm_mask_slli_epi64<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
19995 unsafe {
19996 static_assert_uimm_bits!(IMM8, 8);
19997 let r: u64x2 = if IMM8 >= 64 {
19998 u64x2::ZERO
19999 } else {
            simd_shl(a.as_u64x2(), u64x2::splat(IMM8 as u64))
        };
        transmute(simd_select_bitmask(k, r, src.as_u64x2()))
20003 }
20004}
20005
20006/// Shift packed 64-bit integers in a left by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20007///
20008/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_slli_epi64&expand=5312)
20009#[inline]
20010#[target_feature(enable = "avx512f,avx512vl")]
20011#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20012#[cfg_attr(test, assert_instr(vpsllq, IMM8 = 5))]
20013#[rustc_legacy_const_generics(2)]
20014pub fn _mm_maskz_slli_epi64<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
20015 unsafe {
20016 static_assert_uimm_bits!(IMM8, 8);
20017 if IMM8 >= 64 {
20018 _mm_setzero_si128()
20019 } else {
            let r: u64x2 = simd_shl(a.as_u64x2(), u64x2::splat(IMM8 as u64));
            transmute(simd_select_bitmask(k, r, u64x2::ZERO))
20022 }
20023 }
20024}
20025
20026/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst.
20027///
20028/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srli_epi64&expand=5531)
20029#[inline]
20030#[target_feature(enable = "avx512f")]
20031#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20032#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20033#[rustc_legacy_const_generics(1)]
20034pub fn _mm512_srli_epi64<const IMM8: u32>(a: __m512i) -> __m512i {
20035 unsafe {
20036 static_assert_uimm_bits!(IMM8, 8);
20037 if IMM8 >= 64 {
20038 _mm512_setzero_si512()
20039 } else {
            transmute(simd_shr(a.as_u64x8(), u64x8::splat(IMM8 as u64)))
20041 }
20042 }
20043}
20044
20045/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20046///
20047/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srli_epi64&expand=5529)
20048#[inline]
20049#[target_feature(enable = "avx512f")]
20050#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20051#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20052#[rustc_legacy_const_generics(3)]
20053pub fn _mm512_mask_srli_epi64<const IMM8: u32>(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
20054 unsafe {
20055 static_assert_uimm_bits!(IMM8, 8);
20056 let shf: u64x8 = if IMM8 >= 64 {
20057 u64x8::ZERO
20058 } else {
            simd_shr(a.as_u64x8(), u64x8::splat(IMM8 as u64))
        };
        transmute(simd_select_bitmask(k, shf, src.as_u64x8()))
20062 }
20063}
20064
20065/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20066///
20067/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srli_epi64&expand=5530)
20068#[inline]
20069#[target_feature(enable = "avx512f")]
20070#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20071#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20072#[rustc_legacy_const_generics(2)]
20073pub fn _mm512_maskz_srli_epi64<const IMM8: u32>(k: __mmask8, a: __m512i) -> __m512i {
20074 unsafe {
20075 static_assert_uimm_bits!(IMM8, 8);
20076 if IMM8 >= 64 {
20077 _mm512_setzero_si512()
20078 } else {
            let shf: u64x8 = simd_shr(a.as_u64x8(), u64x8::splat(IMM8 as u64));
            transmute(simd_select_bitmask(k, shf, u64x8::ZERO))
20081 }
20082 }
20083}
20084
20085/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20086///
20087/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srli_epi64&expand=5526)
20088#[inline]
20089#[target_feature(enable = "avx512f,avx512vl")]
20090#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20091#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20092#[rustc_legacy_const_generics(3)]
20093pub fn _mm256_mask_srli_epi64<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
20094 unsafe {
20095 static_assert_uimm_bits!(IMM8, 8);
20096 let r: u64x4 = if IMM8 >= 64 {
20097 u64x4::ZERO
20098 } else {
            simd_shr(a.as_u64x4(), u64x4::splat(IMM8 as u64))
        };
        transmute(simd_select_bitmask(k, r, src.as_u64x4()))
20102 }
20103}
20104
20105/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20106///
20107/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srli_epi64&expand=5527)
20108#[inline]
20109#[target_feature(enable = "avx512f,avx512vl")]
20110#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20111#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20112#[rustc_legacy_const_generics(2)]
20113pub fn _mm256_maskz_srli_epi64<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
20114 unsafe {
20115 static_assert_uimm_bits!(IMM8, 8);
20116 if IMM8 >= 64 {
20117 _mm256_setzero_si256()
20118 } else {
            let r: u64x4 = simd_shr(a.as_u64x4(), u64x4::splat(IMM8 as u64));
            transmute(simd_select_bitmask(k, r, u64x4::ZERO))
20121 }
20122 }
20123}
20124
20125/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20126///
20127/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srli_epi64&expand=5523)
20128#[inline]
20129#[target_feature(enable = "avx512f,avx512vl")]
20130#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20131#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20132#[rustc_legacy_const_generics(3)]
20133pub fn _mm_mask_srli_epi64<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
20134 unsafe {
20135 static_assert_uimm_bits!(IMM8, 8);
20136 let r: u64x2 = if IMM8 >= 64 {
20137 u64x2::ZERO
20138 } else {
            simd_shr(a.as_u64x2(), u64x2::splat(IMM8 as u64))
        };
        transmute(simd_select_bitmask(k, r, src.as_u64x2()))
20142 }
20143}
20144
20145/// Shift packed 64-bit integers in a right by imm8 while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20146///
20147/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srli_epi64&expand=5524)
20148#[inline]
20149#[target_feature(enable = "avx512f,avx512vl")]
20150#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20151#[cfg_attr(test, assert_instr(vpsrlq, IMM8 = 1))]
20152#[rustc_legacy_const_generics(2)]
20153pub fn _mm_maskz_srli_epi64<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
20154 unsafe {
20155 static_assert_uimm_bits!(IMM8, 8);
20156 if IMM8 >= 64 {
20157 _mm_setzero_si128()
20158 } else {
            let r: u64x2 = simd_shr(a.as_u64x2(), u64x2::splat(IMM8 as u64));
            transmute(simd_select_bitmask(k, r, u64x2::ZERO))
20161 }
20162 }
20163}
20164
20165/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst.
20166///
20167/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sll_epi32&expand=5280)
20168#[inline]
20169#[target_feature(enable = "avx512f")]
20170#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20171#[cfg_attr(test, assert_instr(vpslld))]
20172pub fn _mm512_sll_epi32(a: __m512i, count: __m128i) -> __m512i {
    unsafe { transmute(vpslld(a.as_i32x16(), count.as_i32x4())) }
20174}
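
// Scalar sketch of the count-register form used above: unlike `slli`, the shift
// amount comes from the low 64 bits of a separate `__m128i`, the same count is
// applied to every lane, and a count of 32 or greater zeroes all lanes.
// Hypothetical reference only; the intrinsic itself compiles to `vpslld`.
#[cfg(test)]
#[allow(dead_code)]
fn sll_epi32_scalar_reference(lanes: [u32; 16], count: u64) -> [u32; 16] {
    lanes.map(|x| if count >= 32 { 0 } else { x << count })
}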
20175
20176/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20177///
20178/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sll_epi32&expand=5278)
20179#[inline]
20180#[target_feature(enable = "avx512f")]
20181#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20182#[cfg_attr(test, assert_instr(vpslld))]
20183pub fn _mm512_mask_sll_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20184 unsafe {
20185 let shf: i32x16 = _mm512_sll_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
20187 }
20188}
20189
20190/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20191///
20192/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sll_epi32&expand=5279)
20193#[inline]
20194#[target_feature(enable = "avx512f")]
20195#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20196#[cfg_attr(test, assert_instr(vpslld))]
20197pub fn _mm512_maskz_sll_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20198 unsafe {
20199 let shf: i32x16 = _mm512_sll_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
20201 }
20202}
20203
20204/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20205///
20206/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sll_epi32&expand=5275)
20207#[inline]
20208#[target_feature(enable = "avx512f,avx512vl")]
20209#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20210#[cfg_attr(test, assert_instr(vpslld))]
20211pub fn _mm256_mask_sll_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20212 unsafe {
20213 let shf: i32x8 = _mm256_sll_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
20215 }
20216}
20217
20218/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20219///
20220/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sll_epi32&expand=5276)
20221#[inline]
20222#[target_feature(enable = "avx512f,avx512vl")]
20223#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20224#[cfg_attr(test, assert_instr(vpslld))]
20225pub fn _mm256_maskz_sll_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20226 unsafe {
20227 let shf: i32x8 = _mm256_sll_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
20229 }
20230}
20231
20232/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20233///
20234/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sll_epi32&expand=5272)
20235#[inline]
20236#[target_feature(enable = "avx512f,avx512vl")]
20237#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20238#[cfg_attr(test, assert_instr(vpslld))]
20239pub fn _mm_mask_sll_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20240 unsafe {
20241 let shf: i32x4 = _mm_sll_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
20243 }
20244}
20245
20246/// Shift packed 32-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20247///
20248/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sll_epi32&expand=5273)
20249#[inline]
20250#[target_feature(enable = "avx512f,avx512vl")]
20251#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20252#[cfg_attr(test, assert_instr(vpslld))]
20253pub fn _mm_maskz_sll_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20254 unsafe {
20255 let shf: i32x4 = _mm_sll_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
20257 }
20258}
20259
20260/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst.
20261///
20262/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srl_epi32&expand=5492)
20263#[inline]
20264#[target_feature(enable = "avx512f")]
20265#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20266#[cfg_attr(test, assert_instr(vpsrld))]
20267pub fn _mm512_srl_epi32(a: __m512i, count: __m128i) -> __m512i {
    unsafe { transmute(vpsrld(a.as_i32x16(), count.as_i32x4())) }
20269}
20270
20271/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20272///
20273/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srl_epi32&expand=5490)
20274#[inline]
20275#[target_feature(enable = "avx512f")]
20276#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20277#[cfg_attr(test, assert_instr(vpsrld))]
20278pub fn _mm512_mask_srl_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20279 unsafe {
20280 let shf: i32x16 = _mm512_srl_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
20282 }
20283}
20284
20285/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20286///
20287/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srl_epi32&expand=5491)
20288#[inline]
20289#[target_feature(enable = "avx512f")]
20290#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20291#[cfg_attr(test, assert_instr(vpsrld))]
20292pub fn _mm512_maskz_srl_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20293 unsafe {
20294 let shf: i32x16 = _mm512_srl_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
20296 }
20297}
20298
20299/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20300///
20301/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srl_epi32&expand=5487)
20302#[inline]
20303#[target_feature(enable = "avx512f,avx512vl")]
20304#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20305#[cfg_attr(test, assert_instr(vpsrld))]
20306pub fn _mm256_mask_srl_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20307 unsafe {
20308 let shf: i32x8 = _mm256_srl_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
20310 }
20311}
20312
20313/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20314///
20315/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srl_epi32&expand=5488)
20316#[inline]
20317#[target_feature(enable = "avx512f,avx512vl")]
20318#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20319#[cfg_attr(test, assert_instr(vpsrld))]
20320pub fn _mm256_maskz_srl_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20321 unsafe {
20322 let shf: i32x8 = _mm256_srl_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
20324 }
20325}
20326
20327/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20328///
20329/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srl_epi32&expand=5484)
20330#[inline]
20331#[target_feature(enable = "avx512f,avx512vl")]
20332#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20333#[cfg_attr(test, assert_instr(vpsrld))]
20334pub fn _mm_mask_srl_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20335 unsafe {
20336 let shf: i32x4 = _mm_srl_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
20338 }
20339}
20340
20341/// Shift packed 32-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20342///
20343/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srl_epi32&expand=5485)
20344#[inline]
20345#[target_feature(enable = "avx512f,avx512vl")]
20346#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20347#[cfg_attr(test, assert_instr(vpsrld))]
20348pub fn _mm_maskz_srl_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20349 unsafe {
20350 let shf: i32x4 = _mm_srl_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
20352 }
20353}
20354
20355/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst.
20356///
20357/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sll_epi64&expand=5289)
20358#[inline]
20359#[target_feature(enable = "avx512f")]
20360#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20361#[cfg_attr(test, assert_instr(vpsllq))]
20362pub fn _mm512_sll_epi64(a: __m512i, count: __m128i) -> __m512i {
    unsafe { transmute(vpsllq(a.as_i64x8(), count.as_i64x2())) }
20364}
20365
20366/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20367///
20368/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sll_epi64&expand=5287)
20369#[inline]
20370#[target_feature(enable = "avx512f")]
20371#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20372#[cfg_attr(test, assert_instr(vpsllq))]
20373pub fn _mm512_mask_sll_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20374 unsafe {
20375 let shf: i64x8 = _mm512_sll_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
20377 }
20378}
20379
20380/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20381///
20382/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sll_epi64&expand=5288)
20383#[inline]
20384#[target_feature(enable = "avx512f")]
20385#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20386#[cfg_attr(test, assert_instr(vpsllq))]
20387pub fn _mm512_maskz_sll_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20388 unsafe {
20389 let shf: i64x8 = _mm512_sll_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
20391 }
20392}
20393
20394/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20395///
20396/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sll_epi64&expand=5284)
20397#[inline]
20398#[target_feature(enable = "avx512f,avx512vl")]
20399#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20400#[cfg_attr(test, assert_instr(vpsllq))]
20401pub fn _mm256_mask_sll_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20402 unsafe {
20403 let shf: i64x4 = _mm256_sll_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
20405 }
20406}
20407
20408/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20409///
20410/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sll_epi64&expand=5285)
20411#[inline]
20412#[target_feature(enable = "avx512f,avx512vl")]
20413#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20414#[cfg_attr(test, assert_instr(vpsllq))]
20415pub fn _mm256_maskz_sll_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20416 unsafe {
20417 let shf: i64x4 = _mm256_sll_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
20419 }
20420}
20421
20422/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20423///
20424/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sll_epi64&expand=5281)
20425#[inline]
20426#[target_feature(enable = "avx512f,avx512vl")]
20427#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20428#[cfg_attr(test, assert_instr(vpsllq))]
20429pub fn _mm_mask_sll_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20430 unsafe {
20431 let shf: i64x2 = _mm_sll_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
20433 }
20434}
20435
20436/// Shift packed 64-bit integers in a left by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20437///
20438/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sll_epi64&expand=5282)
20439#[inline]
20440#[target_feature(enable = "avx512f,avx512vl")]
20441#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20442#[cfg_attr(test, assert_instr(vpsllq))]
20443pub fn _mm_maskz_sll_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20444 unsafe {
20445 let shf: i64x2 = _mm_sll_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
20447 }
20448}
20449
20450/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst.
20451///
20452/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srl_epi64&expand=5501)
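///
/// A minimal usage sketch (not part of Intel's documentation; assumes a nightly
/// toolchain with `stdarch_x86_avx512` and AVX-512F support):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn logical_shift_right(a: __m512i) -> __m512i {
///     // Zeros are shifted in from the left, so negative lanes do not keep
///     // their sign; compare with `_mm512_sra_epi64` for an arithmetic shift.
///     _mm512_srl_epi64(a, _mm_set_epi64x(0, 8))
/// }
/// ```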
20453#[inline]
20454#[target_feature(enable = "avx512f")]
20455#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20456#[cfg_attr(test, assert_instr(vpsrlq))]
20457pub fn _mm512_srl_epi64(a: __m512i, count: __m128i) -> __m512i {
    unsafe { transmute(vpsrlq(a.as_i64x8(), count.as_i64x2())) }
20459}
20460
20461/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20462///
20463/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srl_epi64&expand=5499)
20464#[inline]
20465#[target_feature(enable = "avx512f")]
20466#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20467#[cfg_attr(test, assert_instr(vpsrlq))]
20468pub fn _mm512_mask_srl_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20469 unsafe {
20470 let shf: i64x8 = _mm512_srl_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
20472 }
20473}
20474
20475/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20476///
20477/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srl_epi64&expand=5500)
20478#[inline]
20479#[target_feature(enable = "avx512f")]
20480#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20481#[cfg_attr(test, assert_instr(vpsrlq))]
20482pub fn _mm512_maskz_srl_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20483 unsafe {
20484 let shf: i64x8 = _mm512_srl_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
20486 }
20487}
20488
20489/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20490///
20491/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srl_epi64&expand=5496)
20492#[inline]
20493#[target_feature(enable = "avx512f,avx512vl")]
20494#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20495#[cfg_attr(test, assert_instr(vpsrlq))]
20496pub fn _mm256_mask_srl_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20497 unsafe {
20498 let shf: i64x4 = _mm256_srl_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
20500 }
20501}
20502
20503/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20504///
20505/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srl_epi64&expand=5497)
20506#[inline]
20507#[target_feature(enable = "avx512f,avx512vl")]
20508#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20509#[cfg_attr(test, assert_instr(vpsrlq))]
20510pub fn _mm256_maskz_srl_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20511 unsafe {
20512 let shf: i64x4 = _mm256_srl_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
20514 }
20515}
20516
20517/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20518///
20519/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srl_epi64&expand=5493)
20520#[inline]
20521#[target_feature(enable = "avx512f,avx512vl")]
20522#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20523#[cfg_attr(test, assert_instr(vpsrlq))]
20524pub fn _mm_mask_srl_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20525 unsafe {
20526 let shf: i64x2 = _mm_srl_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
20528 }
20529}
20530
20531/// Shift packed 64-bit integers in a right by count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20532///
20533/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srl_epi64&expand=5494)
20534#[inline]
20535#[target_feature(enable = "avx512f,avx512vl")]
20536#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20537#[cfg_attr(test, assert_instr(vpsrlq))]
20538pub fn _mm_maskz_srl_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20539 unsafe {
20540 let shf: i64x2 = _mm_srl_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
20542 }
20543}
20544
20545/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst.
20546///
20547/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sra_epi32&expand=5407)
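///
/// A minimal usage sketch (not part of Intel's documentation; assumes a nightly
/// toolchain with `stdarch_x86_avx512` and AVX-512F support):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn divide_by_four_rounding_down(a: __m512i) -> __m512i {
///     // Sign bits are shifted in, so -8 becomes -2 and 8 becomes 2.
///     _mm512_sra_epi32(a, _mm_set_epi32(0, 0, 0, 2))
/// }
/// ```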
20548#[inline]
20549#[target_feature(enable = "avx512f")]
20550#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20551#[cfg_attr(test, assert_instr(vpsrad))]
20552pub fn _mm512_sra_epi32(a: __m512i, count: __m128i) -> __m512i {
    unsafe { transmute(vpsrad(a.as_i32x16(), count.as_i32x4())) }
20554}
20555
20556/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20557///
20558/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sra_epi32&expand=5405)
20559#[inline]
20560#[target_feature(enable = "avx512f")]
20561#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20562#[cfg_attr(test, assert_instr(vpsrad))]
20563pub fn _mm512_mask_sra_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20564 unsafe {
20565 let shf: i32x16 = _mm512_sra_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
20567 }
20568}
20569
20570/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20571///
20572/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sra_epi32&expand=5406)
20573#[inline]
20574#[target_feature(enable = "avx512f")]
20575#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20576#[cfg_attr(test, assert_instr(vpsrad))]
20577pub fn _mm512_maskz_sra_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
20578 unsafe {
20579 let shf: i32x16 = _mm512_sra_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
20581 }
20582}
20583
20584/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20585///
20586/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sra_epi32&expand=5402)
20587#[inline]
20588#[target_feature(enable = "avx512f,avx512vl")]
20589#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20590#[cfg_attr(test, assert_instr(vpsrad))]
20591pub fn _mm256_mask_sra_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20592 unsafe {
20593 let shf: i32x8 = _mm256_sra_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
20595 }
20596}
20597
20598/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20599///
20600/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sra_epi32&expand=5403)
20601#[inline]
20602#[target_feature(enable = "avx512f,avx512vl")]
20603#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20604#[cfg_attr(test, assert_instr(vpsrad))]
20605pub fn _mm256_maskz_sra_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20606 unsafe {
20607 let shf: i32x8 = _mm256_sra_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
20609 }
20610}
20611
20612/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20613///
20614/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sra_epi32&expand=5399)
20615#[inline]
20616#[target_feature(enable = "avx512f,avx512vl")]
20617#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20618#[cfg_attr(test, assert_instr(vpsrad))]
20619pub fn _mm_mask_sra_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20620 unsafe {
20621 let shf: i32x4 = _mm_sra_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
20623 }
20624}
20625
20626/// Shift packed 32-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20627///
20628/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sra_epi32&expand=5400)
20629#[inline]
20630#[target_feature(enable = "avx512f,avx512vl")]
20631#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20632#[cfg_attr(test, assert_instr(vpsrad))]
20633pub fn _mm_maskz_sra_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20634 unsafe {
20635 let shf: i32x4 = _mm_sra_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
20637 }
20638}
20639
20640/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst.
20641///
20642/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sra_epi64&expand=5416)
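///
/// A minimal usage sketch (not part of Intel's documentation). A packed 64-bit
/// arithmetic shift has no SSE/AVX2 counterpart, so this intrinsic is only
/// available with AVX-512F; the sketch assumes a nightly toolchain with
/// `stdarch_x86_avx512`:
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn sign_extending_shift(a: __m512i) -> __m512i {
///     // Each 64-bit lane keeps its sign while being shifted right by 1.
///     _mm512_sra_epi64(a, _mm_set_epi64x(0, 1))
/// }
/// ```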
20643#[inline]
20644#[target_feature(enable = "avx512f")]
20645#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20646#[cfg_attr(test, assert_instr(vpsraq))]
20647pub fn _mm512_sra_epi64(a: __m512i, count: __m128i) -> __m512i {
    unsafe { transmute(vpsraq(a.as_i64x8(), count.as_i64x2())) }
20649}
20650
20651/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20652///
20653/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sra_epi64&expand=5414)
20654#[inline]
20655#[target_feature(enable = "avx512f")]
20656#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20657#[cfg_attr(test, assert_instr(vpsraq))]
20658pub fn _mm512_mask_sra_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20659 unsafe {
20660 let shf: i64x8 = _mm512_sra_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
20662 }
20663}
20664
20665/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20666///
20667/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sra_epi64&expand=5415)
20668#[inline]
20669#[target_feature(enable = "avx512f")]
20670#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20671#[cfg_attr(test, assert_instr(vpsraq))]
20672pub fn _mm512_maskz_sra_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
20673 unsafe {
20674 let shf: i64x8 = _mm512_sra_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
20676 }
20677}
20678
20679/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst.
20680///
20681/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sra_epi64&expand=5413)
20682#[inline]
20683#[target_feature(enable = "avx512f,avx512vl")]
20684#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20685#[cfg_attr(test, assert_instr(vpsraq))]
20686pub fn _mm256_sra_epi64(a: __m256i, count: __m128i) -> __m256i {
    unsafe { transmute(vpsraq256(a.as_i64x4(), count.as_i64x2())) }
20688}
20689
20690/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20691///
20692/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sra_epi64&expand=5411)
20693#[inline]
20694#[target_feature(enable = "avx512f,avx512vl")]
20695#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20696#[cfg_attr(test, assert_instr(vpsraq))]
20697pub fn _mm256_mask_sra_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20698 unsafe {
20699 let shf: i64x4 = _mm256_sra_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
20701 }
20702}
20703
20704/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20705///
20706/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sra_epi64&expand=5412)
20707#[inline]
20708#[target_feature(enable = "avx512f,avx512vl")]
20709#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20710#[cfg_attr(test, assert_instr(vpsraq))]
20711pub fn _mm256_maskz_sra_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
20712 unsafe {
20713 let shf: i64x4 = _mm256_sra_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
20715 }
20716}
20717
20718/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst.
20719///
20720/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_sra_epi64&expand=5410)
20721#[inline]
20722#[target_feature(enable = "avx512f,avx512vl")]
20723#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20724#[cfg_attr(test, assert_instr(vpsraq))]
20725pub fn _mm_sra_epi64(a: __m128i, count: __m128i) -> __m128i {
    unsafe { transmute(vpsraq128(a.as_i64x2(), count.as_i64x2())) }
20727}
20728
20729/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20730///
20731/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sra_epi64&expand=5408)
20732#[inline]
20733#[target_feature(enable = "avx512f,avx512vl")]
20734#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20735#[cfg_attr(test, assert_instr(vpsraq))]
20736pub fn _mm_mask_sra_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20737 unsafe {
20738 let shf: i64x2 = _mm_sra_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
20740 }
20741}
20742
20743/// Shift packed 64-bit integers in a right by count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20744///
20745/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sra_epi64&expand=5409)
20746#[inline]
20747#[target_feature(enable = "avx512f,avx512vl")]
20748#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20749#[cfg_attr(test, assert_instr(vpsraq))]
20750pub fn _mm_maskz_sra_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
20751 unsafe {
20752 let shf: i64x2 = _mm_sra_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
20754 }
20755}
20756
20757/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
20758///
20759/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srai_epi32&expand=5436)
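///
/// A minimal usage sketch showing the const-generic immediate (not part of
/// Intel's documentation; assumes a nightly toolchain with
/// `stdarch_x86_avx512` and AVX-512F support):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn arithmetic_shift_by_7(a: __m512i) -> __m512i {
///     // The shift count is a compile-time constant; counts above 31 are
///     // clamped to 31, which fills each lane with its sign bit.
///     _mm512_srai_epi32::<7>(a)
/// }
/// ```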
20760#[inline]
20761#[target_feature(enable = "avx512f")]
20762#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20763#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
20764#[rustc_legacy_const_generics(1)]
20765pub fn _mm512_srai_epi32<const IMM8: u32>(a: __m512i) -> __m512i {
20766 unsafe {
20767 static_assert_uimm_bits!(IMM8, 8);
        transmute(simd_shr(a.as_i32x16(), i32x16::splat(IMM8.min(31) as i32)))
20769 }
20770}
20771
20772/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20773///
20774/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srai_epi32&expand=5434)
20775#[inline]
20776#[target_feature(enable = "avx512f")]
20777#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20778#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
20779#[rustc_legacy_const_generics(3)]
20780pub fn _mm512_mask_srai_epi32<const IMM8: u32>(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
20781 unsafe {
20782 static_assert_uimm_bits!(IMM8, 8);
        let r: i32x16 = simd_shr(a.as_i32x16(), i32x16::splat(IMM8.min(31) as i32));
        transmute(simd_select_bitmask(k, r, src.as_i32x16()))
20785 }
20786}
20787
20788/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20789///
20790/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srai_epi32&expand=5435)
20791#[inline]
20792#[target_feature(enable = "avx512f")]
20793#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20794#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
20795#[rustc_legacy_const_generics(2)]
20796pub fn _mm512_maskz_srai_epi32<const IMM8: u32>(k: __mmask16, a: __m512i) -> __m512i {
20797 unsafe {
20798 static_assert_uimm_bits!(IMM8, 8);
        let r: i32x16 = simd_shr(a.as_i32x16(), i32x16::splat(IMM8.min(31) as i32));
        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
20801 }
20802}
20803
20804/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20805///
20806/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srai_epi32&expand=5431)
20807#[inline]
20808#[target_feature(enable = "avx512f,avx512vl")]
20809#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20810#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
20811#[rustc_legacy_const_generics(3)]
20812pub fn _mm256_mask_srai_epi32<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
20813 unsafe {
        let r: i32x8 = simd_shr(a.as_i32x8(), i32x8::splat(IMM8.min(31) as i32));
        transmute(simd_select_bitmask(k, r, src.as_i32x8()))
20816 }
20817}
20818
20819/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20820///
20821/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srai_epi32&expand=5432)
20822#[inline]
20823#[target_feature(enable = "avx512f,avx512vl")]
20824#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20825#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
20826#[rustc_legacy_const_generics(2)]
20827pub fn _mm256_maskz_srai_epi32<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
20828 unsafe {
        let r: i32x8 = simd_shr(a.as_i32x8(), i32x8::splat(IMM8.min(31) as i32));
        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
20831 }
20832}
20833
20834/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20835///
20836/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srai_epi32&expand=5428)
20837#[inline]
20838#[target_feature(enable = "avx512f,avx512vl")]
20839#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20840#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
20841#[rustc_legacy_const_generics(3)]
20842pub fn _mm_mask_srai_epi32<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
20843 unsafe {
        let r: i32x4 = simd_shr(a.as_i32x4(), i32x4::splat(IMM8.min(31) as i32));
        transmute(simd_select_bitmask(k, r, src.as_i32x4()))
20846 }
20847}
20848
20849/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20850///
20851/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srai_epi32&expand=5429)
20852#[inline]
20853#[target_feature(enable = "avx512f,avx512vl")]
20854#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20855#[cfg_attr(test, assert_instr(vpsrad, IMM8 = 1))]
20856#[rustc_legacy_const_generics(2)]
20857pub fn _mm_maskz_srai_epi32<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
20858 unsafe {
        let r: i32x4 = simd_shr(a.as_i32x4(), i32x4::splat(IMM8.min(31) as i32));
        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
20861 }
20862}
20863
20864/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
20865///
20866/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srai_epi64&expand=5445)
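///
/// A minimal usage sketch (not part of Intel's documentation; assumes a nightly
/// toolchain with `stdarch_x86_avx512` and AVX-512F support):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn halve_signed_lanes(a: __m512i) -> __m512i {
///     // Shift counts of 64 or more are clamped to 63, which fills each
///     // lane with its sign bit (0 or -1).
///     _mm512_srai_epi64::<1>(a)
/// }
/// ```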
20867#[inline]
20868#[target_feature(enable = "avx512f")]
20869#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20870#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
20871#[rustc_legacy_const_generics(1)]
20872pub fn _mm512_srai_epi64<const IMM8: u32>(a: __m512i) -> __m512i {
20873 unsafe {
20874 static_assert_uimm_bits!(IMM8, 8);
        transmute(simd_shr(a.as_i64x8(), i64x8::splat(IMM8.min(63) as i64)))
20876 }
20877}
20878
20879/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20880///
20881/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srai_epi64&expand=5443)
20882#[inline]
20883#[target_feature(enable = "avx512f")]
20884#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20885#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
20886#[rustc_legacy_const_generics(3)]
20887pub fn _mm512_mask_srai_epi64<const IMM8: u32>(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
20888 unsafe {
20889 static_assert_uimm_bits!(IMM8, 8);
        let shf: i64x8 = simd_shr(a.as_i64x8(), i64x8::splat(IMM8.min(63) as i64));
        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
20892 }
20893}
20894
20895/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20896///
20897/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srai_epi64&expand=5444)
20898#[inline]
20899#[target_feature(enable = "avx512f")]
20900#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20901#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
20902#[rustc_legacy_const_generics(2)]
20903pub fn _mm512_maskz_srai_epi64<const IMM8: u32>(k: __mmask8, a: __m512i) -> __m512i {
20904 unsafe {
20905 static_assert_uimm_bits!(IMM8, 8);
        let shf: i64x8 = simd_shr(a.as_i64x8(), i64x8::splat(IMM8.min(63) as i64));
        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
20908 }
20909}
20910
20911/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
20912///
20913/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srai_epi64&expand=5442)
20914#[inline]
20915#[target_feature(enable = "avx512f,avx512vl")]
20916#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20917#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
20918#[rustc_legacy_const_generics(1)]
20919pub fn _mm256_srai_epi64<const IMM8: u32>(a: __m256i) -> __m256i {
20920 unsafe {
20921 static_assert_uimm_bits!(IMM8, 8);
        transmute(simd_shr(a.as_i64x4(), i64x4::splat(IMM8.min(63) as i64)))
20923 }
20924}
20925
20926/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20927///
20928/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srai_epi64&expand=5440)
20929#[inline]
20930#[target_feature(enable = "avx512f,avx512vl")]
20931#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20932#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
20933#[rustc_legacy_const_generics(3)]
20934pub fn _mm256_mask_srai_epi64<const IMM8: u32>(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
20935 unsafe {
20936 static_assert_uimm_bits!(IMM8, 8);
        let shf: i64x4 = simd_shr(a.as_i64x4(), i64x4::splat(IMM8.min(63) as i64));
        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
20939 }
20940}
20941
20942/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20943///
20944/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srai_epi64&expand=5441)
20945#[inline]
20946#[target_feature(enable = "avx512f,avx512vl")]
20947#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20948#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
20949#[rustc_legacy_const_generics(2)]
20950pub fn _mm256_maskz_srai_epi64<const IMM8: u32>(k: __mmask8, a: __m256i) -> __m256i {
20951 unsafe {
20952 static_assert_uimm_bits!(IMM8, 8);
        let shf: i64x4 = simd_shr(a.as_i64x4(), i64x4::splat(IMM8.min(63) as i64));
        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
20955 }
20956}
20957
20958/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.
20959///
20960/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srai_epi64&expand=5439)
20961#[inline]
20962#[target_feature(enable = "avx512f,avx512vl")]
20963#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20964#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
20965#[rustc_legacy_const_generics(1)]
20966pub fn _mm_srai_epi64<const IMM8: u32>(a: __m128i) -> __m128i {
20967 unsafe {
20968 static_assert_uimm_bits!(IMM8, 8);
        transmute(simd_shr(a.as_i64x2(), i64x2::splat(IMM8.min(63) as i64)))
20970 }
20971}
20972
20973/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
20974///
20975/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srai_epi64&expand=5437)
20976#[inline]
20977#[target_feature(enable = "avx512f,avx512vl")]
20978#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20979#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
20980#[rustc_legacy_const_generics(3)]
20981pub fn _mm_mask_srai_epi64<const IMM8: u32>(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
20982 unsafe {
20983 static_assert_uimm_bits!(IMM8, 8);
        let shf: i64x2 = simd_shr(a.as_i64x2(), i64x2::splat(IMM8.min(63) as i64));
        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
20986 }
20987}
20988
20989/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
20990///
20991/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srai_epi64&expand=5438)
20992#[inline]
20993#[target_feature(enable = "avx512f,avx512vl")]
20994#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
20995#[cfg_attr(test, assert_instr(vpsraq, IMM8 = 1))]
20996#[rustc_legacy_const_generics(2)]
20997pub fn _mm_maskz_srai_epi64<const IMM8: u32>(k: __mmask8, a: __m128i) -> __m128i {
20998 unsafe {
20999 static_assert_uimm_bits!(IMM8, 8);
        let shf: i64x2 = simd_shr(a.as_i64x2(), i64x2::splat(IMM8.min(63) as i64));
        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
21002 }
21003}
21004
21005/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
21006///
21007/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srav_epi32&expand=5465)
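///
/// A minimal usage sketch (not part of Intel's documentation; assumes a nightly
/// toolchain with `stdarch_x86_avx512` and AVX-512F support):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn per_lane_shift(a: __m512i) -> __m512i {
///     // Lane i of `a` is shifted right (sign-extending) by lane i of `count`.
///     let count = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
///     _mm512_srav_epi32(a, count)
/// }
/// ```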
21008#[inline]
21009#[target_feature(enable = "avx512f")]
21010#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21011#[cfg_attr(test, assert_instr(vpsravd))]
21012pub fn _mm512_srav_epi32(a: __m512i, count: __m512i) -> __m512i {
    unsafe { transmute(vpsravd(a.as_i32x16(), count.as_i32x16())) }
21014}
21015
21016/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21017///
21018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srav_epi32&expand=5463)
21019#[inline]
21020#[target_feature(enable = "avx512f")]
21021#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21022#[cfg_attr(test, assert_instr(vpsravd))]
21023pub fn _mm512_mask_srav_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
21024 unsafe {
21025 let shf: i32x16 = _mm512_srav_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
21027 }
21028}
21029
21030/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21031///
21032/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srav_epi32&expand=5464)
21033#[inline]
21034#[target_feature(enable = "avx512f")]
21035#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21036#[cfg_attr(test, assert_instr(vpsravd))]
21037pub fn _mm512_maskz_srav_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
21038 unsafe {
21039 let shf: i32x16 = _mm512_srav_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
21041 }
21042}
21043
21044/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21045///
21046/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srav_epi32&expand=5460)
21047#[inline]
21048#[target_feature(enable = "avx512f,avx512vl")]
21049#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21050#[cfg_attr(test, assert_instr(vpsravd))]
21051pub fn _mm256_mask_srav_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21052 unsafe {
21053 let shf: i32x8 = _mm256_srav_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
21055 }
21056}
21057
21058/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21059///
21060/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srav_epi32&expand=5461)
21061#[inline]
21062#[target_feature(enable = "avx512f,avx512vl")]
21063#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21064#[cfg_attr(test, assert_instr(vpsravd))]
21065pub fn _mm256_maskz_srav_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21066 unsafe {
21067 let shf: i32x8 = _mm256_srav_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
21069 }
21070}
21071
21072/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21073///
21074/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srav_epi32&expand=5457)
21075#[inline]
21076#[target_feature(enable = "avx512f,avx512vl")]
21077#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21078#[cfg_attr(test, assert_instr(vpsravd))]
21079pub fn _mm_mask_srav_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21080 unsafe {
21081 let shf: i32x4 = _mm_srav_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
21083 }
21084}
21085
21086/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21087///
21088/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srav_epi32&expand=5458)
21089#[inline]
21090#[target_feature(enable = "avx512f,avx512vl")]
21091#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21092#[cfg_attr(test, assert_instr(vpsravd))]
21093pub fn _mm_maskz_srav_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21094 unsafe {
21095 let shf: i32x4 = _mm_srav_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
21097 }
21098}
21099
21100/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
21101///
21102/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srav_epi64&expand=5474)
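///
/// A minimal usage sketch (not part of Intel's documentation; assumes a nightly
/// toolchain with `stdarch_x86_avx512` and AVX-512F support):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn per_lane_shift_64(a: __m512i) -> __m512i {
///     // Each 64-bit lane of `a` is shifted right (sign-extending) by the
///     // amount held in the corresponding lane of `count`.
///     let count = _mm512_setr_epi64(0, 8, 16, 24, 32, 40, 48, 56);
///     _mm512_srav_epi64(a, count)
/// }
/// ```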
21103#[inline]
21104#[target_feature(enable = "avx512f")]
21105#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21106#[cfg_attr(test, assert_instr(vpsravq))]
21107pub fn _mm512_srav_epi64(a: __m512i, count: __m512i) -> __m512i {
    unsafe { transmute(vpsravq(a.as_i64x8(), count.as_i64x8())) }
21109}
21110
21111/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21112///
21113/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srav_epi64&expand=5472)
21114#[inline]
21115#[target_feature(enable = "avx512f")]
21116#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21117#[cfg_attr(test, assert_instr(vpsravq))]
21118pub fn _mm512_mask_srav_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
21119 unsafe {
21120 let shf: i64x8 = _mm512_srav_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
21122 }
21123}
21124
21125/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21126///
21127/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srav_epi64&expand=5473)
21128#[inline]
21129#[target_feature(enable = "avx512f")]
21130#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21131#[cfg_attr(test, assert_instr(vpsravq))]
21132pub fn _mm512_maskz_srav_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
21133 unsafe {
21134 let shf: i64x8 = _mm512_srav_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
21136 }
21137}
21138
21139/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
21140///
21141/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_srav_epi64&expand=5471)
21142#[inline]
21143#[target_feature(enable = "avx512f,avx512vl")]
21144#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21145#[cfg_attr(test, assert_instr(vpsravq))]
21146pub fn _mm256_srav_epi64(a: __m256i, count: __m256i) -> __m256i {
    unsafe { transmute(vpsravq256(a.as_i64x4(), count.as_i64x4())) }
21148}
21149
21150/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21151///
21152/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srav_epi64&expand=5469)
21153#[inline]
21154#[target_feature(enable = "avx512f,avx512vl")]
21155#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21156#[cfg_attr(test, assert_instr(vpsravq))]
21157pub fn _mm256_mask_srav_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21158 unsafe {
21159 let shf: i64x4 = _mm256_srav_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
21161 }
21162}
21163
21164/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21165///
21166/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srav_epi64&expand=5470)
21167#[inline]
21168#[target_feature(enable = "avx512f,avx512vl")]
21169#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21170#[cfg_attr(test, assert_instr(vpsravq))]
21171pub fn _mm256_maskz_srav_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21172 unsafe {
21173 let shf: i64x4 = _mm256_srav_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
21175 }
21176}
21177
21178/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.
21179///
21180/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_srav_epi64&expand=5468)
21181#[inline]
21182#[target_feature(enable = "avx512f,avx512vl")]
21183#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21184#[cfg_attr(test, assert_instr(vpsravq))]
21185pub fn _mm_srav_epi64(a: __m128i, count: __m128i) -> __m128i {
    unsafe { transmute(vpsravq128(a.as_i64x2(), count.as_i64x2())) }
21187}
21188
21189/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21190///
21191/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srav_epi64&expand=5466)
21192#[inline]
21193#[target_feature(enable = "avx512f,avx512vl")]
21194#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21195#[cfg_attr(test, assert_instr(vpsravq))]
21196pub fn _mm_mask_srav_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21197 unsafe {
21198 let shf: i64x2 = _mm_srav_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
21200 }
21201}
21202
21203/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21204///
21205/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srav_epi64&expand=5467)
21206#[inline]
21207#[target_feature(enable = "avx512f,avx512vl")]
21208#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21209#[cfg_attr(test, assert_instr(vpsravq))]
21210pub fn _mm_maskz_srav_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21211 unsafe {
21212 let shf: i64x2 = _mm_srav_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
21214 }
21215}
21216
21217/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
21218///
21219/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rolv_epi32&expand=4703)
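///
/// A minimal usage sketch (not part of Intel's documentation; assumes a nightly
/// toolchain with `stdarch_x86_avx512` and AVX-512F support):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn rotate_each_lane(a: __m512i) -> __m512i {
///     // Bits rotated out on the left re-enter on the right; only the low
///     // five bits of each count lane are used (rotation modulo 32).
///     let counts = _mm512_set1_epi32(8);
///     _mm512_rolv_epi32(a, counts)
/// }
/// ```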
21220#[inline]
21221#[target_feature(enable = "avx512f")]
21222#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21223#[cfg_attr(test, assert_instr(vprolvd))]
21224pub fn _mm512_rolv_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vprolvd(a.as_i32x16(), b.as_i32x16())) }
21226}
21227
21228/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21229///
21230/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rolv_epi32&expand=4701)
21231#[inline]
21232#[target_feature(enable = "avx512f")]
21233#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21234#[cfg_attr(test, assert_instr(vprolvd))]
21235pub fn _mm512_mask_rolv_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
21236 unsafe {
21237 let rol: i32x16 = _mm512_rolv_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, rol, src.as_i32x16()))
21239 }
21240}
21241
21242/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21243///
21244/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rolv_epi32&expand=4702)
21245#[inline]
21246#[target_feature(enable = "avx512f")]
21247#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21248#[cfg_attr(test, assert_instr(vprolvd))]
21249pub fn _mm512_maskz_rolv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
21250 unsafe {
21251 let rol: i32x16 = _mm512_rolv_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, rol, i32x16::ZERO))
21253 }
21254}
21255
21256/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
21257///
21258/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rolv_epi32&expand=4700)
21259#[inline]
21260#[target_feature(enable = "avx512f,avx512vl")]
21261#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21262#[cfg_attr(test, assert_instr(vprolvd))]
21263pub fn _mm256_rolv_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vprolvd256(a.as_i32x8(), b.as_i32x8())) }
21265}
21266
21267/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21268///
21269/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rolv_epi32&expand=4698)
21270#[inline]
21271#[target_feature(enable = "avx512f,avx512vl")]
21272#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21273#[cfg_attr(test, assert_instr(vprolvd))]
21274pub fn _mm256_mask_rolv_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21275 unsafe {
21276 let rol: i32x8 = _mm256_rolv_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, rol, src.as_i32x8()))
21278 }
21279}
21280
21281/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21282///
21283/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rolv_epi32&expand=4699)
21284#[inline]
21285#[target_feature(enable = "avx512f,avx512vl")]
21286#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21287#[cfg_attr(test, assert_instr(vprolvd))]
21288pub fn _mm256_maskz_rolv_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21289 unsafe {
21290 let rol: i32x8 = _mm256_rolv_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, rol, i32x8::ZERO))
21292 }
21293}
21294
21295/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
21296///
21297/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rolv_epi32&expand=4697)
21298#[inline]
21299#[target_feature(enable = "avx512f,avx512vl")]
21300#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21301#[cfg_attr(test, assert_instr(vprolvd))]
21302pub fn _mm_rolv_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vprolvd128(a.as_i32x4(), b.as_i32x4())) }
21304}
21305
21306/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21307///
21308/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rolv_epi32&expand=4695)
21309#[inline]
21310#[target_feature(enable = "avx512f,avx512vl")]
21311#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21312#[cfg_attr(test, assert_instr(vprolvd))]
21313pub fn _mm_mask_rolv_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21314 unsafe {
21315 let rol: i32x4 = _mm_rolv_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, rol, src.as_i32x4()))
21317 }
21318}
21319
21320/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21321///
21322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rolv_epi32&expand=4696)
21323#[inline]
21324#[target_feature(enable = "avx512f,avx512vl")]
21325#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21326#[cfg_attr(test, assert_instr(vprolvd))]
21327pub fn _mm_maskz_rolv_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21328 unsafe {
21329 let rol: i32x4 = _mm_rolv_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, rol, i32x4::ZERO))
21331 }
21332}
21333
21334/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
21335///
21336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rorv_epi32&expand=4739)
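///
/// A minimal usage sketch (not part of Intel's documentation; assumes a nightly
/// toolchain with `stdarch_x86_avx512` and AVX-512F support):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn rotate_right_per_lane(a: __m512i, counts: __m512i) -> __m512i {
///     // Rotating right by n is equivalent to rotating left by 32 - n.
///     _mm512_rorv_epi32(a, counts)
/// }
/// ```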
21337#[inline]
21338#[target_feature(enable = "avx512f")]
21339#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21340#[cfg_attr(test, assert_instr(vprorvd))]
21341pub fn _mm512_rorv_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vprorvd(a.as_i32x16(), b.as_i32x16())) }
21343}
21344
21345/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21346///
21347/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rorv_epi32&expand=4737)
21348#[inline]
21349#[target_feature(enable = "avx512f")]
21350#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21351#[cfg_attr(test, assert_instr(vprorvd))]
21352pub fn _mm512_mask_rorv_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
21353 unsafe {
21354 let ror: i32x16 = _mm512_rorv_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, ror, src.as_i32x16()))
21356 }
21357}
21358
21359/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21360///
21361/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rorv_epi32&expand=4738)
21362#[inline]
21363#[target_feature(enable = "avx512f")]
21364#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21365#[cfg_attr(test, assert_instr(vprorvd))]
21366pub fn _mm512_maskz_rorv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
21367 unsafe {
21368 let ror: i32x16 = _mm512_rorv_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, ror, i32x16::ZERO))
21370 }
21371}
21372
21373/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
21374///
21375/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rorv_epi32&expand=4736)
21376#[inline]
21377#[target_feature(enable = "avx512f,avx512vl")]
21378#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21379#[cfg_attr(test, assert_instr(vprorvd))]
21380pub fn _mm256_rorv_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vprorvd256(a.as_i32x8(), b.as_i32x8())) }
21382}
21383
21384/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21385///
21386/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rorv_epi32&expand=4734)
21387#[inline]
21388#[target_feature(enable = "avx512f,avx512vl")]
21389#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21390#[cfg_attr(test, assert_instr(vprorvd))]
21391pub fn _mm256_mask_rorv_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21392 unsafe {
21393 let ror: i32x8 = _mm256_rorv_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, ror, src.as_i32x8()))
21395 }
21396}
21397
21398/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21399///
21400/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rorv_epi32&expand=4735)
21401#[inline]
21402#[target_feature(enable = "avx512f,avx512vl")]
21403#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21404#[cfg_attr(test, assert_instr(vprorvd))]
21405pub fn _mm256_maskz_rorv_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21406 unsafe {
21407 let ror: i32x8 = _mm256_rorv_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, ror, i32x8::ZERO))
21409 }
21410}
21411
21412/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
21413///
21414/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rorv_epi32&expand=4733)
21415#[inline]
21416#[target_feature(enable = "avx512f,avx512vl")]
21417#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21418#[cfg_attr(test, assert_instr(vprorvd))]
21419pub fn _mm_rorv_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vprorvd128(a.as_i32x4(), b.as_i32x4())) }
21421}
21422
21423/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21424///
21425/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rorv_epi32&expand=4731)
21426#[inline]
21427#[target_feature(enable = "avx512f,avx512vl")]
21428#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21429#[cfg_attr(test, assert_instr(vprorvd))]
21430pub fn _mm_mask_rorv_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21431 unsafe {
21432 let ror: i32x4 = _mm_rorv_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, ror, src.as_i32x4()))
21434 }
21435}
21436
21437/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21438///
21439/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rorv_epi32&expand=4732)
21440#[inline]
21441#[target_feature(enable = "avx512f,avx512vl")]
21442#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21443#[cfg_attr(test, assert_instr(vprorvd))]
21444pub fn _mm_maskz_rorv_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21445 unsafe {
21446 let ror: i32x4 = _mm_rorv_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, ror, i32x4::ZERO))
21448 }
21449}
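
// Illustrative sketch (not part of the original source): how the variable rotate-right
// family behaves, assuming `avx512f`/`avx512vl` are available and using the standard
// set/setr intrinsics defined elsewhere in this crate. Rotate counts are taken modulo
// the element width, so a count of 32 leaves a 32-bit lane unchanged.
//
//     let a = _mm_set1_epi32(1);
//     let b = _mm_setr_epi32(1, 4, 8, 32);          // per-lane rotate counts (mod 32)
//     let r = _mm_rorv_epi32(a, b);                 // 0x8000_0000, 0x1000_0000, 0x0100_0000, 1
//     let z = _mm_maskz_rorv_epi32(0b0011, a, b);   // same, but lanes 2 and 3 are zeroed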
21450
21451/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
21452///
21453/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rolv_epi64&expand=4712)
21454#[inline]
21455#[target_feature(enable = "avx512f")]
21456#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21457#[cfg_attr(test, assert_instr(vprolvq))]
21458pub fn _mm512_rolv_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vprolvq(a.as_i64x8(), b.as_i64x8())) }
21460}
21461
21462/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21463///
21464/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rolv_epi64&expand=4710)
21465#[inline]
21466#[target_feature(enable = "avx512f")]
21467#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21468#[cfg_attr(test, assert_instr(vprolvq))]
21469pub fn _mm512_mask_rolv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
21470 unsafe {
21471 let rol: i64x8 = _mm512_rolv_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, rol, src.as_i64x8()))
21473 }
21474}
21475
21476/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21477///
21478/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rolv_epi64&expand=4711)
21479#[inline]
21480#[target_feature(enable = "avx512f")]
21481#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21482#[cfg_attr(test, assert_instr(vprolvq))]
21483pub fn _mm512_maskz_rolv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
21484 unsafe {
21485 let rol: i64x8 = _mm512_rolv_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, rol, i64x8::ZERO))
21487 }
21488}
21489
21490/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
21491///
21492/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rolv_epi64&expand=4709)
21493#[inline]
21494#[target_feature(enable = "avx512f,avx512vl")]
21495#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21496#[cfg_attr(test, assert_instr(vprolvq))]
21497pub fn _mm256_rolv_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vprolvq256(a.as_i64x4(), b.as_i64x4())) }
21499}
21500
21501/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21502///
21503/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rolv_epi64&expand=4707)
21504#[inline]
21505#[target_feature(enable = "avx512f,avx512vl")]
21506#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21507#[cfg_attr(test, assert_instr(vprolvq))]
21508pub fn _mm256_mask_rolv_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21509 unsafe {
21510 let rol: i64x4 = _mm256_rolv_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, rol, src.as_i64x4()))
21512 }
21513}
21514
21515/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21516///
21517/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rolv_epi64&expand=4708)
21518#[inline]
21519#[target_feature(enable = "avx512f,avx512vl")]
21520#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21521#[cfg_attr(test, assert_instr(vprolvq))]
21522pub fn _mm256_maskz_rolv_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21523 unsafe {
21524 let rol: i64x4 = _mm256_rolv_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, rol, i64x4::ZERO))
21526 }
21527}
21528
21529/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.
21530///
21531/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rolv_epi64&expand=4706)
21532#[inline]
21533#[target_feature(enable = "avx512f,avx512vl")]
21534#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21535#[cfg_attr(test, assert_instr(vprolvq))]
21536pub fn _mm_rolv_epi64(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vprolvq128(a.as_i64x2(), b.as_i64x2())) }
21538}
21539
21540/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21541///
21542/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rolv_epi64&expand=4704)
21543#[inline]
21544#[target_feature(enable = "avx512f,avx512vl")]
21545#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21546#[cfg_attr(test, assert_instr(vprolvq))]
21547pub fn _mm_mask_rolv_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21548 unsafe {
21549 let rol: i64x2 = _mm_rolv_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, rol, src.as_i64x2()))
21551 }
21552}
21553
21554/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21555///
21556/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rolv_epi64&expand=4705)
21557#[inline]
21558#[target_feature(enable = "avx512f,avx512vl")]
21559#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21560#[cfg_attr(test, assert_instr(vprolvq))]
21561pub fn _mm_maskz_rolv_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21562 unsafe {
21563 let rol: i64x2 = _mm_rolv_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, rol, i64x2::ZERO))
21565 }
21566}
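
// Illustrative sketch (not part of the original source): per-lane rotate-left on 64-bit
// elements, with the writemask variant copying unselected lanes from `src`. Assumes
// `avx512f`/`avx512vl` support; the set intrinsics are the standard ones from this crate.
//
//     let a = _mm_set1_epi64x(1);
//     let b = _mm_set_epi64x(63, 1);                // lane 0 rotates by 1, lane 1 by 63
//     let r = _mm_rolv_epi64(a, b);                 // lanes: 2, 0x8000_0000_0000_0000
//     let m = _mm_mask_rolv_epi64(a, 0b01, a, b);   // lane 1 is copied from `src` (stays 1)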
21567
21568/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
21569///
21570/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_rorv_epi64&expand=4748)
21571#[inline]
21572#[target_feature(enable = "avx512f")]
21573#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21574#[cfg_attr(test, assert_instr(vprorvq))]
21575pub fn _mm512_rorv_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vprorvq(a.as_i64x8(), b.as_i64x8())) }
21577}
21578
21579/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21580///
21581/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_rorv_epi64&expand=4746)
21582#[inline]
21583#[target_feature(enable = "avx512f")]
21584#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21585#[cfg_attr(test, assert_instr(vprorvq))]
21586pub fn _mm512_mask_rorv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
21587 unsafe {
21588 let ror: i64x8 = _mm512_rorv_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, ror, src.as_i64x8()))
21590 }
21591}
21592
21593/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21594///
21595/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_rorv_epi64&expand=4747)
21596#[inline]
21597#[target_feature(enable = "avx512f")]
21598#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21599#[cfg_attr(test, assert_instr(vprorvq))]
21600pub fn _mm512_maskz_rorv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
21601 unsafe {
21602 let ror: i64x8 = _mm512_rorv_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, ror, i64x8::ZERO))
21604 }
21605}
21606
21607/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
21608///
21609/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_rorv_epi64&expand=4745)
21610#[inline]
21611#[target_feature(enable = "avx512f,avx512vl")]
21612#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21613#[cfg_attr(test, assert_instr(vprorvq))]
21614pub fn _mm256_rorv_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vprorvq256(a.as_i64x4(), b.as_i64x4())) }
21616}
21617
21618/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21619///
21620/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_rorv_epi64&expand=4743)
21621#[inline]
21622#[target_feature(enable = "avx512f,avx512vl")]
21623#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21624#[cfg_attr(test, assert_instr(vprorvq))]
21625pub fn _mm256_mask_rorv_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21626 unsafe {
21627 let ror: i64x4 = _mm256_rorv_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, ror, src.as_i64x4()))
21629 }
21630}
21631
21632/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21633///
21634/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_rorv_epi64&expand=4744)
21635#[inline]
21636#[target_feature(enable = "avx512f,avx512vl")]
21637#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21638#[cfg_attr(test, assert_instr(vprorvq))]
21639pub fn _mm256_maskz_rorv_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
21640 unsafe {
21641 let ror: i64x4 = _mm256_rorv_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, ror, i64x4::ZERO))
21643 }
21644}
21645
21646/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.
21647///
21648/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_rorv_epi64&expand=4742)
21649#[inline]
21650#[target_feature(enable = "avx512f,avx512vl")]
21651#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21652#[cfg_attr(test, assert_instr(vprorvq))]
21653pub fn _mm_rorv_epi64(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vprorvq128(a.as_i64x2(), b.as_i64x2())) }
21655}
21656
21657/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21658///
21659/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_rorv_epi64&expand=4740)
21660#[inline]
21661#[target_feature(enable = "avx512f,avx512vl")]
21662#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21663#[cfg_attr(test, assert_instr(vprorvq))]
21664pub fn _mm_mask_rorv_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21665 unsafe {
21666 let ror: i64x2 = _mm_rorv_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, ror, src.as_i64x2()))
21668 }
21669}
21670
21671/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21672///
21673/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_rorv_epi64&expand=4741)
21674#[inline]
21675#[target_feature(enable = "avx512f,avx512vl")]
21676#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21677#[cfg_attr(test, assert_instr(vprorvq))]
21678pub fn _mm_maskz_rorv_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
21679 unsafe {
21680 let ror: i64x2 = _mm_rorv_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, ror, i64x2::ZERO))
21682 }
21683}
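
// Illustrative note (not part of the original source): a variable rotate right by `n`
// is equivalent to a rotate left by `64 - n` (mod 64), so the `rorv`/`rolv` pairs can
// be used interchangeably. A quick sketch, assuming `avx512f` is available:
//
//     let a = _mm512_set1_epi64(0x00FF);
//     let r1 = _mm512_rorv_epi64(a, _mm512_set1_epi64(8));
//     let r2 = _mm512_rolv_epi64(a, _mm512_set1_epi64(56));
//     // r1 == r2: every lane is 0xFF00_0000_0000_0000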
21684
21685/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
21686///
21687/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sllv_epi32&expand=5342)
21688#[inline]
21689#[target_feature(enable = "avx512f")]
21690#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21691#[cfg_attr(test, assert_instr(vpsllvd))]
21692pub fn _mm512_sllv_epi32(a: __m512i, count: __m512i) -> __m512i {
    unsafe { transmute(vpsllvd(a.as_i32x16(), count.as_i32x16())) }
21694}
21695
21696/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21697///
21698/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sllv_epi32&expand=5340)
21699#[inline]
21700#[target_feature(enable = "avx512f")]
21701#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21702#[cfg_attr(test, assert_instr(vpsllvd))]
21703pub fn _mm512_mask_sllv_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
21704 unsafe {
21705 let shf: i32x16 = _mm512_sllv_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
21707 }
21708}
21709
21710/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21711///
21712/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sllv_epi32&expand=5341)
21713#[inline]
21714#[target_feature(enable = "avx512f")]
21715#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21716#[cfg_attr(test, assert_instr(vpsllvd))]
21717pub fn _mm512_maskz_sllv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
21718 unsafe {
21719 let shf: i32x16 = _mm512_sllv_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
21721 }
21722}
21723
21724/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21725///
21726/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sllv_epi32&expand=5337)
21727#[inline]
21728#[target_feature(enable = "avx512f,avx512vl")]
21729#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21730#[cfg_attr(test, assert_instr(vpsllvd))]
21731pub fn _mm256_mask_sllv_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21732 unsafe {
21733 let shf: i32x8 = _mm256_sllv_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
21735 }
21736}
21737
21738/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21739///
21740/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sllv_epi32&expand=5338)
21741#[inline]
21742#[target_feature(enable = "avx512f,avx512vl")]
21743#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21744#[cfg_attr(test, assert_instr(vpsllvd))]
21745pub fn _mm256_maskz_sllv_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21746 unsafe {
21747 let shf: i32x8 = _mm256_sllv_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
21749 }
21750}
21751
21752/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21753///
21754/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sllv_epi32&expand=5334)
21755#[inline]
21756#[target_feature(enable = "avx512f,avx512vl")]
21757#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21758#[cfg_attr(test, assert_instr(vpsllvd))]
21759pub fn _mm_mask_sllv_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21760 unsafe {
21761 let shf: i32x4 = _mm_sllv_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
21763 }
21764}
21765
21766/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21767///
21768/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sllv_epi32&expand=5335)
21769#[inline]
21770#[target_feature(enable = "avx512f,avx512vl")]
21771#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21772#[cfg_attr(test, assert_instr(vpsllvd))]
21773pub fn _mm_maskz_sllv_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21774 unsafe {
21775 let shf: i32x4 = _mm_sllv_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
21777 }
21778}
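
// Illustrative sketch (not part of the original source): variable left shift, which,
// unlike the rotate intrinsics above, does not wrap the count — any count of 32 or
// more produces zero. Assumes `avx512f`/`avx512vl` (the `_mm_sllv_epi32` base is AVX2).
//
//     let a = _mm_set1_epi32(1);
//     let c = _mm_setr_epi32(0, 4, 31, 32);
//     let r = _mm_sllv_epi32(a, c);                 // lanes: 1, 16, 0x8000_0000, 0
//     let z = _mm_maskz_sllv_epi32(0b0111, a, c);   // lane 3 is zeroed by the mask as well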
21779
21780/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
21781///
21782/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srlv_epi32&expand=5554)
21783#[inline]
21784#[target_feature(enable = "avx512f")]
21785#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21786#[cfg_attr(test, assert_instr(vpsrlvd))]
21787pub fn _mm512_srlv_epi32(a: __m512i, count: __m512i) -> __m512i {
    unsafe { transmute(vpsrlvd(a.as_i32x16(), count.as_i32x16())) }
21789}
21790
21791/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21792///
21793/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srlv_epi32&expand=5552)
21794#[inline]
21795#[target_feature(enable = "avx512f")]
21796#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21797#[cfg_attr(test, assert_instr(vpsrlvd))]
21798pub fn _mm512_mask_srlv_epi32(src: __m512i, k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
21799 unsafe {
21800 let shf: i32x16 = _mm512_srlv_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
21802 }
21803}
21804
21805/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21806///
21807/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srlv_epi32&expand=5553)
21808#[inline]
21809#[target_feature(enable = "avx512f")]
21810#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21811#[cfg_attr(test, assert_instr(vpsrlvd))]
21812pub fn _mm512_maskz_srlv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
21813 unsafe {
21814 let shf: i32x16 = _mm512_srlv_epi32(a, count).as_i32x16();
        transmute(simd_select_bitmask(k, shf, i32x16::ZERO))
21816 }
21817}
21818
21819/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21820///
21821/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srlv_epi32&expand=5549)
21822#[inline]
21823#[target_feature(enable = "avx512f,avx512vl")]
21824#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21825#[cfg_attr(test, assert_instr(vpsrlvd))]
21826pub fn _mm256_mask_srlv_epi32(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21827 unsafe {
21828 let shf: i32x8 = _mm256_srlv_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
21830 }
21831}
21832
21833/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21834///
21835/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srlv_epi32&expand=5550)
21836#[inline]
21837#[target_feature(enable = "avx512f,avx512vl")]
21838#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21839#[cfg_attr(test, assert_instr(vpsrlvd))]
21840pub fn _mm256_maskz_srlv_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21841 unsafe {
21842 let shf: i32x8 = _mm256_srlv_epi32(a, count).as_i32x8();
        transmute(simd_select_bitmask(k, shf, i32x8::ZERO))
21844 }
21845}
21846
21847/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21848///
21849/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srlv_epi32&expand=5546)
21850#[inline]
21851#[target_feature(enable = "avx512f,avx512vl")]
21852#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21853#[cfg_attr(test, assert_instr(vpsrlvd))]
21854pub fn _mm_mask_srlv_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21855 unsafe {
21856 let shf: i32x4 = _mm_srlv_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
21858 }
21859}
21860
21861/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21862///
21863/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srlv_epi32&expand=5547)
21864#[inline]
21865#[target_feature(enable = "avx512f,avx512vl")]
21866#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21867#[cfg_attr(test, assert_instr(vpsrlvd))]
21868pub fn _mm_maskz_srlv_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21869 unsafe {
21870 let shf: i32x4 = _mm_srlv_epi32(a, count).as_i32x4();
        transmute(simd_select_bitmask(k, shf, i32x4::ZERO))
21872 }
21873}
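
// Illustrative sketch (not part of the original source): the `srlv` family is a
// *logical* right shift — zeros are shifted in regardless of the sign bit, and counts
// of 32 or more clear the lane. Assumes `avx512f`/`avx512vl` support.
//
//     let a = _mm_set1_epi32(-1);                   // 0xFFFF_FFFF in every lane
//     let c = _mm_setr_epi32(0, 1, 31, 32);
//     let r = _mm_srlv_epi32(a, c);                 // 0xFFFF_FFFF, 0x7FFF_FFFF, 1, 0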
21874
21875/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
21876///
21877/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_sllv_epi64&expand=5351)
21878#[inline]
21879#[target_feature(enable = "avx512f")]
21880#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21881#[cfg_attr(test, assert_instr(vpsllvq))]
21882pub fn _mm512_sllv_epi64(a: __m512i, count: __m512i) -> __m512i {
    unsafe { transmute(vpsllvq(a.as_i64x8(), count.as_i64x8())) }
21884}
21885
21886/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21887///
21888/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_sllv_epi64&expand=5349)
21889#[inline]
21890#[target_feature(enable = "avx512f")]
21891#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21892#[cfg_attr(test, assert_instr(vpsllvq))]
21893pub fn _mm512_mask_sllv_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
21894 unsafe {
21895 let shf: i64x8 = _mm512_sllv_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
21897 }
21898}
21899
21900/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21901///
21902/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_sllv_epi64&expand=5350)
21903#[inline]
21904#[target_feature(enable = "avx512f")]
21905#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21906#[cfg_attr(test, assert_instr(vpsllvq))]
21907pub fn _mm512_maskz_sllv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
21908 unsafe {
21909 let shf: i64x8 = _mm512_sllv_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
21911 }
21912}
21913
21914/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21915///
21916/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_sllv_epi64&expand=5346)
21917#[inline]
21918#[target_feature(enable = "avx512f,avx512vl")]
21919#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21920#[cfg_attr(test, assert_instr(vpsllvq))]
21921pub fn _mm256_mask_sllv_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21922 unsafe {
21923 let shf: i64x4 = _mm256_sllv_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
21925 }
21926}
21927
21928/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21929///
21930/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_sllv_epi64&expand=5347)
21931#[inline]
21932#[target_feature(enable = "avx512f,avx512vl")]
21933#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21934#[cfg_attr(test, assert_instr(vpsllvq))]
21935pub fn _mm256_maskz_sllv_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
21936 unsafe {
21937 let shf: i64x4 = _mm256_sllv_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
21939 }
21940}
21941
21942/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21943///
21944/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_sllv_epi64&expand=5343)
21945#[inline]
21946#[target_feature(enable = "avx512f,avx512vl")]
21947#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21948#[cfg_attr(test, assert_instr(vpsllvq))]
21949pub fn _mm_mask_sllv_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21950 unsafe {
21951 let shf: i64x2 = _mm_sllv_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
21953 }
21954}
21955
21956/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21957///
21958/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_sllv_epi64&expand=5344)
21959#[inline]
21960#[target_feature(enable = "avx512f,avx512vl")]
21961#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21962#[cfg_attr(test, assert_instr(vpsllvq))]
21963pub fn _mm_maskz_sllv_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
21964 unsafe {
21965 let shf: i64x2 = _mm_sllv_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
21967 }
21968}
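
// Illustrative sketch (not part of the original source): the 64-bit left-shift variants
// behave like the 32-bit ones, except counts of 64 or more clear the lane. Assumes
// `avx512f`/`avx512vl` support.
//
//     let a = _mm_set1_epi64x(3);
//     let c = _mm_set_epi64x(64, 2);                // lane 0 shifts by 2, lane 1 by 64
//     let r = _mm_sllv_epi64(a, c);                 // lanes: 12, 0
//     let m = _mm_mask_sllv_epi64(a, 0b01, a, c);   // lane 1 copied from `src` (stays 3)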
21969
21970/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.
21971///
21972/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_srlv_epi64&expand=5563)
21973#[inline]
21974#[target_feature(enable = "avx512f")]
21975#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21976#[cfg_attr(test, assert_instr(vpsrlvq))]
21977pub fn _mm512_srlv_epi64(a: __m512i, count: __m512i) -> __m512i {
    unsafe { transmute(vpsrlvq(a.as_i64x8(), count.as_i64x8())) }
21979}
21980
21981/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
21982///
21983/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_srlv_epi64&expand=5561)
21984#[inline]
21985#[target_feature(enable = "avx512f")]
21986#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
21987#[cfg_attr(test, assert_instr(vpsrlvq))]
21988pub fn _mm512_mask_srlv_epi64(src: __m512i, k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
21989 unsafe {
21990 let shf: i64x8 = _mm512_srlv_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
21992 }
21993}
21994
21995/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
21996///
21997/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_srlv_epi64&expand=5562)
21998#[inline]
21999#[target_feature(enable = "avx512f")]
22000#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22001#[cfg_attr(test, assert_instr(vpsrlvq))]
22002pub fn _mm512_maskz_srlv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
22003 unsafe {
22004 let shf: i64x8 = _mm512_srlv_epi64(a, count).as_i64x8();
        transmute(simd_select_bitmask(k, shf, i64x8::ZERO))
22006 }
22007}
22008
22009/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22010///
22011/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_srlv_epi64&expand=5558)
22012#[inline]
22013#[target_feature(enable = "avx512f,avx512vl")]
22014#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22015#[cfg_attr(test, assert_instr(vpsrlvq))]
22016pub fn _mm256_mask_srlv_epi64(src: __m256i, k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
22017 unsafe {
22018 let shf: i64x4 = _mm256_srlv_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
22020 }
22021}
22022
22023/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22024///
22025/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_srlv_epi64&expand=5559)
22026#[inline]
22027#[target_feature(enable = "avx512f,avx512vl")]
22028#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22029#[cfg_attr(test, assert_instr(vpsrlvq))]
22030pub fn _mm256_maskz_srlv_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
22031 unsafe {
22032 let shf: i64x4 = _mm256_srlv_epi64(a, count).as_i64x4();
        transmute(simd_select_bitmask(k, shf, i64x4::ZERO))
22034 }
22035}
22036
22037/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22038///
22039/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_srlv_epi64&expand=5555)
22040#[inline]
22041#[target_feature(enable = "avx512f,avx512vl")]
22042#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22043#[cfg_attr(test, assert_instr(vpsrlvq))]
22044pub fn _mm_mask_srlv_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
22045 unsafe {
22046 let shf: i64x2 = _mm_srlv_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
22048 }
22049}
22050
22051/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22052///
22053/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_srlv_epi64&expand=5556)
22054#[inline]
22055#[target_feature(enable = "avx512f,avx512vl")]
22056#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22057#[cfg_attr(test, assert_instr(vpsrlvq))]
22058pub fn _mm_maskz_srlv_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
22059 unsafe {
22060 let shf: i64x2 = _mm_srlv_epi64(a, count).as_i64x2();
        transmute(simd_select_bitmask(k, shf, i64x2::ZERO))
22062 }
22063}
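
// Illustrative sketch (not part of the original source): 64-bit logical right shift with
// a zeromask, assuming `avx512f` is available.
//
//     let a = _mm512_set1_epi64(i64::MIN);           // 0x8000_0000_0000_0000 per lane
//     let c = _mm512_set1_epi64(63);
//     let r = _mm512_srlv_epi64(a, c);               // every lane becomes 1 (zeros shifted in)
//     let z = _mm512_maskz_srlv_epi64(0x0F, a, c);   // lanes 4..8 are zeroed by the mask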
22064
22065/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst.
22066///
22067/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permute_ps&expand=4170)
22068#[inline]
22069#[target_feature(enable = "avx512f")]
22070#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22071#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22072#[rustc_legacy_const_generics(1)]
22073pub fn _mm512_permute_ps<const MASK: i32>(a: __m512) -> __m512 {
22074 unsafe {
22075 static_assert_uimm_bits!(MASK, 8);
22076 simd_shuffle!(
22077 a,
22078 a,
22079 [
22080 MASK as u32 & 0b11,
22081 (MASK as u32 >> 2) & 0b11,
22082 ((MASK as u32 >> 4) & 0b11),
22083 ((MASK as u32 >> 6) & 0b11),
22084 (MASK as u32 & 0b11) + 4,
22085 ((MASK as u32 >> 2) & 0b11) + 4,
22086 ((MASK as u32 >> 4) & 0b11) + 4,
22087 ((MASK as u32 >> 6) & 0b11) + 4,
22088 (MASK as u32 & 0b11) + 8,
22089 ((MASK as u32 >> 2) & 0b11) + 8,
22090 ((MASK as u32 >> 4) & 0b11) + 8,
22091 ((MASK as u32 >> 6) & 0b11) + 8,
22092 (MASK as u32 & 0b11) + 12,
22093 ((MASK as u32 >> 2) & 0b11) + 12,
22094 ((MASK as u32 >> 4) & 0b11) + 12,
22095 ((MASK as u32 >> 6) & 0b11) + 12,
22096 ],
22097 )
22098 }
22099}
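
// Illustrative sketch (not part of the original source): each 2-bit field of `MASK`
// selects one of the four floats of a 128-bit lane, and the same pattern is applied to
// all four lanes. Assumes `avx512f` support.
//
//     let a = _mm512_setr_ps(
//         0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
//     );
//     let r = _mm512_permute_ps::<0b00_00_00_00>(a);
//     // each lane broadcasts its element 0: [0, 0, 0, 0, 4, 4, 4, 4, 8, 8, 8, 8, 12, 12, 12, 12]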
22100
22101/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22102///
22103/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permute_ps&expand=4168)
22104#[inline]
22105#[target_feature(enable = "avx512f")]
22106#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22107#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22108#[rustc_legacy_const_generics(3)]
22109pub fn _mm512_mask_permute_ps<const MASK: i32>(src: __m512, k: __mmask16, a: __m512) -> __m512 {
22110 unsafe {
22111 static_assert_uimm_bits!(MASK, 8);
22112 let r: __m512 = _mm512_permute_ps::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16()))
22114 }
22115}
22116
22117/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22118///
22119/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permute_ps&expand=4169)
22120#[inline]
22121#[target_feature(enable = "avx512f")]
22122#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22123#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22124#[rustc_legacy_const_generics(2)]
22125pub fn _mm512_maskz_permute_ps<const MASK: i32>(k: __mmask16, a: __m512) -> __m512 {
22126 unsafe {
22127 static_assert_uimm_bits!(MASK, 8);
22128 let r: __m512 = _mm512_permute_ps::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO))
22130 }
22131}
22132
22133/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22134///
22135/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permute_ps&expand=4165)
22136#[inline]
22137#[target_feature(enable = "avx512f,avx512vl")]
22138#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22139#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22140#[rustc_legacy_const_generics(3)]
22141pub fn _mm256_mask_permute_ps<const MASK: i32>(src: __m256, k: __mmask8, a: __m256) -> __m256 {
22142 unsafe {
22143 let r: __m256 = _mm256_permute_ps::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8()))
22145 }
22146}
22147
22148/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22149///
22150/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permute_ps&expand=4166)
22151#[inline]
22152#[target_feature(enable = "avx512f,avx512vl")]
22153#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22154#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22155#[rustc_legacy_const_generics(2)]
22156pub fn _mm256_maskz_permute_ps<const MASK: i32>(k: __mmask8, a: __m256) -> __m256 {
22157 unsafe {
22158 let r: __m256 = _mm256_permute_ps::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO))
22160 }
22161}
22162
22163/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22164///
22165/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permute_ps&expand=4162)
22166#[inline]
22167#[target_feature(enable = "avx512f,avx512vl")]
22168#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22169#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22170#[rustc_legacy_const_generics(3)]
22171pub fn _mm_mask_permute_ps<const MASK: i32>(src: __m128, k: __mmask8, a: __m128) -> __m128 {
22172 unsafe {
22173 let r: __m128 = _mm_permute_ps::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4()))
22175 }
22176}
22177
22178/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22179///
22180/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permute_ps&expand=4163)
22181#[inline]
22182#[target_feature(enable = "avx512f,avx512vl")]
22183#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22184#[cfg_attr(test, assert_instr(vshufps, MASK = 0b11_00_01_11))]
22185#[rustc_legacy_const_generics(2)]
22186pub fn _mm_maskz_permute_ps<const MASK: i32>(k: __mmask8, a: __m128) -> __m128 {
22187 unsafe {
22188 let r: __m128 = _mm_permute_ps::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO))
22190 }
22191}
22192
22193/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst.
22194///
22195/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permute_pd&expand=4161)
22196#[inline]
22197#[target_feature(enable = "avx512f")]
22198#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22199#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01_10_01))]
22200#[rustc_legacy_const_generics(1)]
22201pub fn _mm512_permute_pd<const MASK: i32>(a: __m512d) -> __m512d {
22202 unsafe {
22203 static_assert_uimm_bits!(MASK, 8);
22204 simd_shuffle!(
22205 a,
22206 a,
22207 [
22208 MASK as u32 & 0b1,
22209 ((MASK as u32 >> 1) & 0b1),
22210 ((MASK as u32 >> 2) & 0b1) + 2,
22211 ((MASK as u32 >> 3) & 0b1) + 2,
22212 ((MASK as u32 >> 4) & 0b1) + 4,
22213 ((MASK as u32 >> 5) & 0b1) + 4,
22214 ((MASK as u32 >> 6) & 0b1) + 6,
22215 ((MASK as u32 >> 7) & 0b1) + 6,
22216 ],
22217 )
22218 }
22219}
22220
22221/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22222///
22223/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permute_pd&expand=4159)
22224#[inline]
22225#[target_feature(enable = "avx512f")]
22226#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22227#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01_10_01))]
22228#[rustc_legacy_const_generics(3)]
22229pub fn _mm512_mask_permute_pd<const MASK: i32>(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
22230 unsafe {
22231 static_assert_uimm_bits!(MASK, 8);
22232 let r: __m512d = _mm512_permute_pd::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
22234 }
22235}
22236
22237/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22238///
22239/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permute_pd&expand=4160)
22240#[inline]
22241#[target_feature(enable = "avx512f")]
22242#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22243#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01_10_01))]
22244#[rustc_legacy_const_generics(2)]
22245pub fn _mm512_maskz_permute_pd<const MASK: i32>(k: __mmask8, a: __m512d) -> __m512d {
22246 unsafe {
22247 static_assert_uimm_bits!(MASK, 8);
22248 let r: __m512d = _mm512_permute_pd::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
22250 }
22251}
22252
22253/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22254///
22255/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permute_pd&expand=4156)
22256#[inline]
22257#[target_feature(enable = "avx512f,avx512vl")]
22258#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22259#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01))]
22260#[rustc_legacy_const_generics(3)]
22261pub fn _mm256_mask_permute_pd<const MASK: i32>(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
22262 unsafe {
22263 static_assert_uimm_bits!(MASK, 4);
22264 let r: __m256d = _mm256_permute_pd::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
22266 }
22267}
22268
22269/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22270///
22271/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permute_pd&expand=4157)
22272#[inline]
22273#[target_feature(enable = "avx512f,avx512vl")]
22274#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22275#[cfg_attr(test, assert_instr(vshufpd, MASK = 0b11_01))]
22276#[rustc_legacy_const_generics(2)]
22277pub fn _mm256_maskz_permute_pd<const MASK: i32>(k: __mmask8, a: __m256d) -> __m256d {
22278 unsafe {
22279 static_assert_uimm_bits!(MASK, 4);
22280 let r: __m256d = _mm256_permute_pd::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
22282 }
22283}
22284
22285/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22286///
22287/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permute_pd&expand=4153)
22288#[inline]
22289#[target_feature(enable = "avx512f,avx512vl")]
22290#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22291#[cfg_attr(test, assert_instr(vshufpd, IMM2 = 0b01))]
22292#[rustc_legacy_const_generics(3)]
22293pub fn _mm_mask_permute_pd<const IMM2: i32>(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
22294 unsafe {
22295 static_assert_uimm_bits!(IMM2, 2);
22296 let r: __m128d = _mm_permute_pd::<IMM2>(a);
        transmute(simd_select_bitmask(k, r.as_f64x2(), src.as_f64x2()))
22298 }
22299}
22300
22301/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22302///
22303/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permute_pd&expand=4154)
22304#[inline]
22305#[target_feature(enable = "avx512f,avx512vl")]
22306#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22307#[cfg_attr(test, assert_instr(vshufpd, IMM2 = 0b01))]
22308#[rustc_legacy_const_generics(2)]
22309pub fn _mm_maskz_permute_pd<const IMM2: i32>(k: __mmask8, a: __m128d) -> __m128d {
22310 unsafe {
22311 static_assert_uimm_bits!(IMM2, 2);
22312 let r: __m128d = _mm_permute_pd::<IMM2>(a);
        transmute(simd_select_bitmask(k, r.as_f64x2(), f64x2::ZERO))
22314 }
22315}
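
// Illustrative sketch (not part of the original source): in the `permute_pd` family,
// output element `i` takes either the low or the high double of its own 128-bit lane,
// according to bit `i` of the immediate. Assumes `avx512f` support.
//
//     let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
//     let r = _mm512_permute_pd::<0b0101_0101>(a);  // swap within each pair: [1, 0, 3, 2, 5, 4, 7, 6]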
22316
22317/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst.
22318///
22319/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex_epi64&expand=4208)
22320#[inline]
22321#[target_feature(enable = "avx512f")]
22322#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22323#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
22324#[rustc_legacy_const_generics(1)]
22325pub fn _mm512_permutex_epi64<const MASK: i32>(a: __m512i) -> __m512i {
22326 unsafe {
22327 static_assert_uimm_bits!(MASK, 8);
22328 simd_shuffle!(
22329 a,
22330 a,
22331 [
22332 MASK as u32 & 0b11,
22333 (MASK as u32 >> 2) & 0b11,
22334 ((MASK as u32 >> 4) & 0b11),
22335 ((MASK as u32 >> 6) & 0b11),
22336 (MASK as u32 & 0b11) + 4,
22337 ((MASK as u32 >> 2) & 0b11) + 4,
22338 ((MASK as u32 >> 4) & 0b11) + 4,
22339 ((MASK as u32 >> 6) & 0b11) + 4,
22340 ],
22341 )
22342 }
22343}
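
// Illustrative sketch (not part of the original source): unlike `_mm512_permute_*`,
// `permutex` works on 256-bit lanes — each 2-bit field of `MASK` selects one of the
// four 64-bit elements of a 256-bit half, with the same control reused for both halves.
// Assumes `avx512f` support.
//
//     let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
//     let r = _mm512_permutex_epi64::<0b00_00_00_00>(a);
//     // each 256-bit half broadcasts its element 0: [0, 0, 0, 0, 4, 4, 4, 4]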
22344
22345/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22346///
22347/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex_epi64&expand=4206)
22348#[inline]
22349#[target_feature(enable = "avx512f")]
22350#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22351#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
22352#[rustc_legacy_const_generics(3)]
22353pub fn _mm512_mask_permutex_epi64<const MASK: i32>(
22354 src: __m512i,
22355 k: __mmask8,
22356 a: __m512i,
22357) -> __m512i {
22358 unsafe {
22359 static_assert_uimm_bits!(MASK, 8);
22360 let r: __m512i = _mm512_permutex_epi64::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8()))
22362 }
22363}
22364
22365/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22366///
22367/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex_epi64&expand=4207)
22368#[inline]
22369#[target_feature(enable = "avx512f")]
22370#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22371#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
22372#[rustc_legacy_const_generics(2)]
22373pub fn _mm512_maskz_permutex_epi64<const MASK: i32>(k: __mmask8, a: __m512i) -> __m512i {
22374 unsafe {
22375 static_assert_uimm_bits!(MASK, 8);
22376 let r: __m512i = _mm512_permutex_epi64::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO))
22378 }
22379}
22380
22381/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst.
22382///
22383/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex_epi64&expand=4205)
22384#[inline]
22385#[target_feature(enable = "avx512f,avx512vl")]
22386#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22387#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
22388#[rustc_legacy_const_generics(1)]
22389pub fn _mm256_permutex_epi64<const MASK: i32>(a: __m256i) -> __m256i {
22390 unsafe {
22391 static_assert_uimm_bits!(MASK, 8);
22392 simd_shuffle!(
22393 a,
22394 a,
22395 [
22396 MASK as u32 & 0b11,
22397 (MASK as u32 >> 2) & 0b11,
22398 ((MASK as u32 >> 4) & 0b11),
22399 ((MASK as u32 >> 6) & 0b11),
22400 ],
22401 )
22402 }
22403}
22404
22405/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22406///
22407/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex_epi64&expand=4203)
22408#[inline]
22409#[target_feature(enable = "avx512f,avx512vl")]
22410#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22411#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
22412#[rustc_legacy_const_generics(3)]
22413pub fn _mm256_mask_permutex_epi64<const MASK: i32>(
22414 src: __m256i,
22415 k: __mmask8,
22416 a: __m256i,
22417) -> __m256i {
22418 unsafe {
22419 static_assert_uimm_bits!(MASK, 8);
22420 let r: __m256i = _mm256_permutex_epi64::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4()))
22422 }
22423}
22424
22425/// Shuffle 64-bit integers in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22426///
22427/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex_epi64&expand=4204)
22428#[inline]
22429#[target_feature(enable = "avx512f,avx512vl")]
22430#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22431#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermq
22432#[rustc_legacy_const_generics(2)]
22433pub fn _mm256_maskz_permutex_epi64<const MASK: i32>(k: __mmask8, a: __m256i) -> __m256i {
22434 unsafe {
22435 static_assert_uimm_bits!(MASK, 8);
22436 let r: __m256i = _mm256_permutex_epi64::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO))
22438 }
22439}
22440
22441/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst.
22442///
22443/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex_pd&expand=4214)
22444#[inline]
22445#[target_feature(enable = "avx512f")]
22446#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22447#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
22448#[rustc_legacy_const_generics(1)]
22449pub fn _mm512_permutex_pd<const MASK: i32>(a: __m512d) -> __m512d {
22450 unsafe {
22451 static_assert_uimm_bits!(MASK, 8);
22452 simd_shuffle!(
22453 a,
22454 a,
22455 [
22456 MASK as u32 & 0b11,
22457 (MASK as u32 >> 2) & 0b11,
22458 ((MASK as u32 >> 4) & 0b11),
22459 ((MASK as u32 >> 6) & 0b11),
22460 (MASK as u32 & 0b11) + 4,
22461 ((MASK as u32 >> 2) & 0b11) + 4,
22462 ((MASK as u32 >> 4) & 0b11) + 4,
22463 ((MASK as u32 >> 6) & 0b11) + 4,
22464 ],
22465 )
22466 }
22467}
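
// Same 2-bit-per-lane control as the integer form above, applied to f64 lanes. A
// small illustrative sketch: MASK = 0 broadcasts element 0 of each 256-bit half.
//
//     let a = _mm512_setr_pd(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
//     let r = _mm512_permutex_pd::<0b00_00_00_00>(a);
//     // lanes of r: [0.0, 0.0, 0.0, 0.0, 4.0, 4.0, 4.0, 4.0]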
22468
22469/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22470///
22471/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex_pd&expand=4212)
22472#[inline]
22473#[target_feature(enable = "avx512f")]
22474#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22475#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
22476#[rustc_legacy_const_generics(3)]
22477pub fn _mm512_mask_permutex_pd<const MASK: i32>(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
22478 unsafe {
22479 let r: __m512d = _mm512_permutex_pd::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
22481 }
22482}
22483
22484/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22485///
22486/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex_pd&expand=4213)
22487#[inline]
22488#[target_feature(enable = "avx512f")]
22489#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22490#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
22491#[rustc_legacy_const_generics(2)]
22492pub fn _mm512_maskz_permutex_pd<const MASK: i32>(k: __mmask8, a: __m512d) -> __m512d {
22493 unsafe {
22494 let r: __m512d = _mm512_permutex_pd::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
22496 }
22497}
22498
22499/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst.
22500///
22501/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex_pd&expand=4211)
22502#[inline]
22503#[target_feature(enable = "avx512f,avx512vl")]
22504#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22505#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
22506#[rustc_legacy_const_generics(1)]
22507pub fn _mm256_permutex_pd<const MASK: i32>(a: __m256d) -> __m256d {
22508 unsafe {
22509 static_assert_uimm_bits!(MASK, 8);
22510 simd_shuffle!(
22511 a,
22512 a,
22513 [
22514 MASK as u32 & 0b11,
22515 (MASK as u32 >> 2) & 0b11,
22516 ((MASK as u32 >> 4) & 0b11),
22517 ((MASK as u32 >> 6) & 0b11),
22518 ],
22519 )
22520 }
22521}
22522
22523/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22524///
22525/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex_pd&expand=4209)
22526#[inline]
22527#[target_feature(enable = "avx512f,avx512vl")]
22528#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22529#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
22530#[rustc_legacy_const_generics(3)]
22531pub fn _mm256_mask_permutex_pd<const MASK: i32>(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
22532 unsafe {
22533 static_assert_uimm_bits!(MASK, 8);
22534 let r: __m256d = _mm256_permutex_pd::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
22536 }
22537}
22538
22539/// Shuffle double-precision (64-bit) floating-point elements in a within 256-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22540///
22541/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex_pd&expand=4210)
22542#[inline]
22543#[target_feature(enable = "avx512f,avx512vl")]
22544#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22545#[cfg_attr(test, assert_instr(vperm, MASK = 0b10_01_10_11))] //should be vpermpd
22546#[rustc_legacy_const_generics(2)]
22547pub fn _mm256_maskz_permutex_pd<const MASK: i32>(k: __mmask8, a: __m256d) -> __m256d {
22548 unsafe {
22549 static_assert_uimm_bits!(MASK, 8);
22550 let r: __m256d = _mm256_permutex_pd::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
22552 }
22553}
22554
22555/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst. Note that this intrinsic shuffles across 128-bit lanes, unlike past intrinsics that use the permutevar name. This intrinsic is identical to _mm512_permutexvar_epi32, and it is recommended that you use that intrinsic name.
22556///
22557/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutevar_epi32&expand=4182)
22558#[inline]
22559#[target_feature(enable = "avx512f")]
22560#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22561#[cfg_attr(test, assert_instr(vperm))] //should be vpermd
22562pub fn _mm512_permutevar_epi32(idx: __m512i, a: __m512i) -> __m512i {
    unsafe { transmute(vpermd(a.as_i32x16(), idx.as_i32x16())) }
22564}
22565
22566/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). Note that this intrinsic shuffles across 128-bit lanes, unlike past intrinsics that use the permutevar name. This intrinsic is identical to _mm512_mask_permutexvar_epi32, and it is recommended that you use that intrinsic name.
22567///
22568/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutevar_epi32&expand=4181)
22569#[inline]
22570#[target_feature(enable = "avx512f")]
22571#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22572#[cfg_attr(test, assert_instr(vpermd))]
22573pub fn _mm512_mask_permutevar_epi32(
22574 src: __m512i,
22575 k: __mmask16,
22576 idx: __m512i,
22577 a: __m512i,
22578) -> __m512i {
22579 unsafe {
22580 let permute: i32x16 = _mm512_permutevar_epi32(idx, a).as_i32x16();
        transmute(simd_select_bitmask(k, permute, src.as_i32x16()))
22582 }
22583}
22584
22585/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst.
22586///
22587/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutevar_ps&expand=4200)
22588#[inline]
22589#[target_feature(enable = "avx512f")]
22590#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22591#[cfg_attr(test, assert_instr(vpermilps))]
22592pub fn _mm512_permutevar_ps(a: __m512, b: __m512i) -> __m512 {
    unsafe { transmute(vpermilps(a.as_f32x16(), b.as_i32x16())) }
22594}
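
// Unlike the permutex* family, the control here is a vector: bits 1:0 of each 32-bit
// element of `b` pick one of the four floats in that element's own 128-bit lane.
// Illustrative sketch only:
//
//     let a = _mm512_setr_ps(
//         0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0,
//         8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
//     );
//     let b = _mm512_set1_epi32(3); // every element selects index 3 of its 128-bit lane
//     let r = _mm512_permutevar_ps(a, b);
//     // lanes of r: [3, 3, 3, 3, 7, 7, 7, 7, 11, 11, 11, 11, 15, 15, 15, 15]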
22595
22596/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22597///
22598/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutevar_ps&expand=4198)
22599#[inline]
22600#[target_feature(enable = "avx512f")]
22601#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22602#[cfg_attr(test, assert_instr(vpermilps))]
22603pub fn _mm512_mask_permutevar_ps(src: __m512, k: __mmask16, a: __m512, b: __m512i) -> __m512 {
22604 unsafe {
22605 let permute: f32x16 = _mm512_permutevar_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, permute, src.as_f32x16()))
22607 }
22608}
22609
22610/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22611///
22612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutevar_ps&expand=4199)
22613#[inline]
22614#[target_feature(enable = "avx512f")]
22615#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22616#[cfg_attr(test, assert_instr(vpermilps))]
22617pub fn _mm512_maskz_permutevar_ps(k: __mmask16, a: __m512, b: __m512i) -> __m512 {
22618 unsafe {
22619 let permute: f32x16 = _mm512_permutevar_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, permute, f32x16::ZERO))
22621 }
22622}
22623
22624/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22625///
22626/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm256_mask_permutevar_ps&expand=4195)
22627#[inline]
22628#[target_feature(enable = "avx512f,avx512vl")]
22629#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22630#[cfg_attr(test, assert_instr(vpermilps))]
22631pub fn _mm256_mask_permutevar_ps(src: __m256, k: __mmask8, a: __m256, b: __m256i) -> __m256 {
22632 unsafe {
22633 let permute: f32x8 = _mm256_permutevar_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, permute, src.as_f32x8()))
22635 }
22636}
22637
22638/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22639///
22640/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutevar_ps&expand=4196)
22641#[inline]
22642#[target_feature(enable = "avx512f,avx512vl")]
22643#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22644#[cfg_attr(test, assert_instr(vpermilps))]
22645pub fn _mm256_maskz_permutevar_ps(k: __mmask8, a: __m256, b: __m256i) -> __m256 {
22646 unsafe {
22647 let permute: f32x8 = _mm256_permutevar_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, permute, f32x8::ZERO))
22649 }
22650}
22651
22652/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22653///
22654/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutevar_ps&expand=4192)
22655#[inline]
22656#[target_feature(enable = "avx512f,avx512vl")]
22657#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22658#[cfg_attr(test, assert_instr(vpermilps))]
22659pub fn _mm_mask_permutevar_ps(src: __m128, k: __mmask8, a: __m128, b: __m128i) -> __m128 {
22660 unsafe {
22661 let permute: f32x4 = _mm_permutevar_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, permute, src.as_f32x4()))
22663 }
22664}
22665
22666/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22667///
22668/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutevar_ps&expand=4193)
22669#[inline]
22670#[target_feature(enable = "avx512f,avx512vl")]
22671#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22672#[cfg_attr(test, assert_instr(vpermilps))]
22673pub fn _mm_maskz_permutevar_ps(k: __mmask8, a: __m128, b: __m128i) -> __m128 {
22674 unsafe {
22675 let permute: f32x4 = _mm_permutevar_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, permute, f32x4::ZERO))
22677 }
22678}
22679
22680/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst.
22681///
22682/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutevar_pd&expand=4191)
22683#[inline]
22684#[target_feature(enable = "avx512f")]
22685#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22686#[cfg_attr(test, assert_instr(vpermilpd))]
22687pub fn _mm512_permutevar_pd(a: __m512d, b: __m512i) -> __m512d {
    unsafe { transmute(vpermilpd(a.as_f64x8(), b.as_i64x8())) }
22689}
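
// Note the selector subtlety for the f64 form (matching `vpermilpd`): it is bit 1 of
// each 64-bit control element, not bit 0, that chooses between the low and high
// element of the corresponding 128-bit pair. Illustrative sketch only:
//
//     let a = _mm512_setr_pd(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
//     let b = _mm512_set1_epi64(2); // bit 1 set: pick the high element of each pair
//     let r = _mm512_permutevar_pd(a, b);
//     // lanes of r: [1.0, 1.0, 3.0, 3.0, 5.0, 5.0, 7.0, 7.0]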
22690
22691/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22692///
22693/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutevar_pd&expand=4189)
22694#[inline]
22695#[target_feature(enable = "avx512f")]
22696#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22697#[cfg_attr(test, assert_instr(vpermilpd))]
22698pub fn _mm512_mask_permutevar_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512i) -> __m512d {
22699 unsafe {
22700 let permute: f64x8 = _mm512_permutevar_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, permute, src.as_f64x8()))
22702 }
22703}
22704
22705/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22706///
22707/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutevar_pd&expand=4190)
22708#[inline]
22709#[target_feature(enable = "avx512f")]
22710#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22711#[cfg_attr(test, assert_instr(vpermilpd))]
22712pub fn _mm512_maskz_permutevar_pd(k: __mmask8, a: __m512d, b: __m512i) -> __m512d {
22713 unsafe {
22714 let permute: f64x8 = _mm512_permutevar_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, permute, f64x8::ZERO))
22716 }
22717}
22718
22719/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22720///
22721/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutevar_pd&expand=4186)
22722#[inline]
22723#[target_feature(enable = "avx512f,avx512vl")]
22724#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22725#[cfg_attr(test, assert_instr(vpermilpd))]
22726pub fn _mm256_mask_permutevar_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256i) -> __m256d {
22727 unsafe {
22728 let permute: f64x4 = _mm256_permutevar_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, permute, src.as_f64x4()))
22730 }
22731}
22732
22733/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22734///
22735/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutevar_pd&expand=4187)
22736#[inline]
22737#[target_feature(enable = "avx512f,avx512vl")]
22738#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22739#[cfg_attr(test, assert_instr(vpermilpd))]
22740pub fn _mm256_maskz_permutevar_pd(k: __mmask8, a: __m256d, b: __m256i) -> __m256d {
22741 unsafe {
22742 let permute: f64x4 = _mm256_permutevar_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, permute, f64x4::ZERO))
22744 }
22745}
22746
22747/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22748///
22749/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutevar_pd&expand=4183)
22750#[inline]
22751#[target_feature(enable = "avx512f,avx512vl")]
22752#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22753#[cfg_attr(test, assert_instr(vpermilpd))]
22754pub fn _mm_mask_permutevar_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128i) -> __m128d {
22755 unsafe {
22756 let permute: f64x2 = _mm_permutevar_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, permute, src.as_f64x2()))
22758 }
22759}
22760
22761/// Shuffle double-precision (64-bit) floating-point elements in a within 128-bit lanes using the control in b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22762///
22763/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutevar_pd&expand=4184)
22764#[inline]
22765#[target_feature(enable = "avx512f,avx512vl")]
22766#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22767#[cfg_attr(test, assert_instr(vpermilpd))]
22768pub fn _mm_maskz_permutevar_pd(k: __mmask8, a: __m128d, b: __m128i) -> __m128d {
22769 unsafe {
22770 let permute: f64x2 = _mm_permutevar_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, permute, f64x2::ZERO))
22772 }
22773}
22774
22775/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
22776///
22777/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_epi32&expand=4301)
22778#[inline]
22779#[target_feature(enable = "avx512f")]
22780#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22781#[cfg_attr(test, assert_instr(vperm))] //should be vpermd
22782pub fn _mm512_permutexvar_epi32(idx: __m512i, a: __m512i) -> __m512i {
    unsafe { transmute(vpermd(a.as_i32x16(), idx.as_i32x16())) }
22784}
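
// Full cross-lane permute driven by a vector of indices (only the low 4 bits of each
// index element are used). A common use is reversing a vector; illustrative sketch
// only:
//
//     let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
//     let idx = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
//     let r = _mm512_permutexvar_epi32(idx, a);
//     // lanes of r: [15, 14, 13, 12, ..., 1, 0]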
22785
22786/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22787///
22788/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_epi32&expand=4299)
22789#[inline]
22790#[target_feature(enable = "avx512f")]
22791#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22792#[cfg_attr(test, assert_instr(vpermd))]
22793pub fn _mm512_mask_permutexvar_epi32(
22794 src: __m512i,
22795 k: __mmask16,
22796 idx: __m512i,
22797 a: __m512i,
22798) -> __m512i {
22799 unsafe {
22800 let permute: i32x16 = _mm512_permutexvar_epi32(idx, a).as_i32x16();
        transmute(simd_select_bitmask(k, permute, src.as_i32x16()))
22802 }
22803}
22804
22805/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22806///
22807/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_epi32&expand=4300)
22808#[inline]
22809#[target_feature(enable = "avx512f")]
22810#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22811#[cfg_attr(test, assert_instr(vpermd))]
22812pub fn _mm512_maskz_permutexvar_epi32(k: __mmask16, idx: __m512i, a: __m512i) -> __m512i {
22813 unsafe {
22814 let permute: i32x16 = _mm512_permutexvar_epi32(idx, a).as_i32x16();
        transmute(simd_select_bitmask(k, permute, i32x16::ZERO))
22816 }
22817}
22818
22819/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
22820///
22821/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_epi32&expand=4298)
22822#[inline]
22823#[target_feature(enable = "avx512f,avx512vl")]
22824#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22825#[cfg_attr(test, assert_instr(vperm))] //should be vpermd
22826pub fn _mm256_permutexvar_epi32(idx: __m256i, a: __m256i) -> __m256i {
    _mm256_permutevar8x32_epi32(a, idx) // llvm uses llvm.x86.avx2.permd
22828}
22829
22830/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22831///
22832/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_epi32&expand=4296)
22833#[inline]
22834#[target_feature(enable = "avx512f,avx512vl")]
22835#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22836#[cfg_attr(test, assert_instr(vpermd))]
22837pub fn _mm256_mask_permutexvar_epi32(
22838 src: __m256i,
22839 k: __mmask8,
22840 idx: __m256i,
22841 a: __m256i,
22842) -> __m256i {
22843 unsafe {
22844 let permute: i32x8 = _mm256_permutexvar_epi32(idx, a).as_i32x8();
        transmute(simd_select_bitmask(k, permute, src.as_i32x8()))
22846 }
22847}
22848
22849/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22850///
22851/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_epi32&expand=4297)
22852#[inline]
22853#[target_feature(enable = "avx512f,avx512vl")]
22854#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22855#[cfg_attr(test, assert_instr(vpermd))]
22856pub fn _mm256_maskz_permutexvar_epi32(k: __mmask8, idx: __m256i, a: __m256i) -> __m256i {
22857 unsafe {
22858 let permute: i32x8 = _mm256_permutexvar_epi32(idx, a).as_i32x8();
        transmute(simd_select_bitmask(k, permute, i32x8::ZERO))
22860 }
22861}
22862
22863/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
22864///
22865/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_epi64&expand=4307)
22866#[inline]
22867#[target_feature(enable = "avx512f")]
22868#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22869#[cfg_attr(test, assert_instr(vperm))] //should be vpermq
22870pub fn _mm512_permutexvar_epi64(idx: __m512i, a: __m512i) -> __m512i {
    unsafe { transmute(vpermq(a.as_i64x8(), idx.as_i64x8())) }
22872}
22873
22874/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22875///
22876/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_epi64&expand=4305)
22877#[inline]
22878#[target_feature(enable = "avx512f")]
22879#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22880#[cfg_attr(test, assert_instr(vpermq))]
22881pub fn _mm512_mask_permutexvar_epi64(
22882 src: __m512i,
22883 k: __mmask8,
22884 idx: __m512i,
22885 a: __m512i,
22886) -> __m512i {
22887 unsafe {
22888 let permute: i64x8 = _mm512_permutexvar_epi64(idx, a).as_i64x8();
        transmute(simd_select_bitmask(k, permute, src.as_i64x8()))
22890 }
22891}
22892
22893/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22894///
22895/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_epi64&expand=4306)
22896#[inline]
22897#[target_feature(enable = "avx512f")]
22898#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22899#[cfg_attr(test, assert_instr(vpermq))]
22900pub fn _mm512_maskz_permutexvar_epi64(k: __mmask8, idx: __m512i, a: __m512i) -> __m512i {
22901 unsafe {
22902 let permute: i64x8 = _mm512_permutexvar_epi64(idx, a).as_i64x8();
        transmute(simd_select_bitmask(k, permute, i64x8::ZERO))
22904 }
22905}
22906
22907/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
22908///
22909/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_epi64&expand=4304)
22910#[inline]
22911#[target_feature(enable = "avx512f,avx512vl")]
22912#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22913#[cfg_attr(test, assert_instr(vperm))] //should be vpermq
22914pub fn _mm256_permutexvar_epi64(idx: __m256i, a: __m256i) -> __m256i {
    unsafe { transmute(vpermq256(a.as_i64x4(), idx.as_i64x4())) }
22916}
22917
22918/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22919///
22920/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_epi64&expand=4302)
22921#[inline]
22922#[target_feature(enable = "avx512f,avx512vl")]
22923#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22924#[cfg_attr(test, assert_instr(vpermq))]
22925pub fn _mm256_mask_permutexvar_epi64(
22926 src: __m256i,
22927 k: __mmask8,
22928 idx: __m256i,
22929 a: __m256i,
22930) -> __m256i {
22931 unsafe {
22932 let permute: i64x4 = _mm256_permutexvar_epi64(idx, a).as_i64x4();
        transmute(simd_select_bitmask(k, permute, src.as_i64x4()))
22934 }
22935}
22936
22937/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22938///
22939/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_epi64&expand=4303)
22940#[inline]
22941#[target_feature(enable = "avx512f,avx512vl")]
22942#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22943#[cfg_attr(test, assert_instr(vpermq))]
22944pub fn _mm256_maskz_permutexvar_epi64(k: __mmask8, idx: __m256i, a: __m256i) -> __m256i {
22945 unsafe {
22946 let permute: i64x4 = _mm256_permutexvar_epi64(idx, a).as_i64x4();
        transmute(simd_select_bitmask(k, permute, i64x4::ZERO))
22948 }
22949}
22950
22951/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx.
22952///
22953/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_ps&expand=4200)
22954#[inline]
22955#[target_feature(enable = "avx512f")]
22956#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22957#[cfg_attr(test, assert_instr(vpermps))]
22958pub fn _mm512_permutexvar_ps(idx: __m512i, a: __m512) -> __m512 {
    unsafe { transmute(vpermps(a.as_f32x16(), idx.as_i32x16())) }
22960}
22961
22962/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
22963///
22964/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_ps&expand=4326)
22965#[inline]
22966#[target_feature(enable = "avx512f")]
22967#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22968#[cfg_attr(test, assert_instr(vpermps))]
22969pub fn _mm512_mask_permutexvar_ps(src: __m512, k: __mmask16, idx: __m512i, a: __m512) -> __m512 {
22970 unsafe {
22971 let permute: f32x16 = _mm512_permutexvar_ps(idx, a).as_f32x16();
        transmute(simd_select_bitmask(k, permute, src.as_f32x16()))
22973 }
22974}
22975
22976/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
22977///
22978/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_ps&expand=4327)
22979#[inline]
22980#[target_feature(enable = "avx512f")]
22981#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22982#[cfg_attr(test, assert_instr(vpermps))]
22983pub fn _mm512_maskz_permutexvar_ps(k: __mmask16, idx: __m512i, a: __m512) -> __m512 {
22984 unsafe {
22985 let permute: f32x16 = _mm512_permutexvar_ps(idx, a).as_f32x16();
        transmute(simd_select_bitmask(k, permute, f32x16::ZERO))
22987 }
22988}
22989
22990/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx.
22991///
22992/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_ps&expand=4325)
22993#[inline]
22994#[target_feature(enable = "avx512f,avx512vl")]
22995#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
22996#[cfg_attr(test, assert_instr(vpermps))]
22997pub fn _mm256_permutexvar_ps(idx: __m256i, a: __m256) -> __m256 {
22998 _mm256_permutevar8x32_ps(a, idx) //llvm.x86.avx2.permps
22999}
23000
23001/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23002///
23003/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_ps&expand=4323)
23004#[inline]
23005#[target_feature(enable = "avx512f,avx512vl")]
23006#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23007#[cfg_attr(test, assert_instr(vpermps))]
23008pub fn _mm256_mask_permutexvar_ps(src: __m256, k: __mmask8, idx: __m256i, a: __m256) -> __m256 {
23009 unsafe {
23010 let permute: f32x8 = _mm256_permutexvar_ps(idx, a).as_f32x8();
        transmute(simd_select_bitmask(k, permute, src.as_f32x8()))
23012 }
23013}
23014
23015/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23016///
23017/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_ps&expand=4324)
23018#[inline]
23019#[target_feature(enable = "avx512f,avx512vl")]
23020#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23021#[cfg_attr(test, assert_instr(vpermps))]
23022pub fn _mm256_maskz_permutexvar_ps(k: __mmask8, idx: __m256i, a: __m256) -> __m256 {
23023 unsafe {
23024 let permute: f32x8 = _mm256_permutexvar_ps(idx, a).as_f32x8();
        transmute(simd_select_bitmask(k, permute, f32x8::ZERO))
23026 }
23027}
23028
23029/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst.
23030///
23031/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutexvar_pd&expand=4322)
23032#[inline]
23033#[target_feature(enable = "avx512f")]
23034#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23035#[cfg_attr(test, assert_instr(vpermpd))]
23036pub fn _mm512_permutexvar_pd(idx: __m512i, a: __m512d) -> __m512d {
    unsafe { transmute(vpermpd(a.as_f64x8(), idx.as_i64x8())) }
23038}
23039
23040/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23041///
23042/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutexvar_pd&expand=4320)
23043#[inline]
23044#[target_feature(enable = "avx512f")]
23045#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23046#[cfg_attr(test, assert_instr(vpermpd))]
23047pub fn _mm512_mask_permutexvar_pd(src: __m512d, k: __mmask8, idx: __m512i, a: __m512d) -> __m512d {
23048 unsafe {
23049 let permute: f64x8 = _mm512_permutexvar_pd(idx, a).as_f64x8();
        transmute(simd_select_bitmask(k, permute, src.as_f64x8()))
23051 }
23052}
23053
23054/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23055///
23056/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutexvar_pd&expand=4321)
23057#[inline]
23058#[target_feature(enable = "avx512f")]
23059#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23060#[cfg_attr(test, assert_instr(vpermpd))]
23061pub fn _mm512_maskz_permutexvar_pd(k: __mmask8, idx: __m512i, a: __m512d) -> __m512d {
23062 unsafe {
23063 let permute: f64x8 = _mm512_permutexvar_pd(idx, a).as_f64x8();
        transmute(simd_select_bitmask(k, permute, f64x8::ZERO))
23065 }
23066}
23067
23068/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst.
23069///
23070/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutexvar_pd&expand=4319)
23071#[inline]
23072#[target_feature(enable = "avx512f,avx512vl")]
23073#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23074#[cfg_attr(test, assert_instr(vpermpd))]
23075pub fn _mm256_permutexvar_pd(idx: __m256i, a: __m256d) -> __m256d {
    unsafe { transmute(vpermpd256(a.as_f64x4(), idx.as_i64x4())) }
23077}
23078
23079/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23080///
23081/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutexvar_pd&expand=4317)
23082#[inline]
23083#[target_feature(enable = "avx512f,avx512vl")]
23084#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23085#[cfg_attr(test, assert_instr(vpermpd))]
23086pub fn _mm256_mask_permutexvar_pd(src: __m256d, k: __mmask8, idx: __m256i, a: __m256d) -> __m256d {
23087 unsafe {
23088 let permute: f64x4 = _mm256_permutexvar_pd(idx, a).as_f64x4();
        transmute(simd_select_bitmask(k, permute, src.as_f64x4()))
23090 }
23091}
23092
23093/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23094///
23095/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutexvar_pd&expand=4318)
23096#[inline]
23097#[target_feature(enable = "avx512f,avx512vl")]
23098#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23099#[cfg_attr(test, assert_instr(vpermpd))]
23100pub fn _mm256_maskz_permutexvar_pd(k: __mmask8, idx: __m256i, a: __m256d) -> __m256d {
23101 unsafe {
23102 let permute: f64x4 = _mm256_permutexvar_pd(idx, a).as_f64x4();
        transmute(simd_select_bitmask(k, permute, f64x4::ZERO))
23104 }
23105}
23106
23107/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23108///
23109/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_epi32&expand=4238)
23110#[inline]
23111#[target_feature(enable = "avx512f")]
23112#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23113#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
23114pub fn _mm512_permutex2var_epi32(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vpermi2d(a.as_i32x16(), idx.as_i32x16(), b.as_i32x16())) }
23116}
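
// Two-source permute: bit 4 of each index element selects the source (0 picks from
// `a`, 1 picks from `b`) and the low 4 bits index into that source. Illustrative
// sketch of selecting alternately from `a` and `b`:
//
//     let a = _mm512_set1_epi32(1);
//     let b = _mm512_set1_epi32(2);
//     let idx = _mm512_setr_epi32(0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23);
//     let r = _mm512_permutex2var_epi32(a, idx, b);
//     // lanes of r alternate between a and b: [1, 2, 1, 2, ...]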
23117
23118/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23119///
23120/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_epi32&expand=4235)
23121#[inline]
23122#[target_feature(enable = "avx512f")]
23123#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23124#[cfg_attr(test, assert_instr(vpermt2d))]
23125pub fn _mm512_mask_permutex2var_epi32(
23126 a: __m512i,
23127 k: __mmask16,
23128 idx: __m512i,
23129 b: __m512i,
23130) -> __m512i {
23131 unsafe {
23132 let permute: i32x16 = _mm512_permutex2var_epi32(a, idx, b).as_i32x16();
        transmute(simd_select_bitmask(k, permute, a.as_i32x16()))
23134 }
23135}
23136
23137/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23138///
23139/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_epi32&expand=4237)
23140#[inline]
23141#[target_feature(enable = "avx512f")]
23142#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23143#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
23144pub fn _mm512_maskz_permutex2var_epi32(
23145 k: __mmask16,
23146 a: __m512i,
23147 idx: __m512i,
23148 b: __m512i,
23149) -> __m512i {
23150 unsafe {
23151 let permute: i32x16 = _mm512_permutex2var_epi32(a, idx, b).as_i32x16();
        transmute(simd_select_bitmask(k, permute, i32x16::ZERO))
23153 }
23154}
23155
23156/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23157///
23158/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_epi32&expand=4236)
23159#[inline]
23160#[target_feature(enable = "avx512f")]
23161#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23162#[cfg_attr(test, assert_instr(vpermi2d))]
23163pub fn _mm512_mask2_permutex2var_epi32(
23164 a: __m512i,
23165 idx: __m512i,
23166 k: __mmask16,
23167 b: __m512i,
23168) -> __m512i {
23169 unsafe {
23170 let permute: i32x16 = _mm512_permutex2var_epi32(a, idx, b).as_i32x16();
        transmute(simd_select_bitmask(k, permute, idx.as_i32x16()))
23172 }
23173}
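
// The three masked forms above differ only in what fills a result lane whose mask bit
// is clear: `_mm512_mask_permutex2var_epi32` keeps the lane from `a`,
// `_mm512_maskz_permutex2var_epi32` zeroes it, and `_mm512_mask2_permutex2var_epi32`
// keeps the raw index value from `idx`. Illustrative sketch only:
//
//     let a = _mm512_set1_epi32(1);
//     let b = _mm512_set1_epi32(2);
//     let idx = _mm512_setr_epi32(0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23);
//     let r = _mm512_mask2_permutex2var_epi32(a, idx, 0b0000_0000_1111_1111, b);
//     // lanes 0..7 hold the permuted values; lanes 8..15 are copied from idx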
23174
23175/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23176///
23177/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_epi32&expand=4234)
23178#[inline]
23179#[target_feature(enable = "avx512f,avx512vl")]
23180#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23181#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
23182pub fn _mm256_permutex2var_epi32(a: __m256i, idx: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vpermi2d256(a.as_i32x8(), idx.as_i32x8(), b.as_i32x8())) }
23184}
23185
23186/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23187///
23188/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_epi32&expand=4231)
23189#[inline]
23190#[target_feature(enable = "avx512f,avx512vl")]
23191#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23192#[cfg_attr(test, assert_instr(vpermt2d))]
23193pub fn _mm256_mask_permutex2var_epi32(
23194 a: __m256i,
23195 k: __mmask8,
23196 idx: __m256i,
23197 b: __m256i,
23198) -> __m256i {
23199 unsafe {
23200 let permute: i32x8 = _mm256_permutex2var_epi32(a, idx, b).as_i32x8();
        transmute(simd_select_bitmask(k, permute, a.as_i32x8()))
23202 }
23203}
23204
23205/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23206///
23207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_epi32&expand=4233)
23208#[inline]
23209#[target_feature(enable = "avx512f,avx512vl")]
23210#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23211#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
23212pub fn _mm256_maskz_permutex2var_epi32(
23213 k: __mmask8,
23214 a: __m256i,
23215 idx: __m256i,
23216 b: __m256i,
23217) -> __m256i {
23218 unsafe {
23219 let permute: i32x8 = _mm256_permutex2var_epi32(a, idx, b).as_i32x8();
        transmute(simd_select_bitmask(k, permute, i32x8::ZERO))
23221 }
23222}
23223
23224/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23225///
23226/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_epi32&expand=4232)
23227#[inline]
23228#[target_feature(enable = "avx512f,avx512vl")]
23229#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23230#[cfg_attr(test, assert_instr(vpermi2d))]
23231pub fn _mm256_mask2_permutex2var_epi32(
23232 a: __m256i,
23233 idx: __m256i,
23234 k: __mmask8,
23235 b: __m256i,
23236) -> __m256i {
23237 unsafe {
23238 let permute: i32x8 = _mm256_permutex2var_epi32(a, idx, b).as_i32x8();
        transmute(simd_select_bitmask(k, permute, idx.as_i32x8()))
23240 }
23241}
23242
23243/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23244///
23245/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_epi32&expand=4230)
23246#[inline]
23247#[target_feature(enable = "avx512f,avx512vl")]
23248#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23249#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
23250pub fn _mm_permutex2var_epi32(a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vpermi2d128(a.as_i32x4(), idx.as_i32x4(), b.as_i32x4())) }
23252}
23253
23254/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23255///
23256/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_epi32&expand=4227)
23257#[inline]
23258#[target_feature(enable = "avx512f,avx512vl")]
23259#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23260#[cfg_attr(test, assert_instr(vpermt2d))]
23261pub fn _mm_mask_permutex2var_epi32(a: __m128i, k: __mmask8, idx: __m128i, b: __m128i) -> __m128i {
23262 unsafe {
23263 let permute: i32x4 = _mm_permutex2var_epi32(a, idx, b).as_i32x4();
        transmute(simd_select_bitmask(k, permute, a.as_i32x4()))
23265 }
23266}
23267
23268/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23269///
23270/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_epi32&expand=4229)
23271#[inline]
23272#[target_feature(enable = "avx512f,avx512vl")]
23273#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23274#[cfg_attr(test, assert_instr(vperm))] //vpermi2d or vpermt2d
23275pub fn _mm_maskz_permutex2var_epi32(k: __mmask8, a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
23276 unsafe {
23277 let permute: i32x4 = _mm_permutex2var_epi32(a, idx, b).as_i32x4();
        transmute(simd_select_bitmask(k, permute, i32x4::ZERO))
23279 }
23280}
23281
23282/// Shuffle 32-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23283///
23284/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_epi32&expand=4228)
23285#[inline]
23286#[target_feature(enable = "avx512f,avx512vl")]
23287#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23288#[cfg_attr(test, assert_instr(vpermi2d))]
23289pub fn _mm_mask2_permutex2var_epi32(a: __m128i, idx: __m128i, k: __mmask8, b: __m128i) -> __m128i {
23290 unsafe {
23291 let permute: i32x4 = _mm_permutex2var_epi32(a, idx, b).as_i32x4();
        transmute(simd_select_bitmask(k, permute, idx.as_i32x4()))
23293 }
23294}
23295
23296/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23297///
23298/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_epi64&expand=4250)
23299#[inline]
23300#[target_feature(enable = "avx512f")]
23301#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23302#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
23303pub fn _mm512_permutex2var_epi64(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(vpermi2q(a.as_i64x8(), idx.as_i64x8(), b.as_i64x8())) }
23305}
23306
23307/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23308///
23309/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_epi64&expand=4247)
23310#[inline]
23311#[target_feature(enable = "avx512f")]
23312#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23313#[cfg_attr(test, assert_instr(vpermt2q))]
23314pub fn _mm512_mask_permutex2var_epi64(
23315 a: __m512i,
23316 k: __mmask8,
23317 idx: __m512i,
23318 b: __m512i,
23319) -> __m512i {
23320 unsafe {
23321 let permute: i64x8 = _mm512_permutex2var_epi64(a, idx, b).as_i64x8();
        transmute(simd_select_bitmask(k, permute, a.as_i64x8()))
23323 }
23324}
23325
23326/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23327///
23328/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_epi64&expand=4249)
23329#[inline]
23330#[target_feature(enable = "avx512f")]
23331#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23332#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
23333pub fn _mm512_maskz_permutex2var_epi64(
23334 k: __mmask8,
23335 a: __m512i,
23336 idx: __m512i,
23337 b: __m512i,
23338) -> __m512i {
23339 unsafe {
23340 let permute: i64x8 = _mm512_permutex2var_epi64(a, idx, b).as_i64x8();
        transmute(simd_select_bitmask(k, permute, i64x8::ZERO))
23342 }
23343}
23344
23345/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23346///
23347/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_epi64&expand=4248)
23348#[inline]
23349#[target_feature(enable = "avx512f")]
23350#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23351#[cfg_attr(test, assert_instr(vpermi2q))]
23352pub fn _mm512_mask2_permutex2var_epi64(
23353 a: __m512i,
23354 idx: __m512i,
23355 k: __mmask8,
23356 b: __m512i,
23357) -> __m512i {
23358 unsafe {
23359 let permute: i64x8 = _mm512_permutex2var_epi64(a, idx, b).as_i64x8();
        transmute(simd_select_bitmask(k, permute, idx.as_i64x8()))
23361 }
23362}
23363
23364/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23365///
23366/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_epi64&expand=4246)
23367#[inline]
23368#[target_feature(enable = "avx512f,avx512vl")]
23369#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23370#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
23371pub fn _mm256_permutex2var_epi64(a: __m256i, idx: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(vpermi2q256(a.as_i64x4(), idx.as_i64x4(), b.as_i64x4())) }
23373}
23374
23375/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23376///
23377/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_epi64&expand=4243)
23378#[inline]
23379#[target_feature(enable = "avx512f,avx512vl")]
23380#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23381#[cfg_attr(test, assert_instr(vpermt2q))]
23382pub fn _mm256_mask_permutex2var_epi64(
23383 a: __m256i,
23384 k: __mmask8,
23385 idx: __m256i,
23386 b: __m256i,
23387) -> __m256i {
23388 unsafe {
23389 let permute: i64x4 = _mm256_permutex2var_epi64(a, idx, b).as_i64x4();
        transmute(simd_select_bitmask(k, permute, a.as_i64x4()))
23391 }
23392}
23393
23394/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23395///
23396/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_epi64&expand=4245)
23397#[inline]
23398#[target_feature(enable = "avx512f,avx512vl")]
23399#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23400#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
23401pub fn _mm256_maskz_permutex2var_epi64(
23402 k: __mmask8,
23403 a: __m256i,
23404 idx: __m256i,
23405 b: __m256i,
23406) -> __m256i {
23407 unsafe {
23408 let permute: i64x4 = _mm256_permutex2var_epi64(a, idx, b).as_i64x4();
        transmute(simd_select_bitmask(k, permute, i64x4::ZERO))
23410 }
23411}
23412
23413/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23414///
23415/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_epi64&expand=4244)
23416#[inline]
23417#[target_feature(enable = "avx512f,avx512vl")]
23418#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23419#[cfg_attr(test, assert_instr(vpermi2q))]
23420pub fn _mm256_mask2_permutex2var_epi64(
23421 a: __m256i,
23422 idx: __m256i,
23423 k: __mmask8,
23424 b: __m256i,
23425) -> __m256i {
23426 unsafe {
23427 let permute: i64x4 = _mm256_permutex2var_epi64(a, idx, b).as_i64x4();
        transmute(simd_select_bitmask(k, permute, idx.as_i64x4()))
23429 }
23430}
23431
23432/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23433///
23434/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_epi64&expand=4242)
23435#[inline]
23436#[target_feature(enable = "avx512f,avx512vl")]
23437#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23438#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
23439pub fn _mm_permutex2var_epi64(a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(vpermi2q128(a.as_i64x2(), idx.as_i64x2(), b.as_i64x2())) }
23441}
23442
23443/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23444///
23445/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_epi64&expand=4239)
23446#[inline]
23447#[target_feature(enable = "avx512f,avx512vl")]
23448#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23449#[cfg_attr(test, assert_instr(vpermt2q))]
23450pub fn _mm_mask_permutex2var_epi64(a: __m128i, k: __mmask8, idx: __m128i, b: __m128i) -> __m128i {
23451 unsafe {
23452 let permute: i64x2 = _mm_permutex2var_epi64(a, idx, b).as_i64x2();
        transmute(simd_select_bitmask(k, permute, a.as_i64x2()))
23454 }
23455}
23456
23457/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23458///
23459/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_epi64&expand=4241)
23460#[inline]
23461#[target_feature(enable = "avx512f,avx512vl")]
23462#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23463#[cfg_attr(test, assert_instr(vperm))] //vpermi2q or vpermt2q
23464pub fn _mm_maskz_permutex2var_epi64(k: __mmask8, a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
23465 unsafe {
23466 let permute: i64x2 = _mm_permutex2var_epi64(a, idx, b).as_i64x2();
        transmute(simd_select_bitmask(k, permute, i64x2::ZERO))
23468 }
23469}
23470
23471/// Shuffle 64-bit integers in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23472///
23473/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_epi64&expand=4240)
23474#[inline]
23475#[target_feature(enable = "avx512f,avx512vl")]
23476#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23477#[cfg_attr(test, assert_instr(vpermi2q))]
23478pub fn _mm_mask2_permutex2var_epi64(a: __m128i, idx: __m128i, k: __mmask8, b: __m128i) -> __m128i {
23479 unsafe {
23480 let permute: i64x2 = _mm_permutex2var_epi64(a, idx, b).as_i64x2();
        transmute(simd_select_bitmask(k, permute, idx.as_i64x2()))
23482 }
23483}
23484
23485/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23486///
23487/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_ps&expand=4286)
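///
/// Illustrative sketch (not part of Intel's description and not compiled as a doctest);
/// it assumes a CPU with AVX-512F and a toolchain with the unstable
/// `stdarch_x86_avx512` feature enabled:
///
/// ```ignore
/// let a = _mm512_setr_ps(0., 1., 2., 3., 4., 5., 6., 7.,
///                        8., 9., 10., 11., 12., 13., 14., 15.);
/// let b = _mm512_setr_ps(16., 17., 18., 19., 20., 21., 22., 23.,
///                        24., 25., 26., 27., 28., 29., 30., 31.);
/// // Bits 3:0 of each 32-bit index pick the element and bit 4 picks the source
/// // (`a` for 0, `b` for 1), so this interleaves the low halves of `a` and `b`.
/// let idx = _mm512_setr_epi32(0, 16, 1, 17, 2, 18, 3, 19,
///                             4, 20, 5, 21, 6, 22, 7, 23);
/// let r = _mm512_permutex2var_ps(a, idx, b);
/// // `r` now holds [0.0, 16.0, 1.0, 17.0, 2.0, 18.0, ..., 7.0, 23.0].
/// ```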
23488#[inline]
23489#[target_feature(enable = "avx512f")]
23490#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23491#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
23492pub fn _mm512_permutex2var_ps(a: __m512, idx: __m512i, b: __m512) -> __m512 {
    unsafe { transmute(vpermi2ps(a.as_f32x16(), idx.as_i32x16(), b.as_f32x16())) }
23494}
23495
23496/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23497///
23498/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_ps&expand=4283)
23499#[inline]
23500#[target_feature(enable = "avx512f")]
23501#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23502#[cfg_attr(test, assert_instr(vpermt2ps))]
23503pub fn _mm512_mask_permutex2var_ps(a: __m512, k: __mmask16, idx: __m512i, b: __m512) -> __m512 {
23504 unsafe {
23505 let permute: f32x16 = _mm512_permutex2var_ps(a, idx, b).as_f32x16();
        transmute(simd_select_bitmask(k, permute, a.as_f32x16()))
23507 }
23508}
23509
23510/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23511///
23512/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_ps&expand=4285)
23513#[inline]
23514#[target_feature(enable = "avx512f")]
23515#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23516#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
23517pub fn _mm512_maskz_permutex2var_ps(k: __mmask16, a: __m512, idx: __m512i, b: __m512) -> __m512 {
23518 unsafe {
23519 let permute: f32x16 = _mm512_permutex2var_ps(a, idx, b).as_f32x16();
        transmute(simd_select_bitmask(k, permute, f32x16::ZERO))
23521 }
23522}
23523
23524/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23525///
23526/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_ps&expand=4284)
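///
/// Illustrative sketch (not part of Intel's description and not compiled as a
/// doctest; same feature assumptions as `_mm512_permutex2var_ps`): lanes whose
/// mask bit is clear receive the bit pattern of the corresponding `idx` element
/// reinterpreted as an `f32`, matching the cast performed in the body below.
///
/// ```ignore
/// let a = _mm512_set1_ps(1.0);
/// let b = _mm512_set1_ps(2.0);
/// let idx = _mm512_set1_epi32(16); // every lane would select b[0]
/// // With an all-zero mask nothing is taken from the permutation; every lane
/// // gets the bits of `idx` (the integer 16) reinterpreted as an f32.
/// let r = _mm512_mask2_permutex2var_ps(a, idx, 0, b);
/// ```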
23527#[inline]
23528#[target_feature(enable = "avx512f")]
23529#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23530#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2ps, but it shows vpermt2ps
23531pub fn _mm512_mask2_permutex2var_ps(a: __m512, idx: __m512i, k: __mmask16, b: __m512) -> __m512 {
23532 unsafe {
23533 let permute: f32x16 = _mm512_permutex2var_ps(a, idx, b).as_f32x16();
23534 let idx: f32x16 = _mm512_castsi512_ps(idx).as_f32x16();
        transmute(simd_select_bitmask(k, permute, idx))
23536 }
23537}
23538
23539/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23540///
23541/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_ps&expand=4282)
23542#[inline]
23543#[target_feature(enable = "avx512f,avx512vl")]
23544#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23545#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
23546pub fn _mm256_permutex2var_ps(a: __m256, idx: __m256i, b: __m256) -> __m256 {
    unsafe { transmute(vpermi2ps256(a.as_f32x8(), idx.as_i32x8(), b.as_f32x8())) }
23548}
23549
23550/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23551///
23552/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_ps&expand=4279)
23553#[inline]
23554#[target_feature(enable = "avx512f,avx512vl")]
23555#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23556#[cfg_attr(test, assert_instr(vpermt2ps))]
23557pub fn _mm256_mask_permutex2var_ps(a: __m256, k: __mmask8, idx: __m256i, b: __m256) -> __m256 {
23558 unsafe {
23559 let permute: f32x8 = _mm256_permutex2var_ps(a, idx, b).as_f32x8();
        transmute(simd_select_bitmask(k, permute, a.as_f32x8()))
23561 }
23562}
23563
23564/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23565///
23566/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_ps&expand=4281)
23567#[inline]
23568#[target_feature(enable = "avx512f,avx512vl")]
23569#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23570#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
23571pub fn _mm256_maskz_permutex2var_ps(k: __mmask8, a: __m256, idx: __m256i, b: __m256) -> __m256 {
23572 unsafe {
23573 let permute: f32x8 = _mm256_permutex2var_ps(a, idx, b).as_f32x8();
        transmute(simd_select_bitmask(k, permute, f32x8::ZERO))
23575 }
23576}
23577
23578/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23579///
23580/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_ps&expand=4280)
23581#[inline]
23582#[target_feature(enable = "avx512f,avx512vl")]
23583#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23584#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2ps, but it shows vpermt2ps
23585pub fn _mm256_mask2_permutex2var_ps(a: __m256, idx: __m256i, k: __mmask8, b: __m256) -> __m256 {
23586 unsafe {
23587 let permute: f32x8 = _mm256_permutex2var_ps(a, idx, b).as_f32x8();
23588 let idx: f32x8 = _mm256_castsi256_ps(idx).as_f32x8();
        transmute(simd_select_bitmask(k, permute, idx))
23590 }
23591}
23592
23593/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23594///
23595/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_ps&expand=4278)
23596#[inline]
23597#[target_feature(enable = "avx512f,avx512vl")]
23598#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23599#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
23600pub fn _mm_permutex2var_ps(a: __m128, idx: __m128i, b: __m128) -> __m128 {
    unsafe { transmute(vpermi2ps128(a.as_f32x4(), idx.as_i32x4(), b.as_f32x4())) }
23602}
23603
23604/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23605///
23606/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_ps&expand=4275)
23607#[inline]
23608#[target_feature(enable = "avx512f,avx512vl")]
23609#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23610#[cfg_attr(test, assert_instr(vpermt2ps))]
23611pub fn _mm_mask_permutex2var_ps(a: __m128, k: __mmask8, idx: __m128i, b: __m128) -> __m128 {
23612 unsafe {
23613 let permute: f32x4 = _mm_permutex2var_ps(a, idx, b).as_f32x4();
        transmute(simd_select_bitmask(k, permute, a.as_f32x4()))
23615 }
23616}
23617
23618/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23619///
23620/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_ps&expand=4277)
23621#[inline]
23622#[target_feature(enable = "avx512f,avx512vl")]
23623#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23624#[cfg_attr(test, assert_instr(vperm))] //vpermi2ps or vpermt2ps
23625pub fn _mm_maskz_permutex2var_ps(k: __mmask8, a: __m128, idx: __m128i, b: __m128) -> __m128 {
23626 unsafe {
23627 let permute: f32x4 = _mm_permutex2var_ps(a, idx, b).as_f32x4();
        transmute(simd_select_bitmask(k, permute, f32x4::ZERO))
23629 }
23630}
23631
23632/// Shuffle single-precision (32-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23633///
23634/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_ps&expand=4276)
23635#[inline]
23636#[target_feature(enable = "avx512f,avx512vl")]
23637#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23638#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2ps, but it shows vpermt2ps
23639pub fn _mm_mask2_permutex2var_ps(a: __m128, idx: __m128i, k: __mmask8, b: __m128) -> __m128 {
23640 unsafe {
23641 let permute: f32x4 = _mm_permutex2var_ps(a, idx, b).as_f32x4();
23642 let idx: f32x4 = _mm_castsi128_ps(idx).as_f32x4();
        transmute(simd_select_bitmask(k, permute, idx))
23644 }
23645}
23646
23647/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23648///
23649/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_permutex2var_pd&expand=4274)
23650#[inline]
23651#[target_feature(enable = "avx512f")]
23652#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23653#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
23654pub fn _mm512_permutex2var_pd(a: __m512d, idx: __m512i, b: __m512d) -> __m512d {
    unsafe { transmute(vpermi2pd(a.as_f64x8(), idx.as_i64x8(), b.as_f64x8())) }
23656}
23657
23658/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23659///
23660/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_permutex2var_pd&expand=4271)
23661#[inline]
23662#[target_feature(enable = "avx512f")]
23663#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23664#[cfg_attr(test, assert_instr(vpermt2pd))]
23665pub fn _mm512_mask_permutex2var_pd(a: __m512d, k: __mmask8, idx: __m512i, b: __m512d) -> __m512d {
23666 unsafe {
23667 let permute: f64x8 = _mm512_permutex2var_pd(a, idx, b).as_f64x8();
        transmute(simd_select_bitmask(k, permute, a.as_f64x8()))
23669 }
23670}
23671
23672/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23673///
23674/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_permutex2var_pd&expand=4273)
23675#[inline]
23676#[target_feature(enable = "avx512f")]
23677#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23678#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
23679pub fn _mm512_maskz_permutex2var_pd(k: __mmask8, a: __m512d, idx: __m512i, b: __m512d) -> __m512d {
23680 unsafe {
23681 let permute: f64x8 = _mm512_permutex2var_pd(a, idx, b).as_f64x8();
        transmute(simd_select_bitmask(k, permute, f64x8::ZERO))
23683 }
23684}
23685
/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23687///
23688/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2_permutex2var_pd&expand=4272)
23689#[inline]
23690#[target_feature(enable = "avx512f")]
23691#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23692#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2pd, but it shows vpermt2pd
23693pub fn _mm512_mask2_permutex2var_pd(a: __m512d, idx: __m512i, k: __mmask8, b: __m512d) -> __m512d {
23694 unsafe {
23695 let permute: f64x8 = _mm512_permutex2var_pd(a, idx, b).as_f64x8();
23696 let idx: f64x8 = _mm512_castsi512_pd(idx).as_f64x8();
        transmute(simd_select_bitmask(k, permute, idx))
23698 }
23699}
23700
23701/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23702///
23703/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_permutex2var_pd&expand=4270)
23704#[inline]
23705#[target_feature(enable = "avx512f,avx512vl")]
23706#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23707#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
23708pub fn _mm256_permutex2var_pd(a: __m256d, idx: __m256i, b: __m256d) -> __m256d {
    unsafe { transmute(vpermi2pd256(a.as_f64x4(), idx.as_i64x4(), b.as_f64x4())) }
23710}
23711
23712/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23713///
23714/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_permutex2var_pd&expand=4267)
23715#[inline]
23716#[target_feature(enable = "avx512f,avx512vl")]
23717#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23718#[cfg_attr(test, assert_instr(vpermt2pd))]
23719pub fn _mm256_mask_permutex2var_pd(a: __m256d, k: __mmask8, idx: __m256i, b: __m256d) -> __m256d {
23720 unsafe {
23721 let permute: f64x4 = _mm256_permutex2var_pd(a, idx, b).as_f64x4();
        transmute(simd_select_bitmask(k, permute, a.as_f64x4()))
23723 }
23724}
23725
23726/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23727///
23728/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_permutex2var_pd&expand=4269)
23729#[inline]
23730#[target_feature(enable = "avx512f,avx512vl")]
23731#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23732#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
23733pub fn _mm256_maskz_permutex2var_pd(k: __mmask8, a: __m256d, idx: __m256i, b: __m256d) -> __m256d {
23734 unsafe {
23735 let permute: f64x4 = _mm256_permutex2var_pd(a, idx, b).as_f64x4();
        transmute(simd_select_bitmask(k, permute, f64x4::ZERO))
23737 }
23738}
23739
/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23741///
23742/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask2_permutex2var_pd&expand=4268)
23743#[inline]
23744#[target_feature(enable = "avx512f,avx512vl")]
23745#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23746#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2pd, but it shows vpermt2pd
23747pub fn _mm256_mask2_permutex2var_pd(a: __m256d, idx: __m256i, k: __mmask8, b: __m256d) -> __m256d {
23748 unsafe {
23749 let permute: f64x4 = _mm256_permutex2var_pd(a, idx, b).as_f64x4();
23750 let idx: f64x4 = _mm256_castsi256_pd(idx).as_f64x4();
        transmute(simd_select_bitmask(k, permute, idx))
23752 }
23753}
23754
23755/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst.
23756///
23757/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permutex2var_pd&expand=4266)
23758#[inline]
23759#[target_feature(enable = "avx512f,avx512vl")]
23760#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23761#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
23762pub fn _mm_permutex2var_pd(a: __m128d, idx: __m128i, b: __m128d) -> __m128d {
    unsafe { transmute(vpermi2pd128(a.as_f64x2(), idx.as_i64x2(), b.as_f64x2())) }
23764}
23765
23766/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
23767///
23768/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_permutex2var_pd&expand=4263)
23769#[inline]
23770#[target_feature(enable = "avx512f,avx512vl")]
23771#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23772#[cfg_attr(test, assert_instr(vpermt2pd))]
23773pub fn _mm_mask_permutex2var_pd(a: __m128d, k: __mmask8, idx: __m128i, b: __m128d) -> __m128d {
23774 unsafe {
23775 let permute: f64x2 = _mm_permutex2var_pd(a, idx, b).as_f64x2();
        transmute(simd_select_bitmask(k, permute, a.as_f64x2()))
23777 }
23778}
23779
23780/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23781///
23782/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_permutex2var_pd&expand=4265)
23783#[inline]
23784#[target_feature(enable = "avx512f,avx512vl")]
23785#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23786#[cfg_attr(test, assert_instr(vperm))] //vpermi2pd or vpermt2pd
23787pub fn _mm_maskz_permutex2var_pd(k: __mmask8, a: __m128d, idx: __m128i, b: __m128d) -> __m128d {
23788 unsafe {
23789 let permute: f64x2 = _mm_permutex2var_pd(a, idx, b).as_f64x2();
        transmute(simd_select_bitmask(k, permute, f64x2::ZERO))
23791 }
23792}
23793
/// Shuffle double-precision (64-bit) floating-point elements in a and b across lanes using the corresponding selector and index in idx, and store the results in dst using writemask k (elements are copied from idx when the corresponding mask bit is not set).
23795///
23796/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask2_permutex2var_pd&expand=4264)
23797#[inline]
23798#[target_feature(enable = "avx512f,avx512vl")]
23799#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23800#[cfg_attr(test, assert_instr(vperm))] //should be vpermi2pd, but it shows vpermt2pd
23801pub fn _mm_mask2_permutex2var_pd(a: __m128d, idx: __m128i, k: __mmask8, b: __m128d) -> __m128d {
23802 unsafe {
23803 let permute: f64x2 = _mm_permutex2var_pd(a, idx, b).as_f64x2();
23804 let idx: f64x2 = _mm_castsi128_pd(idx).as_f64x2();
        transmute(simd_select_bitmask(k, permute, idx))
23806 }
23807}
23808
/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst.
23810///
23811/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_epi32&expand=5150)
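///
/// Illustrative sketch (not part of Intel's description and not compiled as a doctest);
/// it assumes a CPU with AVX-512F and a toolchain with the unstable
/// `stdarch_x86_avx512` feature enabled. The control can equivalently be written
/// with the `_MM_PERM_*` constants:
///
/// ```ignore
/// let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7,
///                           8, 9, 10, 11, 12, 13, 14, 15);
/// // Each 2-bit field of the control selects one element within every 128-bit
/// // lane; `0b00_01_10_11` reverses the four elements of each lane.
/// let r = _mm512_shuffle_epi32::<0b00_01_10_11>(a);
/// // `r` now holds [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12].
/// ```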
23812#[inline]
23813#[target_feature(enable = "avx512f")]
23814#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23815#[cfg_attr(test, assert_instr(vshufps, MASK = 9))] //should be vpshufd
23816#[rustc_legacy_const_generics(1)]
23817pub fn _mm512_shuffle_epi32<const MASK: _MM_PERM_ENUM>(a: __m512i) -> __m512i {
23818 unsafe {
23819 static_assert_uimm_bits!(MASK, 8);
23820 let r: i32x16 = simd_shuffle!(
23821 a.as_i32x16(),
23822 a.as_i32x16(),
23823 [
23824 MASK as u32 & 0b11,
23825 (MASK as u32 >> 2) & 0b11,
23826 (MASK as u32 >> 4) & 0b11,
23827 (MASK as u32 >> 6) & 0b11,
23828 (MASK as u32 & 0b11) + 4,
23829 ((MASK as u32 >> 2) & 0b11) + 4,
23830 ((MASK as u32 >> 4) & 0b11) + 4,
23831 ((MASK as u32 >> 6) & 0b11) + 4,
23832 (MASK as u32 & 0b11) + 8,
23833 ((MASK as u32 >> 2) & 0b11) + 8,
23834 ((MASK as u32 >> 4) & 0b11) + 8,
23835 ((MASK as u32 >> 6) & 0b11) + 8,
23836 (MASK as u32 & 0b11) + 12,
23837 ((MASK as u32 >> 2) & 0b11) + 12,
23838 ((MASK as u32 >> 4) & 0b11) + 12,
23839 ((MASK as u32 >> 6) & 0b11) + 12,
23840 ],
23841 );
23842 transmute(r)
23843 }
23844}
23845
23846/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23847///
23848/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_epi32&expand=5148)
23849#[inline]
23850#[target_feature(enable = "avx512f")]
23851#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23852#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
23853#[rustc_legacy_const_generics(3)]
23854pub fn _mm512_mask_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
23855 src: __m512i,
23856 k: __mmask16,
23857 a: __m512i,
23858) -> __m512i {
23859 unsafe {
23860 static_assert_uimm_bits!(MASK, 8);
23861 let r: __m512i = _mm512_shuffle_epi32::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16()))
23863 }
23864}
23865
23866/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23867///
23868/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_epi32&expand=5149)
23869#[inline]
23870#[target_feature(enable = "avx512f")]
23871#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23872#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
23873#[rustc_legacy_const_generics(2)]
23874pub fn _mm512_maskz_shuffle_epi32<const MASK: _MM_PERM_ENUM>(k: __mmask16, a: __m512i) -> __m512i {
23875 unsafe {
23876 static_assert_uimm_bits!(MASK, 8);
23877 let r: __m512i = _mm512_shuffle_epi32::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO))
23879 }
23880}
23881
23882/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23883///
23884/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_epi32&expand=5145)
23885#[inline]
23886#[target_feature(enable = "avx512f,avx512vl")]
23887#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23888#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
23889#[rustc_legacy_const_generics(3)]
23890pub fn _mm256_mask_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
23891 src: __m256i,
23892 k: __mmask8,
23893 a: __m256i,
23894) -> __m256i {
23895 unsafe {
23896 static_assert_uimm_bits!(MASK, 8);
23897 let r: __m256i = _mm256_shuffle_epi32::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8()))
23899 }
23900}
23901
23902/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23903///
23904/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_epi32&expand=5146)
23905#[inline]
23906#[target_feature(enable = "avx512f,avx512vl")]
23907#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23908#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
23909#[rustc_legacy_const_generics(2)]
23910pub fn _mm256_maskz_shuffle_epi32<const MASK: _MM_PERM_ENUM>(k: __mmask8, a: __m256i) -> __m256i {
23911 unsafe {
23912 static_assert_uimm_bits!(MASK, 8);
23913 let r: __m256i = _mm256_shuffle_epi32::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO))
23915 }
23916}
23917
23918/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23919///
23920/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shuffle_epi32&expand=5142)
23921#[inline]
23922#[target_feature(enable = "avx512f,avx512vl")]
23923#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23924#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
23925#[rustc_legacy_const_generics(3)]
23926pub fn _mm_mask_shuffle_epi32<const MASK: _MM_PERM_ENUM>(
23927 src: __m128i,
23928 k: __mmask8,
23929 a: __m128i,
23930) -> __m128i {
23931 unsafe {
23932 static_assert_uimm_bits!(MASK, 8);
23933 let r: __m128i = _mm_shuffle_epi32::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4()))
23935 }
23936}
23937
23938/// Shuffle 32-bit integers in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
23939///
23940/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shuffle_epi32&expand=5143)
23941#[inline]
23942#[target_feature(enable = "avx512f,avx512vl")]
23943#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23944#[cfg_attr(test, assert_instr(vpshufd, MASK = 9))]
23945#[rustc_legacy_const_generics(2)]
23946pub fn _mm_maskz_shuffle_epi32<const MASK: _MM_PERM_ENUM>(k: __mmask8, a: __m128i) -> __m128i {
23947 unsafe {
23948 static_assert_uimm_bits!(MASK, 8);
23949 let r: __m128i = _mm_shuffle_epi32::<MASK>(a);
        transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO))
23951 }
23952}
23953
23954/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst.
23955///
23956/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_ps&expand=5203)
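///
/// Illustrative sketch (not part of Intel's description and not compiled as a doctest);
/// it assumes a CPU with AVX-512F and a toolchain with the unstable
/// `stdarch_x86_avx512` feature enabled:
///
/// ```ignore
/// let a = _mm512_setr_ps(0., 1., 2., 3., 4., 5., 6., 7.,
///                        8., 9., 10., 11., 12., 13., 14., 15.);
/// let b = _mm512_setr_ps(16., 17., 18., 19., 20., 21., 22., 23.,
///                        24., 25., 26., 27., 28., 29., 30., 31.);
/// // Within each 128-bit lane the two low results come from `a` and the two
/// // high results from `b`, each picked by a 2-bit field of the control.
/// let r = _mm512_shuffle_ps::<0b01_00_11_10>(a, b);
/// // Lane 0 of `r` is [2.0, 3.0, 16.0, 17.0]; the other lanes repeat the pattern
/// // with their own lane offsets.
/// ```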
23957#[inline]
23958#[target_feature(enable = "avx512f")]
23959#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23960#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
23961#[rustc_legacy_const_generics(2)]
23962pub fn _mm512_shuffle_ps<const MASK: i32>(a: __m512, b: __m512) -> __m512 {
23963 unsafe {
23964 static_assert_uimm_bits!(MASK, 8);
23965 simd_shuffle!(
23966 a,
23967 b,
23968 [
23969 MASK as u32 & 0b11,
23970 (MASK as u32 >> 2) & 0b11,
23971 ((MASK as u32 >> 4) & 0b11) + 16,
23972 ((MASK as u32 >> 6) & 0b11) + 16,
23973 (MASK as u32 & 0b11) + 4,
23974 ((MASK as u32 >> 2) & 0b11) + 4,
23975 ((MASK as u32 >> 4) & 0b11) + 20,
23976 ((MASK as u32 >> 6) & 0b11) + 20,
23977 (MASK as u32 & 0b11) + 8,
23978 ((MASK as u32 >> 2) & 0b11) + 8,
23979 ((MASK as u32 >> 4) & 0b11) + 24,
23980 ((MASK as u32 >> 6) & 0b11) + 24,
23981 (MASK as u32 & 0b11) + 12,
23982 ((MASK as u32 >> 2) & 0b11) + 12,
23983 ((MASK as u32 >> 4) & 0b11) + 28,
23984 ((MASK as u32 >> 6) & 0b11) + 28,
23985 ],
23986 )
23987 }
23988}
23989
23990/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
23991///
23992/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_ps&expand=5201)
23993#[inline]
23994#[target_feature(enable = "avx512f")]
23995#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
23996#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
23997#[rustc_legacy_const_generics(4)]
23998pub fn _mm512_mask_shuffle_ps<const MASK: i32>(
23999 src: __m512,
24000 k: __mmask16,
24001 a: __m512,
24002 b: __m512,
24003) -> __m512 {
24004 unsafe {
24005 static_assert_uimm_bits!(MASK, 8);
24006 let r: __m512 = _mm512_shuffle_ps::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16()))
24008 }
24009}
24010
24011/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24012///
24013/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_ps&expand=5202)
24014#[inline]
24015#[target_feature(enable = "avx512f")]
24016#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24017#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24018#[rustc_legacy_const_generics(3)]
24019pub fn _mm512_maskz_shuffle_ps<const MASK: i32>(k: __mmask16, a: __m512, b: __m512) -> __m512 {
24020 unsafe {
24021 static_assert_uimm_bits!(MASK, 8);
24022 let r: __m512 = _mm512_shuffle_ps::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO))
24024 }
24025}
24026
24027/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24028///
24029/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_ps&expand=5198)
24030#[inline]
24031#[target_feature(enable = "avx512f,avx512vl")]
24032#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24033#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24034#[rustc_legacy_const_generics(4)]
24035pub fn _mm256_mask_shuffle_ps<const MASK: i32>(
24036 src: __m256,
24037 k: __mmask8,
24038 a: __m256,
24039 b: __m256,
24040) -> __m256 {
24041 unsafe {
24042 static_assert_uimm_bits!(MASK, 8);
24043 let r: __m256 = _mm256_shuffle_ps::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8()))
24045 }
24046}
24047
24048/// Shuffle single-precision (32-bit) floating-point elements in a within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24049///
24050/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_ps&expand=5199)
24051#[inline]
24052#[target_feature(enable = "avx512f,avx512vl")]
24053#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24054#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24055#[rustc_legacy_const_generics(3)]
24056pub fn _mm256_maskz_shuffle_ps<const MASK: i32>(k: __mmask8, a: __m256, b: __m256) -> __m256 {
24057 unsafe {
24058 static_assert_uimm_bits!(MASK, 8);
24059 let r: __m256 = _mm256_shuffle_ps::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO))
24061 }
24062}
24063
24064/// Shuffle single-precision (32-bit) floating-point elements in a using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24065///
24066/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shuffle_ps&expand=5195)
24067#[inline]
24068#[target_feature(enable = "avx512f,avx512vl")]
24069#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24070#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24071#[rustc_legacy_const_generics(4)]
24072pub fn _mm_mask_shuffle_ps<const MASK: i32>(
24073 src: __m128,
24074 k: __mmask8,
24075 a: __m128,
24076 b: __m128,
24077) -> __m128 {
24078 unsafe {
24079 static_assert_uimm_bits!(MASK, 8);
24080 let r: __m128 = _mm_shuffle_ps::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4()))
24082 }
24083}
24084
24085/// Shuffle single-precision (32-bit) floating-point elements in a using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24086///
24087/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shuffle_ps&expand=5196)
24088#[inline]
24089#[target_feature(enable = "avx512f,avx512vl")]
24090#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24091#[cfg_attr(test, assert_instr(vshufps, MASK = 3))]
24092#[rustc_legacy_const_generics(3)]
24093pub fn _mm_maskz_shuffle_ps<const MASK: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
24094 unsafe {
24095 static_assert_uimm_bits!(MASK, 8);
24096 let r: __m128 = _mm_shuffle_ps::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO))
24098 }
24099}
24100
24101/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst.
24102///
24103/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_pd&expand=5192)
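///
/// Illustrative sketch (not part of Intel's description and not compiled as a doctest);
/// it assumes a CPU with AVX-512F and a toolchain with the unstable
/// `stdarch_x86_avx512` feature enabled:
///
/// ```ignore
/// let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
/// let b = _mm512_setr_pd(10., 11., 12., 13., 14., 15., 16., 17.);
/// // Within each 128-bit lane one control bit picks the low or high element of
/// // `a` for the even result and the next bit does the same for `b` for the odd
/// // result; `0b0101_0101` takes the high element of `a` and the low element of `b`.
/// let r = _mm512_shuffle_pd::<0b0101_0101>(a, b);
/// // `r` now holds [1.0, 10.0, 3.0, 12.0, 5.0, 14.0, 7.0, 16.0].
/// ```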
24104#[inline]
24105#[target_feature(enable = "avx512f")]
24106#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24107#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
24108#[rustc_legacy_const_generics(2)]
24109pub fn _mm512_shuffle_pd<const MASK: i32>(a: __m512d, b: __m512d) -> __m512d {
24110 unsafe {
24111 static_assert_uimm_bits!(MASK, 8);
24112 simd_shuffle!(
24113 a,
24114 b,
24115 [
24116 MASK as u32 & 0b1,
24117 ((MASK as u32 >> 1) & 0b1) + 8,
24118 ((MASK as u32 >> 2) & 0b1) + 2,
24119 ((MASK as u32 >> 3) & 0b1) + 10,
24120 ((MASK as u32 >> 4) & 0b1) + 4,
24121 ((MASK as u32 >> 5) & 0b1) + 12,
24122 ((MASK as u32 >> 6) & 0b1) + 6,
24123 ((MASK as u32 >> 7) & 0b1) + 14,
24124 ],
24125 )
24126 }
24127}
24128
24129/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24130///
24131/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_pd&expand=5190)
24132#[inline]
24133#[target_feature(enable = "avx512f")]
24134#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24135#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
24136#[rustc_legacy_const_generics(4)]
24137pub fn _mm512_mask_shuffle_pd<const MASK: i32>(
24138 src: __m512d,
24139 k: __mmask8,
24140 a: __m512d,
24141 b: __m512d,
24142) -> __m512d {
24143 unsafe {
24144 static_assert_uimm_bits!(MASK, 8);
24145 let r: __m512d = _mm512_shuffle_pd::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
24147 }
24148}
24149
24150/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24151///
24152/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_pd&expand=5191)
24153#[inline]
24154#[target_feature(enable = "avx512f")]
24155#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24156#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
24157#[rustc_legacy_const_generics(3)]
24158pub fn _mm512_maskz_shuffle_pd<const MASK: i32>(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
24159 unsafe {
24160 static_assert_uimm_bits!(MASK, 8);
24161 let r: __m512d = _mm512_shuffle_pd::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
24163 }
24164}
24165
24166/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24167///
24168/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_pd&expand=5187)
24169#[inline]
24170#[target_feature(enable = "avx512f,avx512vl")]
24171#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24172#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
24173#[rustc_legacy_const_generics(4)]
24174pub fn _mm256_mask_shuffle_pd<const MASK: i32>(
24175 src: __m256d,
24176 k: __mmask8,
24177 a: __m256d,
24178 b: __m256d,
24179) -> __m256d {
24180 unsafe {
24181 static_assert_uimm_bits!(MASK, 8);
24182 let r: __m256d = _mm256_shuffle_pd::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
24184 }
24185}
24186
24187/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24188///
24189/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_pd&expand=5188)
24190#[inline]
24191#[target_feature(enable = "avx512f,avx512vl")]
24192#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24193#[cfg_attr(test, assert_instr(vshufpd, MASK = 3))]
24194#[rustc_legacy_const_generics(3)]
24195pub fn _mm256_maskz_shuffle_pd<const MASK: i32>(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
24196 unsafe {
24197 static_assert_uimm_bits!(MASK, 8);
24198 let r: __m256d = _mm256_shuffle_pd::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
24200 }
24201}
24202
24203/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24204///
24205/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_shuffle_pd&expand=5184)
24206#[inline]
24207#[target_feature(enable = "avx512f,avx512vl")]
24208#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24209#[cfg_attr(test, assert_instr(vshufpd, MASK = 1))]
24210#[rustc_legacy_const_generics(4)]
24211pub fn _mm_mask_shuffle_pd<const MASK: i32>(
24212 src: __m128d,
24213 k: __mmask8,
24214 a: __m128d,
24215 b: __m128d,
24216) -> __m128d {
24217 unsafe {
24218 static_assert_uimm_bits!(MASK, 8);
24219 let r: __m128d = _mm_shuffle_pd::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x2(), src.as_f64x2()))
24221 }
24222}
24223
24224/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes using the control in imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24225///
24226/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_shuffle_pd&expand=5185)
24227#[inline]
24228#[target_feature(enable = "avx512f,avx512vl")]
24229#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24230#[cfg_attr(test, assert_instr(vshufpd, MASK = 1))]
24231#[rustc_legacy_const_generics(3)]
24232pub fn _mm_maskz_shuffle_pd<const MASK: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
24233 unsafe {
24234 static_assert_uimm_bits!(MASK, 8);
24235 let r: __m128d = _mm_shuffle_pd::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x2(), f64x2::ZERO))
24237 }
24238}
24239
24240/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst.
24241///
24242/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_i32x4&expand=5177)
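///
/// Illustrative sketch (not part of Intel's description and not compiled as a doctest);
/// it assumes a CPU with AVX-512F and a toolchain with the unstable
/// `stdarch_x86_avx512` feature enabled:
///
/// ```ignore
/// let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7,
///                           8, 9, 10, 11, 12, 13, 14, 15);
/// let b = _mm512_setr_epi32(16, 17, 18, 19, 20, 21, 22, 23,
///                           24, 25, 26, 27, 28, 29, 30, 31);
/// // Each 2-bit field selects one whole 128-bit lane: the two low result lanes
/// // come from `a`, the two high result lanes from `b`.
/// let r = _mm512_shuffle_i32x4::<0b01_00_11_10>(a, b);
/// // `r` now holds lanes 2 and 3 of `a` ([8..=15]) followed by lanes 0 and 1
/// // of `b` ([16..=23]).
/// ```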
24243#[inline]
24244#[target_feature(enable = "avx512f")]
24245#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24246#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_01_01_01))] //should be vshufi32x4
24247#[rustc_legacy_const_generics(2)]
24248pub fn _mm512_shuffle_i32x4<const MASK: i32>(a: __m512i, b: __m512i) -> __m512i {
24249 unsafe {
24250 static_assert_uimm_bits!(MASK, 8);
24251 let a = a.as_i32x16();
24252 let b = b.as_i32x16();
24253 let r: i32x16 = simd_shuffle!(
24254 a,
24255 b,
24256 [
24257 (MASK as u32 & 0b11) * 4 + 0,
24258 (MASK as u32 & 0b11) * 4 + 1,
24259 (MASK as u32 & 0b11) * 4 + 2,
24260 (MASK as u32 & 0b11) * 4 + 3,
24261 ((MASK as u32 >> 2) & 0b11) * 4 + 0,
24262 ((MASK as u32 >> 2) & 0b11) * 4 + 1,
24263 ((MASK as u32 >> 2) & 0b11) * 4 + 2,
24264 ((MASK as u32 >> 2) & 0b11) * 4 + 3,
24265 ((MASK as u32 >> 4) & 0b11) * 4 + 0 + 16,
24266 ((MASK as u32 >> 4) & 0b11) * 4 + 1 + 16,
24267 ((MASK as u32 >> 4) & 0b11) * 4 + 2 + 16,
24268 ((MASK as u32 >> 4) & 0b11) * 4 + 3 + 16,
24269 ((MASK as u32 >> 6) & 0b11) * 4 + 0 + 16,
24270 ((MASK as u32 >> 6) & 0b11) * 4 + 1 + 16,
24271 ((MASK as u32 >> 6) & 0b11) * 4 + 2 + 16,
24272 ((MASK as u32 >> 6) & 0b11) * 4 + 3 + 16,
24273 ],
24274 );
24275 transmute(r)
24276 }
24277}
24278
24279/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24280///
24281/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_i32x4&expand=5175)
24282#[inline]
24283#[target_feature(enable = "avx512f")]
24284#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24285#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b10_11_01_01))]
24286#[rustc_legacy_const_generics(4)]
24287pub fn _mm512_mask_shuffle_i32x4<const MASK: i32>(
24288 src: __m512i,
24289 k: __mmask16,
24290 a: __m512i,
24291 b: __m512i,
24292) -> __m512i {
24293 unsafe {
24294 static_assert_uimm_bits!(MASK, 8);
24295 let r: __m512i = _mm512_shuffle_i32x4::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16()))
24297 }
24298}
24299
24300/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24301///
24302/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_i32x4&expand=5176)
24303#[inline]
24304#[target_feature(enable = "avx512f")]
24305#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24306#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b10_11_01_01))]
24307#[rustc_legacy_const_generics(3)]
24308pub fn _mm512_maskz_shuffle_i32x4<const MASK: i32>(
24309 k: __mmask16,
24310 a: __m512i,
24311 b: __m512i,
24312) -> __m512i {
24313 unsafe {
24314 static_assert_uimm_bits!(MASK, 8);
24315 let r: __m512i = _mm512_shuffle_i32x4::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO))
24317 }
24318}
24319
24320/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst.
24321///
24322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_i32x4&expand=5174)
24323#[inline]
24324#[target_feature(enable = "avx512f,avx512vl")]
24325#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24326#[cfg_attr(test, assert_instr(vperm, MASK = 0b11))] //should be vshufi32x4
24327#[rustc_legacy_const_generics(2)]
24328pub fn _mm256_shuffle_i32x4<const MASK: i32>(a: __m256i, b: __m256i) -> __m256i {
24329 unsafe {
24330 static_assert_uimm_bits!(MASK, 8);
24331 let a: i32x8 = a.as_i32x8();
24332 let b: i32x8 = b.as_i32x8();
24333 let r: i32x8 = simd_shuffle!(
24334 a,
24335 b,
24336 [
24337 (MASK as u32 & 0b1) * 4 + 0,
24338 (MASK as u32 & 0b1) * 4 + 1,
24339 (MASK as u32 & 0b1) * 4 + 2,
24340 (MASK as u32 & 0b1) * 4 + 3,
24341 ((MASK as u32 >> 1) & 0b1) * 4 + 0 + 8,
24342 ((MASK as u32 >> 1) & 0b1) * 4 + 1 + 8,
24343 ((MASK as u32 >> 1) & 0b1) * 4 + 2 + 8,
24344 ((MASK as u32 >> 1) & 0b1) * 4 + 3 + 8,
24345 ],
24346 );
        transmute(r)
24348 }
24349}
24350
24351/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24352///
24353/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_i32x4&expand=5172)
24354#[inline]
24355#[target_feature(enable = "avx512f,avx512vl")]
24356#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24357#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b11))]
24358#[rustc_legacy_const_generics(4)]
24359pub fn _mm256_mask_shuffle_i32x4<const MASK: i32>(
24360 src: __m256i,
24361 k: __mmask8,
24362 a: __m256i,
24363 b: __m256i,
24364) -> __m256i {
24365 unsafe {
24366 static_assert_uimm_bits!(MASK, 8);
24367 let r: __m256i = _mm256_shuffle_i32x4::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8()))
24369 }
24370}
24371
24372/// Shuffle 128-bits (composed of 4 32-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24373///
24374/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_i32x4&expand=5173)
24375#[inline]
24376#[target_feature(enable = "avx512f,avx512vl")]
24377#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24378#[cfg_attr(test, assert_instr(vshufi32x4, MASK = 0b11))]
24379#[rustc_legacy_const_generics(3)]
24380pub fn _mm256_maskz_shuffle_i32x4<const MASK: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
24381 unsafe {
24382 static_assert_uimm_bits!(MASK, 8);
24383 let r: __m256i = _mm256_shuffle_i32x4::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO))
24385 }
24386}
24387
24388/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst.
24389///
24390/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_i64x2&expand=5183)
24391#[inline]
24392#[target_feature(enable = "avx512f")]
24393#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24394#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_11_11_11))]
24395#[rustc_legacy_const_generics(2)]
24396pub fn _mm512_shuffle_i64x2<const MASK: i32>(a: __m512i, b: __m512i) -> __m512i {
24397 unsafe {
24398 static_assert_uimm_bits!(MASK, 8);
24399 let a: i64x8 = a.as_i64x8();
24400 let b: i64x8 = b.as_i64x8();
24401 let r: i64x8 = simd_shuffle!(
24402 a,
24403 b,
24404 [
24405 (MASK as u32 & 0b11) * 2 + 0,
24406 (MASK as u32 & 0b11) * 2 + 1,
24407 ((MASK as u32 >> 2) & 0b11) * 2 + 0,
24408 ((MASK as u32 >> 2) & 0b11) * 2 + 1,
24409 ((MASK as u32 >> 4) & 0b11) * 2 + 0 + 8,
24410 ((MASK as u32 >> 4) & 0b11) * 2 + 1 + 8,
24411 ((MASK as u32 >> 6) & 0b11) * 2 + 0 + 8,
24412 ((MASK as u32 >> 6) & 0b11) * 2 + 1 + 8,
24413 ],
24414 );
        transmute(r)
24416 }
24417}
24418
24419/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24420///
24421/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_i64x2&expand=5181)
24422#[inline]
24423#[target_feature(enable = "avx512f")]
24424#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24425#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_11_11_11))]
24426#[rustc_legacy_const_generics(4)]
24427pub fn _mm512_mask_shuffle_i64x2<const MASK: i32>(
24428 src: __m512i,
24429 k: __mmask8,
24430 a: __m512i,
24431 b: __m512i,
24432) -> __m512i {
24433 unsafe {
24434 static_assert_uimm_bits!(MASK, 8);
24435 let r: __m512i = _mm512_shuffle_i64x2::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8()))
24437 }
24438}
24439
24440/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24441///
24442/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_i64x2&expand=5182)
24443#[inline]
24444#[target_feature(enable = "avx512f")]
24445#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24446#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b10_11_11_11))]
24447#[rustc_legacy_const_generics(3)]
24448pub fn _mm512_maskz_shuffle_i64x2<const MASK: i32>(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
24449 unsafe {
24450 static_assert_uimm_bits!(MASK, 8);
24451 let r: __m512i = _mm512_shuffle_i64x2::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO))
24453 }
24454}
24455
24456/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst.
24457///
24458/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_i64x2&expand=5180)
24459#[inline]
24460#[target_feature(enable = "avx512f,avx512vl")]
24461#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24462#[cfg_attr(test, assert_instr(vperm, MASK = 0b01))] //should be vshufi64x2
24463#[rustc_legacy_const_generics(2)]
24464pub fn _mm256_shuffle_i64x2<const MASK: i32>(a: __m256i, b: __m256i) -> __m256i {
24465 unsafe {
24466 static_assert_uimm_bits!(MASK, 8);
24467 let a: i64x4 = a.as_i64x4();
24468 let b: i64x4 = b.as_i64x4();
24469 let r: i64x4 = simd_shuffle!(
24470 a,
24471 b,
24472 [
24473 (MASK as u32 & 0b1) * 2 + 0,
24474 (MASK as u32 & 0b1) * 2 + 1,
24475 ((MASK as u32 >> 1) & 0b1) * 2 + 0 + 4,
24476 ((MASK as u32 >> 1) & 0b1) * 2 + 1 + 4,
24477 ],
24478 );
        transmute(r)
24480 }
24481}
24482
24483/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24484///
24485/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_i64x2&expand=5178)
24486#[inline]
24487#[target_feature(enable = "avx512f,avx512vl")]
24488#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24489#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b11))]
24490#[rustc_legacy_const_generics(4)]
24491pub fn _mm256_mask_shuffle_i64x2<const MASK: i32>(
24492 src: __m256i,
24493 k: __mmask8,
24494 a: __m256i,
24495 b: __m256i,
24496) -> __m256i {
24497 unsafe {
24498 static_assert_uimm_bits!(MASK, 8);
24499 let r: __m256i = _mm256_shuffle_i64x2::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4()))
24501 }
24502}
24503
24504/// Shuffle 128-bits (composed of 2 64-bit integers) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24505///
24506/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_i64x2&expand=5179)
24507#[inline]
24508#[target_feature(enable = "avx512f,avx512vl")]
24509#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24510#[cfg_attr(test, assert_instr(vshufi64x2, MASK = 0b11))]
24511#[rustc_legacy_const_generics(3)]
24512pub fn _mm256_maskz_shuffle_i64x2<const MASK: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
24513 unsafe {
24514 static_assert_uimm_bits!(MASK, 8);
24515 let r: __m256i = _mm256_shuffle_i64x2::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO))
24517 }
24518}
24519
24520/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
24521///
24522/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_f32x4&expand=5165)
24523#[inline]
24524#[target_feature(enable = "avx512f")]
24525#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b1011))] //should be vshuff32x4, but generates vshuff64x2
24527#[rustc_legacy_const_generics(2)]
24528pub fn _mm512_shuffle_f32x4<const MASK: i32>(a: __m512, b: __m512) -> __m512 {
24529 unsafe {
24530 static_assert_uimm_bits!(MASK, 8);
24531 let a = a.as_f32x16();
24532 let b = b.as_f32x16();
24533 let r: f32x16 = simd_shuffle!(
24534 a,
24535 b,
24536 [
24537 (MASK as u32 & 0b11) * 4 + 0,
24538 (MASK as u32 & 0b11) * 4 + 1,
24539 (MASK as u32 & 0b11) * 4 + 2,
24540 (MASK as u32 & 0b11) * 4 + 3,
24541 ((MASK as u32 >> 2) & 0b11) * 4 + 0,
24542 ((MASK as u32 >> 2) & 0b11) * 4 + 1,
24543 ((MASK as u32 >> 2) & 0b11) * 4 + 2,
24544 ((MASK as u32 >> 2) & 0b11) * 4 + 3,
24545 ((MASK as u32 >> 4) & 0b11) * 4 + 0 + 16,
24546 ((MASK as u32 >> 4) & 0b11) * 4 + 1 + 16,
24547 ((MASK as u32 >> 4) & 0b11) * 4 + 2 + 16,
24548 ((MASK as u32 >> 4) & 0b11) * 4 + 3 + 16,
24549 ((MASK as u32 >> 6) & 0b11) * 4 + 0 + 16,
24550 ((MASK as u32 >> 6) & 0b11) * 4 + 1 + 16,
24551 ((MASK as u32 >> 6) & 0b11) * 4 + 2 + 16,
24552 ((MASK as u32 >> 6) & 0b11) * 4 + 3 + 16,
24553 ],
24554 );
24555 transmute(r)
24556 }
24557}
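
// Illustrative sketch only (hypothetical helper, not part of the test suite):
// each 128-bit input lane below holds four consecutive values so the lane
// origin is easy to read off the result. The 2-bit MASK fields, from low to
// high, pick the `a` lane for result lanes 0 and 1 and the `b` lane for
// result lanes 2 and 3.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_shuffle_f32x4() -> __m512 {
    let a = _mm512_setr_ps(
        0., 1., 2., 3., 10., 11., 12., 13., 20., 21., 22., 23., 30., 31., 32., 33.,
    );
    let b = _mm512_setr_ps(
        100., 101., 102., 103., 110., 111., 112., 113., 120., 121., 122., 123., 130., 131., 132.,
        133.,
    );
    // MASK = 0b01_00_11_10 selects [a.lane2, a.lane3, b.lane0, b.lane1], i.e.
    // [20..=23, 30..=33, 100..=103, 110..=113].
    _mm512_shuffle_f32x4::<0b01_00_11_10>(a, b)
}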
24558
24559/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24560///
24561/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_f32x4&expand=5163)
24562#[inline]
24563#[target_feature(enable = "avx512f")]
24564#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24565#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b1011))]
24566#[rustc_legacy_const_generics(4)]
24567pub fn _mm512_mask_shuffle_f32x4<const MASK: i32>(
24568 src: __m512,
24569 k: __mmask16,
24570 a: __m512,
24571 b: __m512,
24572) -> __m512 {
24573 unsafe {
24574 static_assert_uimm_bits!(MASK, 8);
24575 let r: __m512 = _mm512_shuffle_f32x4::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16()))
24577 }
24578}
24579
24580/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24581///
24582/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_f32x4&expand=5164)
24583#[inline]
24584#[target_feature(enable = "avx512f")]
24585#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24586#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b1011))]
24587#[rustc_legacy_const_generics(3)]
24588pub fn _mm512_maskz_shuffle_f32x4<const MASK: i32>(k: __mmask16, a: __m512, b: __m512) -> __m512 {
24589 unsafe {
24590 static_assert_uimm_bits!(MASK, 8);
24591 let r: __m512 = _mm512_shuffle_f32x4::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO))
24593 }
24594}
24595
24596/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
24597///
24598/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_f32x4&expand=5162)
24599#[inline]
24600#[target_feature(enable = "avx512f,avx512vl")]
24601#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24602#[cfg_attr(test, assert_instr(vperm, MASK = 0b01))] //should be vshuff32x4
24603#[rustc_legacy_const_generics(2)]
24604pub fn _mm256_shuffle_f32x4<const MASK: i32>(a: __m256, b: __m256) -> __m256 {
24605 unsafe {
24606 static_assert_uimm_bits!(MASK, 8);
24607 let a: f32x8 = a.as_f32x8();
24608 let b: f32x8 = b.as_f32x8();
24609 let r: f32x8 = simd_shuffle!(
24610 a,
24611 b,
24612 [
24613 (MASK as u32 & 0b1) * 4 + 0,
24614 (MASK as u32 & 0b1) * 4 + 1,
24615 (MASK as u32 & 0b1) * 4 + 2,
24616 (MASK as u32 & 0b1) * 4 + 3,
24617 ((MASK as u32 >> 1) & 0b1) * 4 + 0 + 8,
24618 ((MASK as u32 >> 1) & 0b1) * 4 + 1 + 8,
24619 ((MASK as u32 >> 1) & 0b1) * 4 + 2 + 8,
24620 ((MASK as u32 >> 1) & 0b1) * 4 + 3 + 8,
24621 ],
24622 );
        transmute(r)
24624 }
24625}
24626
24627/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24628///
24629/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_f32x4&expand=5160)
24630#[inline]
24631#[target_feature(enable = "avx512f,avx512vl")]
24632#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24633#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b11))]
24634#[rustc_legacy_const_generics(4)]
24635pub fn _mm256_mask_shuffle_f32x4<const MASK: i32>(
24636 src: __m256,
24637 k: __mmask8,
24638 a: __m256,
24639 b: __m256,
24640) -> __m256 {
24641 unsafe {
24642 static_assert_uimm_bits!(MASK, 8);
24643 let r: __m256 = _mm256_shuffle_f32x4::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8()))
24645 }
24646}
24647
24648/// Shuffle 128-bits (composed of 4 single-precision (32-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24649///
24650/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_f32x4&expand=5161)
24651#[inline]
24652#[target_feature(enable = "avx512f,avx512vl")]
24653#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24654#[cfg_attr(test, assert_instr(vshuff32x4, MASK = 0b11))]
24655#[rustc_legacy_const_generics(3)]
24656pub fn _mm256_maskz_shuffle_f32x4<const MASK: i32>(k: __mmask8, a: __m256, b: __m256) -> __m256 {
24657 unsafe {
24658 static_assert_uimm_bits!(MASK, 8);
24659 let r: __m256 = _mm256_shuffle_f32x4::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO))
24661 }
24662}
24663
24664/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
24665///
24666/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_shuffle_f64x2&expand=5171)
24667#[inline]
24668#[target_feature(enable = "avx512f")]
24669#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24670#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b10_11_11_11))]
24671#[rustc_legacy_const_generics(2)]
24672pub fn _mm512_shuffle_f64x2<const MASK: i32>(a: __m512d, b: __m512d) -> __m512d {
24673 unsafe {
24674 static_assert_uimm_bits!(MASK, 8);
24675 let a: f64x8 = a.as_f64x8();
24676 let b: f64x8 = b.as_f64x8();
24677 let r: f64x8 = simd_shuffle!(
24678 a,
24679 b,
24680 [
24681 (MASK as u32 & 0b11) * 2 + 0,
24682 (MASK as u32 & 0b11) * 2 + 1,
24683 ((MASK as u32 >> 2) & 0b11) * 2 + 0,
24684 ((MASK as u32 >> 2) & 0b11) * 2 + 1,
24685 ((MASK as u32 >> 4) & 0b11) * 2 + 0 + 8,
24686 ((MASK as u32 >> 4) & 0b11) * 2 + 1 + 8,
24687 ((MASK as u32 >> 6) & 0b11) * 2 + 0 + 8,
24688 ((MASK as u32 >> 6) & 0b11) * 2 + 1 + 8,
24689 ],
24690 );
        transmute(r)
24692 }
24693}
24694
24695/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24696///
24697/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_shuffle_f64x2&expand=5169)
24698#[inline]
24699#[target_feature(enable = "avx512f")]
24700#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24701#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b10_11_11_11))]
24702#[rustc_legacy_const_generics(4)]
24703pub fn _mm512_mask_shuffle_f64x2<const MASK: i32>(
24704 src: __m512d,
24705 k: __mmask8,
24706 a: __m512d,
24707 b: __m512d,
24708) -> __m512d {
24709 unsafe {
24710 static_assert_uimm_bits!(MASK, 8);
24711 let r: __m512d = _mm512_shuffle_f64x2::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
24713 }
24714}
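
// Illustrative sketch only (hypothetical helper, not part of the test suite):
// shows the writemask merge behaviour — lanes are shuffled as usual, then each
// 64-bit element whose mask bit is clear is taken from `src` instead.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_mask_shuffle_f64x2() -> __m512d {
    let src = _mm512_set1_pd(-1.0);
    let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
    let b = _mm512_setr_pd(8., 9., 10., 11., 12., 13., 14., 15.);
    // MASK = 0 repeats a.lane0 and b.lane0: [0, 1, 0, 1, 8, 9, 8, 9]. With
    // k = 0b1010_1010 only the odd-indexed results are kept; even-indexed
    // elements come from `src`, giving [-1, 1, -1, 1, -1, 9, -1, 9].
    _mm512_mask_shuffle_f64x2::<0>(src, 0b1010_1010, a, b)
}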
24715
24716/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24717///
24718/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_shuffle_f64x2&expand=5170)
24719#[inline]
24720#[target_feature(enable = "avx512f")]
24721#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24722#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b10_11_11_11))]
24723#[rustc_legacy_const_generics(3)]
24724pub fn _mm512_maskz_shuffle_f64x2<const MASK: i32>(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
24725 unsafe {
24726 static_assert_uimm_bits!(MASK, 8);
24727 let r: __m512d = _mm512_shuffle_f64x2::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
24729 }
24730}
24731
24732/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst.
24733///
24734/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_shuffle_f64x2&expand=5168)
24735#[inline]
24736#[target_feature(enable = "avx512f,avx512vl")]
24737#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24738#[cfg_attr(test, assert_instr(vperm, MASK = 0b01))] //should be vshuff64x2
24739#[rustc_legacy_const_generics(2)]
24740pub fn _mm256_shuffle_f64x2<const MASK: i32>(a: __m256d, b: __m256d) -> __m256d {
24741 unsafe {
24742 static_assert_uimm_bits!(MASK, 8);
24743 let a: f64x4 = a.as_f64x4();
24744 let b: f64x4 = b.as_f64x4();
24745 let r: f64x4 = simd_shuffle!(
24746 a,
24747 b,
24748 [
24749 (MASK as u32 & 0b1) * 2 + 0,
24750 (MASK as u32 & 0b1) * 2 + 1,
24751 ((MASK as u32 >> 1) & 0b1) * 2 + 0 + 4,
24752 ((MASK as u32 >> 1) & 0b1) * 2 + 1 + 4,
24753 ],
24754 );
        transmute(r)
24756 }
24757}
24758
24759/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24760///
24761/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_shuffle_f64x2&expand=5166)
24762#[inline]
24763#[target_feature(enable = "avx512f,avx512vl")]
24764#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24765#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b11))]
24766#[rustc_legacy_const_generics(4)]
24767pub fn _mm256_mask_shuffle_f64x2<const MASK: i32>(
24768 src: __m256d,
24769 k: __mmask8,
24770 a: __m256d,
24771 b: __m256d,
24772) -> __m256d {
24773 unsafe {
24774 static_assert_uimm_bits!(MASK, 8);
24775 let r: __m256d = _mm256_shuffle_f64x2::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
24777 }
24778}
24779
24780/// Shuffle 128-bits (composed of 2 double-precision (64-bit) floating-point elements) selected by imm8 from a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24781///
24782/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_shuffle_f64x2&expand=5167)
24783#[inline]
24784#[target_feature(enable = "avx512f,avx512vl")]
24785#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24786#[cfg_attr(test, assert_instr(vshuff64x2, MASK = 0b11))]
24787#[rustc_legacy_const_generics(3)]
24788pub fn _mm256_maskz_shuffle_f64x2<const MASK: i32>(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
24789 unsafe {
24790 static_assert_uimm_bits!(MASK, 8);
24791 let r: __m256d = _mm256_shuffle_f64x2::<MASK>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
24793 }
24794}
24795
24796/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the result in dst.
24797///
24798/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extractf32x4_ps&expand=2442)
24799#[inline]
24800#[target_feature(enable = "avx512f")]
24801#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24802#[cfg_attr(
24803 all(test, not(target_env = "msvc")),
24804 assert_instr(vextractf32x4, IMM8 = 3)
24805)]
24806#[rustc_legacy_const_generics(1)]
24807pub fn _mm512_extractf32x4_ps<const IMM8: i32>(a: __m512) -> __m128 {
24808 unsafe {
24809 static_assert_uimm_bits!(IMM8, 2);
24810 match IMM8 & 0x3 {
24811 0 => simd_shuffle!(a, _mm512_undefined_ps(), [0, 1, 2, 3]),
24812 1 => simd_shuffle!(a, _mm512_undefined_ps(), [4, 5, 6, 7]),
24813 2 => simd_shuffle!(a, _mm512_undefined_ps(), [8, 9, 10, 11]),
24814 _ => simd_shuffle!(a, _mm512_undefined_ps(), [12, 13, 14, 15]),
24815 }
24816 }
24817}
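
// Illustrative sketch only (hypothetical helper, not part of the test suite):
// IMM8 simply names one of the four 128-bit lanes of the source vector.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_extractf32x4() -> __m128 {
    let a = _mm512_setr_ps(
        0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
    );
    // IMM8 = 2 extracts the third 128-bit lane: [8, 9, 10, 11].
    _mm512_extractf32x4_ps::<2>(a)
}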
24818
24819/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24820///
24821/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extractf32x4_ps&expand=2443)
24822#[inline]
24823#[target_feature(enable = "avx512f")]
24824#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24825#[cfg_attr(
24826 all(test, not(target_env = "msvc")),
24827 assert_instr(vextractf32x4, IMM8 = 3)
24828)]
24829#[rustc_legacy_const_generics(3)]
24830pub fn _mm512_mask_extractf32x4_ps<const IMM8: i32>(src: __m128, k: __mmask8, a: __m512) -> __m128 {
24831 unsafe {
24832 static_assert_uimm_bits!(IMM8, 2);
24833 let r: __m128 = _mm512_extractf32x4_ps::<IMM8>(a);
        transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4()))
24835 }
24836}
24837
24838/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24839///
24840/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extractf32x4_ps&expand=2444)
24841#[inline]
24842#[target_feature(enable = "avx512f")]
24843#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24844#[cfg_attr(
24845 all(test, not(target_env = "msvc")),
24846 assert_instr(vextractf32x4, IMM8 = 3)
24847)]
24848#[rustc_legacy_const_generics(2)]
24849pub fn _mm512_maskz_extractf32x4_ps<const IMM8: i32>(k: __mmask8, a: __m512) -> __m128 {
24850 unsafe {
24851 static_assert_uimm_bits!(IMM8, 2);
24852 let r: __m128 = _mm512_extractf32x4_ps::<IMM8>(a);
        transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO))
24854 }
24855}
24856
24857/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the result in dst.
24858///
24859/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extractf32x4_ps&expand=2439)
24860#[inline]
24861#[target_feature(enable = "avx512f,avx512vl")]
24862#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24863#[cfg_attr(
24864 all(test, not(target_env = "msvc")),
24865 assert_instr(vextract, IMM8 = 1) //should be vextractf32x4
24866)]
24867#[rustc_legacy_const_generics(1)]
24868pub fn _mm256_extractf32x4_ps<const IMM8: i32>(a: __m256) -> __m128 {
24869 unsafe {
24870 static_assert_uimm_bits!(IMM8, 1);
24871 match IMM8 & 0x1 {
24872 0 => simd_shuffle!(a, _mm256_undefined_ps(), [0, 1, 2, 3]),
24873 _ => simd_shuffle!(a, _mm256_undefined_ps(), [4, 5, 6, 7]),
24874 }
24875 }
24876}
24877
24878/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24879///
24880/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_extractf32x4_ps&expand=2440)
24881#[inline]
24882#[target_feature(enable = "avx512f,avx512vl")]
24883#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24884#[cfg_attr(
24885 all(test, not(target_env = "msvc")),
24886 assert_instr(vextractf32x4, IMM8 = 1)
24887)]
24888#[rustc_legacy_const_generics(3)]
24889pub fn _mm256_mask_extractf32x4_ps<const IMM8: i32>(src: __m128, k: __mmask8, a: __m256) -> __m128 {
24890 unsafe {
24891 static_assert_uimm_bits!(IMM8, 1);
24892 let r: __m128 = _mm256_extractf32x4_ps::<IMM8>(a);
        transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4()))
24894 }
24895}
24896
24897/// Extract 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24898///
24899/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_extractf32x4_ps&expand=2441)
24900#[inline]
24901#[target_feature(enable = "avx512f,avx512vl")]
24902#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24903#[cfg_attr(
24904 all(test, not(target_env = "msvc")),
24905 assert_instr(vextractf32x4, IMM8 = 1)
24906)]
24907#[rustc_legacy_const_generics(2)]
24908pub fn _mm256_maskz_extractf32x4_ps<const IMM8: i32>(k: __mmask8, a: __m256) -> __m128 {
24909 unsafe {
24910 static_assert_uimm_bits!(IMM8, 1);
24911 let r: __m128 = _mm256_extractf32x4_ps::<IMM8>(a);
        transmute(simd_select_bitmask(k, r.as_f32x4(), f32x4::ZERO))
24913 }
24914}
24915
24916/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with IMM1, and store the result in dst.
24917///
24918/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extracti64x4_epi64&expand=2473)
24919#[inline]
24920#[target_feature(enable = "avx512f")]
24921#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24922#[cfg_attr(
24923 all(test, not(target_env = "msvc")),
24924 assert_instr(vextractf64x4, IMM1 = 1) //should be vextracti64x4
24925)]
24926#[rustc_legacy_const_generics(1)]
24927pub fn _mm512_extracti64x4_epi64<const IMM1: i32>(a: __m512i) -> __m256i {
24928 unsafe {
24929 static_assert_uimm_bits!(IMM1, 1);
24930 match IMM1 {
24931 0 => simd_shuffle!(a, _mm512_setzero_si512(), [0, 1, 2, 3]),
24932 _ => simd_shuffle!(a, _mm512_setzero_si512(), [4, 5, 6, 7]),
24933 }
24934 }
24935}
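
// Illustrative sketch only (hypothetical helper, not part of the test suite):
// IMM1 selects the lower (0) or upper (1) 256-bit half of the source vector.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_extracti64x4() -> __m256i {
    let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
    // IMM1 = 1 extracts the upper half: [4, 5, 6, 7].
    _mm512_extracti64x4_epi64::<1>(a)
}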
24936
24937/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with IMM1, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
24938///
24939/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extracti64x4_epi64&expand=2474)
24940#[inline]
24941#[target_feature(enable = "avx512f")]
24942#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24943#[cfg_attr(
24944 all(test, not(target_env = "msvc")),
24945 assert_instr(vextracti64x4, IMM1 = 1)
24946)]
24947#[rustc_legacy_const_generics(3)]
24948pub fn _mm512_mask_extracti64x4_epi64<const IMM1: i32>(
24949 src: __m256i,
24950 k: __mmask8,
24951 a: __m512i,
24952) -> __m256i {
24953 unsafe {
24954 static_assert_uimm_bits!(IMM1, 1);
24955 let r: __m256i = _mm512_extracti64x4_epi64::<IMM1>(a);
        transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4()))
24957 }
24958}
24959
24960/// Extract 256 bits (composed of 4 packed 64-bit integers) from a, selected with IMM1, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
24961///
24962/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extracti64x4_epi64&expand=2475)
24963#[inline]
24964#[target_feature(enable = "avx512f")]
24965#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24966#[cfg_attr(
24967 all(test, not(target_env = "msvc")),
24968 assert_instr(vextracti64x4, IMM1 = 1)
24969)]
24970#[rustc_legacy_const_generics(2)]
24971pub fn _mm512_maskz_extracti64x4_epi64<const IMM1: i32>(k: __mmask8, a: __m512i) -> __m256i {
24972 unsafe {
24973 static_assert_uimm_bits!(IMM1, 1);
24974 let r: __m256i = _mm512_extracti64x4_epi64::<IMM1>(a);
        transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO))
24976 }
24977}
24978
24979/// Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from a, selected with imm8, and store the result in dst.
24980///
24981/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extractf64x4_pd&expand=2454)
24982#[inline]
24983#[target_feature(enable = "avx512f")]
24984#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
24985#[cfg_attr(
24986 all(test, not(target_env = "msvc")),
24987 assert_instr(vextractf64x4, IMM8 = 1)
24988)]
24989#[rustc_legacy_const_generics(1)]
24990pub fn _mm512_extractf64x4_pd<const IMM8: i32>(a: __m512d) -> __m256d {
24991 unsafe {
24992 static_assert_uimm_bits!(IMM8, 1);
24993 match IMM8 & 0x1 {
24994 0 => simd_shuffle!(a, _mm512_undefined_pd(), [0, 1, 2, 3]),
24995 _ => simd_shuffle!(a, _mm512_undefined_pd(), [4, 5, 6, 7]),
24996 }
24997 }
24998}
24999
25000/// Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from a, selected with imm8, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25001///
25002/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extractf64x4_pd&expand=2455)
25003#[inline]
25004#[target_feature(enable = "avx512f")]
25005#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25006#[cfg_attr(
25007 all(test, not(target_env = "msvc")),
25008 assert_instr(vextractf64x4, IMM8 = 1)
25009)]
25010#[rustc_legacy_const_generics(3)]
25011pub fn _mm512_mask_extractf64x4_pd<const IMM8: i32>(
25012 src: __m256d,
25013 k: __mmask8,
25014 a: __m512d,
25015) -> __m256d {
25016 unsafe {
25017 static_assert_uimm_bits!(IMM8, 1);
25018 let r: __m256d = _mm512_extractf64x4_pd::<IMM8>(a);
        transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
25020 }
25021}
25022
25023/// Extract 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from a, selected with imm8, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25024///
25025/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extractf64x4_pd&expand=2456)
25026#[inline]
25027#[target_feature(enable = "avx512f")]
25028#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25029#[cfg_attr(
25030 all(test, not(target_env = "msvc")),
25031 assert_instr(vextractf64x4, IMM8 = 1)
25032)]
25033#[rustc_legacy_const_generics(2)]
25034pub fn _mm512_maskz_extractf64x4_pd<const IMM8: i32>(k: __mmask8, a: __m512d) -> __m256d {
25035 unsafe {
25036 static_assert_uimm_bits!(IMM8, 1);
25037 let r: __m256d = _mm512_extractf64x4_pd::<IMM8>(a);
        transmute(simd_select_bitmask(k, r.as_f64x4(), f64x4::ZERO))
25039 }
25040}
25041
25042/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM2, and store the result in dst.
25043///
25044/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_extracti32x4_epi32&expand=2461)
25045#[inline]
25046#[target_feature(enable = "avx512f")]
25047#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25048#[cfg_attr(
25049 all(test, not(target_env = "msvc")),
25050 assert_instr(vextractf32x4, IMM2 = 3) //should be vextracti32x4
25051)]
25052#[rustc_legacy_const_generics(1)]
25053pub fn _mm512_extracti32x4_epi32<const IMM2: i32>(a: __m512i) -> __m128i {
25054 unsafe {
25055 static_assert_uimm_bits!(IMM2, 2);
25056 let a: i32x16 = a.as_i32x16();
25057 let zero: i32x16 = i32x16::ZERO;
25058 let extract: i32x4 = match IMM2 {
25059 0 => simd_shuffle!(a, zero, [0, 1, 2, 3]),
25060 1 => simd_shuffle!(a, zero, [4, 5, 6, 7]),
25061 2 => simd_shuffle!(a, zero, [8, 9, 10, 11]),
25062 _ => simd_shuffle!(a, zero, [12, 13, 14, 15]),
25063 };
        transmute(extract)
25065 }
25066}
25067
25068/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM2, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25069///
25070/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_extracti32x4_epi32&expand=2462)
25071#[inline]
25072#[target_feature(enable = "avx512f")]
25073#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25074#[cfg_attr(
25075 all(test, not(target_env = "msvc")),
25076 assert_instr(vextracti32x4, IMM2 = 3)
25077)]
25078#[rustc_legacy_const_generics(3)]
25079pub fn _mm512_mask_extracti32x4_epi32<const IMM2: i32>(
25080 src: __m128i,
25081 k: __mmask8,
25082 a: __m512i,
25083) -> __m128i {
25084 unsafe {
25085 static_assert_uimm_bits!(IMM2, 2);
25086 let r: __m128i = _mm512_extracti32x4_epi32::<IMM2>(a);
        transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4()))
25088 }
25089}
25090
25091/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM2, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25092///
25093/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_extracti32x4_epi32&expand=2463)
25094#[inline]
25095#[target_feature(enable = "avx512f")]
25096#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25097#[cfg_attr(
25098 all(test, not(target_env = "msvc")),
25099 assert_instr(vextracti32x4, IMM2 = 3)
25100)]
25101#[rustc_legacy_const_generics(2)]
25102pub fn _mm512_maskz_extracti32x4_epi32<const IMM2: i32>(k: __mmask8, a: __m512i) -> __m128i {
25103 unsafe {
25104 static_assert_uimm_bits!(IMM2, 2);
25105 let r: __m128i = _mm512_extracti32x4_epi32::<IMM2>(a);
        transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO))
25107 }
25108}
25109
25110/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM1, and store the result in dst.
25111///
25112/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extracti32x4_epi32&expand=2458)
25113#[inline]
25114#[target_feature(enable = "avx512f,avx512vl")]
25115#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25116#[cfg_attr(
25117 all(test, not(target_env = "msvc")),
25118 assert_instr(vextract, IMM1 = 1) //should be vextracti32x4
25119)]
25120#[rustc_legacy_const_generics(1)]
25121pub fn _mm256_extracti32x4_epi32<const IMM1: i32>(a: __m256i) -> __m128i {
25122 unsafe {
25123 static_assert_uimm_bits!(IMM1, 1);
25124 let a: i32x8 = a.as_i32x8();
25125 let zero: i32x8 = i32x8::ZERO;
25126 let extract: i32x4 = match IMM1 {
25127 0 => simd_shuffle!(a, zero, [0, 1, 2, 3]),
25128 _ => simd_shuffle!(a, zero, [4, 5, 6, 7]),
25129 };
        transmute(extract)
25131 }
25132}
25133
25134/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM1, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25135///
25136/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_extracti32x4_epi32&expand=2459)
25137#[inline]
25138#[target_feature(enable = "avx512f,avx512vl")]
25139#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25140#[cfg_attr(
25141 all(test, not(target_env = "msvc")),
25142 assert_instr(vextracti32x4, IMM1 = 1)
25143)]
25144#[rustc_legacy_const_generics(3)]
25145pub fn _mm256_mask_extracti32x4_epi32<const IMM1: i32>(
25146 src: __m128i,
25147 k: __mmask8,
25148 a: __m256i,
25149) -> __m128i {
25150 unsafe {
25151 static_assert_uimm_bits!(IMM1, 1);
25152 let r: __m128i = _mm256_extracti32x4_epi32::<IMM1>(a);
        transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4()))
25154 }
25155}
25156
25157/// Extract 128 bits (composed of 4 packed 32-bit integers) from a, selected with IMM1, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25158///
25159/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_extracti32x4_epi32&expand=2460)
25160#[inline]
25161#[target_feature(enable = "avx512f,avx512vl")]
25162#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25163#[cfg_attr(
25164 all(test, not(target_env = "msvc")),
25165 assert_instr(vextracti32x4, IMM1 = 1)
25166)]
25167#[rustc_legacy_const_generics(2)]
25168pub fn _mm256_maskz_extracti32x4_epi32<const IMM1: i32>(k: __mmask8, a: __m256i) -> __m128i {
25169 unsafe {
25170 static_assert_uimm_bits!(IMM1, 1);
25171 let r: __m128i = _mm256_extracti32x4_epi32::<IMM1>(a);
        transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO))
25173 }
25174}
25175
25176/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst.
25177///
25178/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_moveldup_ps&expand=3862)
25179#[inline]
25180#[target_feature(enable = "avx512f")]
25181#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25182#[cfg_attr(test, assert_instr(vmovsldup))]
25183pub fn _mm512_moveldup_ps(a: __m512) -> __m512 {
25184 unsafe {
25185 let r: f32x16 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
        transmute(r)
25187 }
25188}
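
// Illustrative sketch only (hypothetical helper, not part of the test suite):
// every even-indexed element is duplicated into the odd slot that follows it.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn _example_moveldup() -> __m512 {
    let a = _mm512_setr_ps(
        0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
    );
    // result = [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]
    _mm512_moveldup_ps(a)
}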
25189
25190/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25191///
25192/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_moveldup_ps&expand=3860)
25193#[inline]
25194#[target_feature(enable = "avx512f")]
25195#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25196#[cfg_attr(test, assert_instr(vmovsldup))]
25197pub fn _mm512_mask_moveldup_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
25198 unsafe {
25199 let mov: f32x16 =
25200 simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
        transmute(simd_select_bitmask(k, mov, src.as_f32x16()))
25202 }
25203}
25204
25205/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25206///
25207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_moveldup_ps&expand=3861)
25208#[inline]
25209#[target_feature(enable = "avx512f")]
25210#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25211#[cfg_attr(test, assert_instr(vmovsldup))]
25212pub fn _mm512_maskz_moveldup_ps(k: __mmask16, a: __m512) -> __m512 {
25213 unsafe {
25214 let mov: f32x16 =
25215 simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
        transmute(simd_select_bitmask(k, mov, f32x16::ZERO))
25217 }
25218}
25219
25220/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25221///
25222/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_moveldup_ps&expand=3857)
25223#[inline]
25224#[target_feature(enable = "avx512f,avx512vl")]
25225#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25226#[cfg_attr(test, assert_instr(vmovsldup))]
25227pub fn _mm256_mask_moveldup_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
25228 unsafe {
25229 let mov: __m256 = _mm256_moveldup_ps(a);
        transmute(simd_select_bitmask(k, mov.as_f32x8(), src.as_f32x8()))
25231 }
25232}
25233
25234/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25235///
25236/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_moveldup_ps&expand=3858)
25237#[inline]
25238#[target_feature(enable = "avx512f,avx512vl")]
25239#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25240#[cfg_attr(test, assert_instr(vmovsldup))]
25241pub fn _mm256_maskz_moveldup_ps(k: __mmask8, a: __m256) -> __m256 {
25242 unsafe {
25243 let mov: __m256 = _mm256_moveldup_ps(a);
        transmute(simd_select_bitmask(k, mov.as_f32x8(), f32x8::ZERO))
25245 }
25246}
25247
25248/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25249///
25250/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_moveldup_ps&expand=3854)
25251#[inline]
25252#[target_feature(enable = "avx512f,avx512vl")]
25253#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25254#[cfg_attr(test, assert_instr(vmovsldup))]
25255pub fn _mm_mask_moveldup_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
25256 unsafe {
25257 let mov: __m128 = _mm_moveldup_ps(a);
        transmute(simd_select_bitmask(k, mov.as_f32x4(), src.as_f32x4()))
25259 }
25260}
25261
25262/// Duplicate even-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25263///
25264/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_moveldup_ps&expand=3855)
25265#[inline]
25266#[target_feature(enable = "avx512f,avx512vl")]
25267#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25268#[cfg_attr(test, assert_instr(vmovsldup))]
25269pub fn _mm_maskz_moveldup_ps(k: __mmask8, a: __m128) -> __m128 {
25270 unsafe {
25271 let mov: __m128 = _mm_moveldup_ps(a);
        transmute(simd_select_bitmask(k, mov.as_f32x4(), f32x4::ZERO))
25273 }
25274}
25275
25276/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst.
25277///
25278/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movehdup_ps&expand=3852)
25279#[inline]
25280#[target_feature(enable = "avx512f")]
25281#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25282#[cfg_attr(test, assert_instr(vmovshdup))]
25283pub fn _mm512_movehdup_ps(a: __m512) -> __m512 {
25284 unsafe {
25285 let r: f32x16 = simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
        transmute(r)
25287 }
25288}
25289
25290/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25291///
25292/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_movehdup_ps&expand=3850)
25293#[inline]
25294#[target_feature(enable = "avx512f")]
25295#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25296#[cfg_attr(test, assert_instr(vmovshdup))]
25297pub fn _mm512_mask_movehdup_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
25298 unsafe {
25299 let mov: f32x16 =
25300 simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
        transmute(simd_select_bitmask(k, mov, src.as_f32x16()))
25302 }
25303}
25304
25305/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25306///
25307/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_movehdup_ps&expand=3851)
25308#[inline]
25309#[target_feature(enable = "avx512f")]
25310#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25311#[cfg_attr(test, assert_instr(vmovshdup))]
25312pub fn _mm512_maskz_movehdup_ps(k: __mmask16, a: __m512) -> __m512 {
25313 unsafe {
25314 let mov: f32x16 =
25315 simd_shuffle!(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
        transmute(simd_select_bitmask(k, mov, f32x16::ZERO))
25317 }
25318}
25319
25320/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25321///
25322/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_movehdup_ps&expand=3847)
25323#[inline]
25324#[target_feature(enable = "avx512f,avx512vl")]
25325#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25326#[cfg_attr(test, assert_instr(vmovshdup))]
25327pub fn _mm256_mask_movehdup_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
25328 unsafe {
25329 let mov: __m256 = _mm256_movehdup_ps(a);
        transmute(simd_select_bitmask(k, mov.as_f32x8(), src.as_f32x8()))
25331 }
25332}
25333
25334/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25335///
25336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_movehdup_ps&expand=3848)
25337#[inline]
25338#[target_feature(enable = "avx512f,avx512vl")]
25339#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25340#[cfg_attr(test, assert_instr(vmovshdup))]
25341pub fn _mm256_maskz_movehdup_ps(k: __mmask8, a: __m256) -> __m256 {
25342 unsafe {
25343 let mov: __m256 = _mm256_movehdup_ps(a);
        transmute(simd_select_bitmask(k, mov.as_f32x8(), f32x8::ZERO))
25345 }
25346}
25347
25348/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25349///
25350/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_movehdup_ps&expand=3844)
25351#[inline]
25352#[target_feature(enable = "avx512f,avx512vl")]
25353#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25354#[cfg_attr(test, assert_instr(vmovshdup))]
25355pub fn _mm_mask_movehdup_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
25356 unsafe {
25357 let mov: __m128 = _mm_movehdup_ps(a);
        transmute(simd_select_bitmask(k, mov.as_f32x4(), src.as_f32x4()))
25359 }
25360}
25361
25362/// Duplicate odd-indexed single-precision (32-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25363///
25364/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_movehdup_ps&expand=3845)
25365#[inline]
25366#[target_feature(enable = "avx512f,avx512vl")]
25367#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25368#[cfg_attr(test, assert_instr(vmovshdup))]
25369pub fn _mm_maskz_movehdup_ps(k: __mmask8, a: __m128) -> __m128 {
25370 unsafe {
25371 let mov: __m128 = _mm_movehdup_ps(a);
        transmute(simd_select_bitmask(k, mov.as_f32x4(), f32x4::ZERO))
25373 }
25374}
25375
25376/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst.
25377///
25378/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_movedup_pd&expand=3843)
25379#[inline]
25380#[target_feature(enable = "avx512f")]
25381#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25382#[cfg_attr(test, assert_instr(vmovddup))]
25383pub fn _mm512_movedup_pd(a: __m512d) -> __m512d {
25384 unsafe {
25385 let r: f64x8 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
        transmute(r)
25387 }
25388}
25389
25390/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25391///
25392/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_movedup_pd&expand=3841)
25393#[inline]
25394#[target_feature(enable = "avx512f")]
25395#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25396#[cfg_attr(test, assert_instr(vmovddup))]
25397pub fn _mm512_mask_movedup_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
25398 unsafe {
25399 let mov: f64x8 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
        transmute(simd_select_bitmask(k, mov, src.as_f64x8()))
25401 }
25402}
25403
25404/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25405///
25406/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_movedup_pd&expand=3842)
25407#[inline]
25408#[target_feature(enable = "avx512f")]
25409#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25410#[cfg_attr(test, assert_instr(vmovddup))]
25411pub fn _mm512_maskz_movedup_pd(k: __mmask8, a: __m512d) -> __m512d {
25412 unsafe {
25413 let mov: f64x8 = simd_shuffle!(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
        transmute(simd_select_bitmask(k, mov, f64x8::ZERO))
25415 }
25416}
25417
25418/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25419///
25420/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_movedup_pd&expand=3838)
25421#[inline]
25422#[target_feature(enable = "avx512f,avx512vl")]
25423#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25424#[cfg_attr(test, assert_instr(vmovddup))]
25425pub fn _mm256_mask_movedup_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
25426 unsafe {
25427 let mov: __m256d = _mm256_movedup_pd(a);
        transmute(simd_select_bitmask(k, mov.as_f64x4(), src.as_f64x4()))
25429 }
25430}
25431
25432/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25433///
25434/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_movedup_pd&expand=3839)
25435#[inline]
25436#[target_feature(enable = "avx512f,avx512vl")]
25437#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25438#[cfg_attr(test, assert_instr(vmovddup))]
25439pub fn _mm256_maskz_movedup_pd(k: __mmask8, a: __m256d) -> __m256d {
25440 unsafe {
25441 let mov: __m256d = _mm256_movedup_pd(a);
        transmute(simd_select_bitmask(k, mov.as_f64x4(), f64x4::ZERO))
25443 }
25444}
25445
25446/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25447///
25448/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_movedup_pd&expand=3835)
25449#[inline]
25450#[target_feature(enable = "avx512f,avx512vl")]
25451#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25452#[cfg_attr(test, assert_instr(vmovddup))]
25453pub fn _mm_mask_movedup_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
25454 unsafe {
25455 let mov: __m128d = _mm_movedup_pd(a);
        transmute(simd_select_bitmask(k, mov.as_f64x2(), src.as_f64x2()))
25457 }
25458}
25459
25460/// Duplicate even-indexed double-precision (64-bit) floating-point elements from a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25461///
25462/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_movedup_pd&expand=3836)
25463#[inline]
25464#[target_feature(enable = "avx512f,avx512vl")]
25465#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25466#[cfg_attr(test, assert_instr(vmovddup))]
25467pub fn _mm_maskz_movedup_pd(k: __mmask8, a: __m128d) -> __m128d {
25468 unsafe {
25469 let mov: __m128d = _mm_movedup_pd(a);
        transmute(simd_select_bitmask(k, mov.as_f64x2(), f64x2::ZERO))
25471 }
25472}
25473
25474/// Copy a to dst, then insert 128 bits (composed of 4 packed 32-bit integers) from b into dst at the location specified by imm8.
25475///
25476/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_inserti32x4&expand=3174)
25477#[inline]
25478#[target_feature(enable = "avx512f")]
25479#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25480#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))] //should be vinserti32x4
25481#[rustc_legacy_const_generics(2)]
25482pub fn _mm512_inserti32x4<const IMM8: i32>(a: __m512i, b: __m128i) -> __m512i {
25483 unsafe {
25484 static_assert_uimm_bits!(IMM8, 2);
25485 let a = a.as_i32x16();
25486 let b = _mm512_castsi128_si512(b).as_i32x16();
25487 let ret: i32x16 = match IMM8 & 0b11 {
25488 0 => {
25489 simd_shuffle!(
25490 a,
25491 b,
25492 [16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
25493 )
25494 }
25495 1 => {
25496 simd_shuffle!(
25497 a,
25498 b,
25499 [0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15],
25500 )
25501 }
25502 2 => {
25503 simd_shuffle!(
25504 a,
25505 b,
25506 [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 12, 13, 14, 15],
25507 )
25508 }
25509 _ => {
25510 simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19])
25511 }
25512 };
25513 transmute(ret)
25514 }
25515}
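
// Illustrative sketch only (hypothetical helper, not part of the test suite):
// IMM8 names the 128-bit lane of `a` that is replaced by `b`; the other lanes
// are passed through unchanged. The `sse2` feature is enabled here solely for
// the `_mm_setr_epi32` helper used to build `b`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,sse2")]
fn _example_inserti32x4() -> __m512i {
    let a = _mm512_setzero_si512();
    let b = _mm_setr_epi32(1, 2, 3, 4);
    // IMM8 = 3 places [1, 2, 3, 4] into the topmost 128-bit lane of `a`.
    _mm512_inserti32x4::<3>(a, b)
}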
25516
25517/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25518///
25519/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_inserti32x4&expand=3175)
25520#[inline]
25521#[target_feature(enable = "avx512f")]
25522#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25523#[cfg_attr(test, assert_instr(vinserti32x4, IMM8 = 2))]
25524#[rustc_legacy_const_generics(4)]
25525pub fn _mm512_mask_inserti32x4<const IMM8: i32>(
25526 src: __m512i,
25527 k: __mmask16,
25528 a: __m512i,
25529 b: __m128i,
25530) -> __m512i {
25531 unsafe {
25532 static_assert_uimm_bits!(IMM8, 2);
25533 let r: __m512i = _mm512_inserti32x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16()))
25535 }
25536}
25537
25538/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25539///
25540/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_inserti32x4&expand=3176)
25541#[inline]
25542#[target_feature(enable = "avx512f")]
25543#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25544#[cfg_attr(test, assert_instr(vinserti32x4, IMM8 = 2))]
25545#[rustc_legacy_const_generics(3)]
25546pub fn _mm512_maskz_inserti32x4<const IMM8: i32>(k: __mmask16, a: __m512i, b: __m128i) -> __m512i {
25547 unsafe {
25548 static_assert_uimm_bits!(IMM8, 2);
25549 let r: __m512i = _mm512_inserti32x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO))
25551 }
25552}
25553
25554/// Copy a to dst, then insert 128 bits (composed of 4 packed 32-bit integers) from b into dst at the location specified by imm8.
25555///
25556/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_inserti32x4&expand=3171)
25557#[inline]
25558#[target_feature(enable = "avx512f,avx512vl")]
25559#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25560#[cfg_attr(
25561 all(test, not(target_env = "msvc")),
25562 assert_instr(vinsert, IMM8 = 1) //should be vinserti32x4
25563)]
25564#[rustc_legacy_const_generics(2)]
25565pub fn _mm256_inserti32x4<const IMM8: i32>(a: __m256i, b: __m128i) -> __m256i {
25566 unsafe {
25567 static_assert_uimm_bits!(IMM8, 1);
25568 let a: i32x8 = a.as_i32x8();
25569 let b: i32x8 = _mm256_castsi128_si256(b).as_i32x8();
25570 let ret: i32x8 = match IMM8 & 0b1 {
25571 0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
25572 _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
25573 };
        transmute(ret)
25575 }
25576}
25577
25578/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25579///
25580/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_inserti32x4&expand=3172)
25581#[inline]
25582#[target_feature(enable = "avx512f,avx512vl")]
25583#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25584#[cfg_attr(
25585 all(test, not(target_env = "msvc")),
25586 assert_instr(vinserti32x4, IMM8 = 1)
25587)]
25588#[rustc_legacy_const_generics(4)]
25589pub fn _mm256_mask_inserti32x4<const IMM8: i32>(
25590 src: __m256i,
25591 k: __mmask8,
25592 a: __m256i,
25593 b: __m128i,
25594) -> __m256i {
25595 unsafe {
25596 static_assert_uimm_bits!(IMM8, 1);
25597 let r: __m256i = _mm256_inserti32x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8()))
25599 }
25600}
25601
25602/// Copy a to tmp, then insert 128 bits (composed of 4 packed 32-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25603///
25604/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_inserti32x4&expand=3173)
25605#[inline]
25606#[target_feature(enable = "avx512f,avx512vl")]
25607#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25608#[cfg_attr(
25609 all(test, not(target_env = "msvc")),
25610 assert_instr(vinserti32x4, IMM8 = 1)
25611)]
25612#[rustc_legacy_const_generics(3)]
25613pub fn _mm256_maskz_inserti32x4<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m128i) -> __m256i {
25614 unsafe {
25615 static_assert_uimm_bits!(IMM8, 1);
25616 let r: __m256i = _mm256_inserti32x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO))
25618 }
25619}
25620
25621/// Copy a to dst, then insert 256 bits (composed of 4 packed 64-bit integers) from b into dst at the location specified by imm8.
25622///
25623/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_inserti64x4&expand=3186)
25624#[inline]
25625#[target_feature(enable = "avx512f")]
25626#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25627#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))] //should be vinserti64x4
25628#[rustc_legacy_const_generics(2)]
25629pub fn _mm512_inserti64x4<const IMM8: i32>(a: __m512i, b: __m256i) -> __m512i {
25630 unsafe {
25631 static_assert_uimm_bits!(IMM8, 1);
25632 let b: __m512i = _mm512_castsi256_si512(b);
25633 match IMM8 & 0b1 {
25634 0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
25635 _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
25636 }
25637 }
25638}
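
// Illustrative sketch only (hypothetical helper, not part of the test suite):
// IMM8 = 0 replaces the lower 256-bit half with `b`, IMM8 = 1 the upper half.
// The `avx` feature is enabled here solely for the `_mm256_setr_epi64x` helper.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx")]
fn _example_inserti64x4() -> __m512i {
    let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
    let b = _mm256_setr_epi64x(10, 11, 12, 13);
    // IMM8 = 1 gives [0, 1, 2, 3, 10, 11, 12, 13].
    _mm512_inserti64x4::<1>(a, b)
}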
25639
25640/// Copy a to tmp, then insert 256 bits (composed of 4 packed 64-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25641///
25642/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_inserti64x4&expand=3187)
25643#[inline]
25644#[target_feature(enable = "avx512f")]
25645#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25646#[cfg_attr(test, assert_instr(vinserti64x4, IMM8 = 1))]
25647#[rustc_legacy_const_generics(4)]
25648pub fn _mm512_mask_inserti64x4<const IMM8: i32>(
25649 src: __m512i,
25650 k: __mmask8,
25651 a: __m512i,
25652 b: __m256i,
25653) -> __m512i {
25654 unsafe {
25655 static_assert_uimm_bits!(IMM8, 1);
25656 let r: __m512i = _mm512_inserti64x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8()))
25658 }
25659}
25660
25661/// Copy a to tmp, then insert 256 bits (composed of 4 packed 64-bit integers) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25662///
25663/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_inserti64x4&expand=3188)
25664#[inline]
25665#[target_feature(enable = "avx512f")]
25666#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25667#[cfg_attr(test, assert_instr(vinserti64x4, IMM8 = 1))]
25668#[rustc_legacy_const_generics(3)]
25669pub fn _mm512_maskz_inserti64x4<const IMM8: i32>(k: __mmask8, a: __m512i, b: __m256i) -> __m512i {
25670 unsafe {
25671 static_assert_uimm_bits!(IMM8, 1);
25672 let r: __m512i = _mm512_inserti64x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO))
25674 }
25675}
25676
25677/// Copy a to dst, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into dst at the location specified by imm8.
25678///
25679/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_insertf32x4&expand=3155)
25680#[inline]
25681#[target_feature(enable = "avx512f")]
25682#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25683#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))]
25684#[rustc_legacy_const_generics(2)]
25685pub fn _mm512_insertf32x4<const IMM8: i32>(a: __m512, b: __m128) -> __m512 {
25686 unsafe {
25687 static_assert_uimm_bits!(IMM8, 2);
25688 let b = _mm512_castps128_ps512(b);
25689 match IMM8 & 0b11 {
25690 0 => {
25691 simd_shuffle!(
25692 a,
25693 b,
25694 [16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
25695 )
25696 }
25697 1 => {
25698 simd_shuffle!(
25699 a,
25700 b,
25701 [0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15],
25702 )
25703 }
25704 2 => {
25705 simd_shuffle!(
25706 a,
25707 b,
25708 [0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 12, 13, 14, 15],
25709 )
25710 }
25711 _ => {
25712 simd_shuffle!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19])
25713 }
25714 }
25715 }
25716}
25717
25718/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25719///
25720/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_insertf32x4&expand=3156)
25721#[inline]
25722#[target_feature(enable = "avx512f")]
25723#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25724#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))]
25725#[rustc_legacy_const_generics(4)]
25726pub fn _mm512_mask_insertf32x4<const IMM8: i32>(
25727 src: __m512,
25728 k: __mmask16,
25729 a: __m512,
25730 b: __m128,
25731) -> __m512 {
25732 unsafe {
25733 static_assert_uimm_bits!(IMM8, 2);
25734 let r: __m512 = _mm512_insertf32x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16()))
25736 }
25737}
25738
25739/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25740///
25741/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_insertf32x4&expand=3157)
25742#[inline]
25743#[target_feature(enable = "avx512f")]
25744#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25745#[cfg_attr(test, assert_instr(vinsertf32x4, IMM8 = 2))]
25746#[rustc_legacy_const_generics(3)]
25747pub fn _mm512_maskz_insertf32x4<const IMM8: i32>(k: __mmask16, a: __m512, b: __m128) -> __m512 {
25748 unsafe {
25749 static_assert_uimm_bits!(IMM8, 2);
25750 let r: __m512 = _mm512_insertf32x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x16(), f32x16::ZERO))
25752 }
25753}
25754
25755/// Copy a to dst, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into dst at the location specified by imm8.
25756///
25757/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_insertf32x4&expand=3152)
25758#[inline]
25759#[target_feature(enable = "avx512f,avx512vl")]
25760#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25761#[cfg_attr(
25762 all(test, not(target_env = "msvc")),
25763 assert_instr(vinsert, IMM8 = 1) //should be vinsertf32x4
25764)]
25765#[rustc_legacy_const_generics(2)]
25766pub fn _mm256_insertf32x4<const IMM8: i32>(a: __m256, b: __m128) -> __m256 {
25767 unsafe {
25768 static_assert_uimm_bits!(IMM8, 1);
25769 let b: __m256 = _mm256_castps128_ps256(b);
25770 match IMM8 & 0b1 {
25771 0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
25772 _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
25773 }
25774 }
25775}
25776
25777/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25778///
25779/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_insertf32x4&expand=3153)
25780#[inline]
25781#[target_feature(enable = "avx512f,avx512vl")]
25782#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25783#[cfg_attr(
25784 all(test, not(target_env = "msvc")),
25785 assert_instr(vinsertf32x4, IMM8 = 1)
25786)]
25787#[rustc_legacy_const_generics(4)]
25788pub fn _mm256_mask_insertf32x4<const IMM8: i32>(
25789 src: __m256,
25790 k: __mmask8,
25791 a: __m256,
25792 b: __m128,
25793) -> __m256 {
25794 unsafe {
25795 static_assert_uimm_bits!(IMM8, 1);
25796 let r: __m256 = _mm256_insertf32x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8()))
25798 }
25799}
25800
25801/// Copy a to tmp, then insert 128 bits (composed of 4 packed single-precision (32-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25802///
25803/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_insertf32x4&expand=3154)
25804#[inline]
25805#[target_feature(enable = "avx512f,avx512vl")]
25806#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25807#[cfg_attr(
25808 all(test, not(target_env = "msvc")),
25809 assert_instr(vinsertf32x4, IMM8 = 1)
25810)]
25811#[rustc_legacy_const_generics(3)]
25812pub fn _mm256_maskz_insertf32x4<const IMM8: i32>(k: __mmask8, a: __m256, b: __m128) -> __m256 {
25813 unsafe {
25814 static_assert_uimm_bits!(IMM8, 1);
25815 let r: __m256 = _mm256_insertf32x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_f32x8(), f32x8::ZERO))
25817 }
25818}
25819
25820/// Copy a to dst, then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from b into dst at the location specified by imm8.
25821///
25822/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_insertf64x4&expand=3167)
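///
/// A minimal sketch (illustrative only, not run as a doctest):
///
/// ```ignore
/// let a = _mm512_set1_pd(1.0);
/// let b = _mm256_set1_pd(2.0);
/// // IMM8 = 0 replaces the lower four doubles:
/// // [2.0, 2.0, 2.0, 2.0, 1.0, 1.0, 1.0, 1.0].
/// let r = _mm512_insertf64x4::<0>(a, b);
/// ```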
25823#[inline]
25824#[target_feature(enable = "avx512f")]
25825#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25826#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))]
25827#[rustc_legacy_const_generics(2)]
25828pub fn _mm512_insertf64x4<const IMM8: i32>(a: __m512d, b: __m256d) -> __m512d {
25829 unsafe {
25830 static_assert_uimm_bits!(IMM8, 1);
25831 let b: __m512d = _mm512_castpd256_pd512(b);
25832 match IMM8 & 0b1 {
25833 0 => simd_shuffle!(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
25834 _ => simd_shuffle!(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
25835 }
25836 }
25837}
25838
25839/// Copy a to tmp, then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25840///
25841/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_insertf64x4&expand=3168)
25842#[inline]
25843#[target_feature(enable = "avx512f")]
25844#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25845#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))]
25846#[rustc_legacy_const_generics(4)]
25847pub fn _mm512_mask_insertf64x4<const IMM8: i32>(
25848 src: __m512d,
25849 k: __mmask8,
25850 a: __m512d,
25851 b: __m256d,
25852) -> __m512d {
25853 unsafe {
25854 static_assert_uimm_bits!(IMM8, 1);
25855 let r: __m512d = _mm512_insertf64x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
25857 }
25858}
25859
25860/// Copy a to tmp, then insert 256 bits (composed of 4 packed double-precision (64-bit) floating-point elements) from b into tmp at the location specified by imm8. Store tmp to dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25861///
25862/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_insertf64x4&expand=3169)
25863#[inline]
25864#[target_feature(enable = "avx512f")]
25865#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25866#[cfg_attr(test, assert_instr(vinsertf64x4, IMM8 = 1))]
25867#[rustc_legacy_const_generics(3)]
25868pub fn _mm512_maskz_insertf64x4<const IMM8: i32>(k: __mmask8, a: __m512d, b: __m256d) -> __m512d {
25869 unsafe {
25870 static_assert_uimm_bits!(IMM8, 1);
25871 let r: __m512d = _mm512_insertf64x4::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_f64x8(), f64x8::ZERO))
25873 }
25874}
25875
25876/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst.
25877///
25878/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_epi32&expand=6021)
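///
/// A minimal sketch of the per-lane interleave (illustrative only, not run as a
/// doctest):
///
/// ```ignore
/// let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
/// let b = _mm512_setr_epi32(16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
/// // The two high elements of each 128-bit lane of `a` and `b` are interleaved:
/// // [2, 18, 3, 19, 6, 22, 7, 23, 10, 26, 11, 27, 14, 30, 15, 31].
/// let r = _mm512_unpackhi_epi32(a, b);
/// ```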
25879#[inline]
25880#[target_feature(enable = "avx512f")]
25881#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25882#[cfg_attr(test, assert_instr(vunpckhps))] //should be vpunpckhdq
25883pub fn _mm512_unpackhi_epi32(a: __m512i, b: __m512i) -> __m512i {
25884 unsafe {
25885 let a: i32x16 = a.as_i32x16();
25886 let b: i32x16 = b.as_i32x16();
25887 #[rustfmt::skip]
25888 let r: i32x16 = simd_shuffle!(
25889 a, b,
25890 [ 2, 18, 3, 19,
25891 2 + 4, 18 + 4, 3 + 4, 19 + 4,
25892 2 + 8, 18 + 8, 3 + 8, 19 + 8,
25893 2 + 12, 18 + 12, 3 + 12, 19 + 12],
25894 );
        transmute(r)
25896 }
25897}
25898
25899/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25900///
25901/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_epi32&expand=6019)
25902#[inline]
25903#[target_feature(enable = "avx512f")]
25904#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25905#[cfg_attr(test, assert_instr(vpunpckhdq))]
25906pub fn _mm512_mask_unpackhi_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
25907 unsafe {
25908 let unpackhi: i32x16 = _mm512_unpackhi_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, unpackhi, src.as_i32x16()))
25910 }
25911}
25912
25913/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25914///
25915/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_epi32&expand=6020)
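///
/// A hedged sketch of the zeromask behaviour (illustrative only, not run as a
/// doctest):
///
/// ```ignore
/// let a = _mm512_set1_epi32(7);
/// let b = _mm512_set1_epi32(9);
/// // Only lanes 0 and 1 keep the interleaved result (7 and 9); every other
/// // lane is zeroed out.
/// let r = _mm512_maskz_unpackhi_epi32(0b0000_0000_0000_0011, a, b);
/// ```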
25916#[inline]
25917#[target_feature(enable = "avx512f")]
25918#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25919#[cfg_attr(test, assert_instr(vpunpckhdq))]
25920pub fn _mm512_maskz_unpackhi_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
25921 unsafe {
25922 let unpackhi: i32x16 = _mm512_unpackhi_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, unpackhi, i32x16::ZERO))
25924 }
25925}
25926
25927/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25928///
25929/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_epi32&expand=6016)
25930#[inline]
25931#[target_feature(enable = "avx512f,avx512vl")]
25932#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25933#[cfg_attr(test, assert_instr(vpunpckhdq))]
25934pub fn _mm256_mask_unpackhi_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
25935 unsafe {
25936 let unpackhi: i32x8 = _mm256_unpackhi_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, unpackhi, src.as_i32x8()))
25938 }
25939}
25940
25941/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25942///
25943/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_epi32&expand=6017)
25944#[inline]
25945#[target_feature(enable = "avx512f,avx512vl")]
25946#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25947#[cfg_attr(test, assert_instr(vpunpckhdq))]
25948pub fn _mm256_maskz_unpackhi_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
25949 unsafe {
25950 let unpackhi: i32x8 = _mm256_unpackhi_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, unpackhi, i32x8::ZERO))
25952 }
25953}
25954
25955/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25956///
25957/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_epi32&expand=6013)
25958#[inline]
25959#[target_feature(enable = "avx512f,avx512vl")]
25960#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25961#[cfg_attr(test, assert_instr(vpunpckhdq))]
25962pub fn _mm_mask_unpackhi_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
25963 unsafe {
25964 let unpackhi: i32x4 = _mm_unpackhi_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, unpackhi, src.as_i32x4()))
25966 }
25967}
25968
25969/// Unpack and interleave 32-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
25970///
25971/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_epi32&expand=6014)
25972#[inline]
25973#[target_feature(enable = "avx512f,avx512vl")]
25974#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25975#[cfg_attr(test, assert_instr(vpunpckhdq))]
25976pub fn _mm_maskz_unpackhi_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
25977 unsafe {
25978 let unpackhi: i32x4 = _mm_unpackhi_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, unpackhi, i32x4::ZERO))
25980 }
25981}
25982
25983/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst.
25984///
25985/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_epi64&expand=6030)
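///
/// A minimal sketch (illustrative only, not run as a doctest):
///
/// ```ignore
/// let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
/// let b = _mm512_setr_epi64(8, 9, 10, 11, 12, 13, 14, 15);
/// // The high 64-bit element of each 128-bit lane is taken from `a` then `b`:
/// // [1, 9, 3, 11, 5, 13, 7, 15].
/// let r = _mm512_unpackhi_epi64(a, b);
/// ```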
25986#[inline]
25987#[target_feature(enable = "avx512f")]
25988#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
25989#[cfg_attr(test, assert_instr(vunpckhpd))] //should be vpunpckhqdq
25990pub fn _mm512_unpackhi_epi64(a: __m512i, b: __m512i) -> __m512i {
25991 unsafe { simd_shuffle!(a, b, [1, 9, 1 + 2, 9 + 2, 1 + 4, 9 + 4, 1 + 6, 9 + 6]) }
25992}
25993
25994/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
25995///
25996/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_epi64&expand=6028)
25997#[inline]
25998#[target_feature(enable = "avx512f")]
25999#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26000#[cfg_attr(test, assert_instr(vpunpckhqdq))]
26001pub fn _mm512_mask_unpackhi_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
26002 unsafe {
26003 let unpackhi: i64x8 = _mm512_unpackhi_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, unpackhi, src.as_i64x8()))
26005 }
26006}
26007
26008/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26009///
26010/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_epi64&expand=6029)
26011#[inline]
26012#[target_feature(enable = "avx512f")]
26013#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26014#[cfg_attr(test, assert_instr(vpunpckhqdq))]
26015pub fn _mm512_maskz_unpackhi_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
26016 unsafe {
26017 let unpackhi: i64x8 = _mm512_unpackhi_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, unpackhi, i64x8::ZERO))
26019 }
26020}
26021
26022/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26023///
26024/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_epi64&expand=6025)
26025#[inline]
26026#[target_feature(enable = "avx512f,avx512vl")]
26027#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26028#[cfg_attr(test, assert_instr(vpunpckhqdq))]
26029pub fn _mm256_mask_unpackhi_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
26030 unsafe {
26031 let unpackhi: i64x4 = _mm256_unpackhi_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, unpackhi, src.as_i64x4()))
26033 }
26034}
26035
26036/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26037///
26038/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_epi64&expand=6026)
26039#[inline]
26040#[target_feature(enable = "avx512f,avx512vl")]
26041#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26042#[cfg_attr(test, assert_instr(vpunpckhqdq))]
26043pub fn _mm256_maskz_unpackhi_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
26044 unsafe {
26045 let unpackhi: i64x4 = _mm256_unpackhi_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, unpackhi, i64x4::ZERO))
26047 }
26048}
26049
26050/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26051///
26052/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_epi64&expand=6022)
26053#[inline]
26054#[target_feature(enable = "avx512f,avx512vl")]
26055#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26056#[cfg_attr(test, assert_instr(vpunpckhqdq))]
26057pub fn _mm_mask_unpackhi_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26058 unsafe {
26059 let unpackhi: i64x2 = _mm_unpackhi_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, unpackhi, src.as_i64x2()))
26061 }
26062}
26063
26064/// Unpack and interleave 64-bit integers from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26065///
26066/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_epi64&expand=6023)
26067#[inline]
26068#[target_feature(enable = "avx512f,avx512vl")]
26069#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26070#[cfg_attr(test, assert_instr(vpunpckhqdq))]
26071pub fn _mm_maskz_unpackhi_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26072 unsafe {
26073 let unpackhi: i64x2 = _mm_unpackhi_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, unpackhi, i64x2::ZERO))
26075 }
26076}
26077
26078/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst.
26079///
26080/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_ps&expand=6060)
26081#[inline]
26082#[target_feature(enable = "avx512f")]
26083#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26084#[cfg_attr(test, assert_instr(vunpckhps))]
26085pub fn _mm512_unpackhi_ps(a: __m512, b: __m512) -> __m512 {
26086 unsafe {
26087 #[rustfmt::skip]
26088 simd_shuffle!(
26089 a, b,
26090 [ 2, 18, 3, 19,
26091 2 + 4, 18 + 4, 3 + 4, 19 + 4,
26092 2 + 8, 18 + 8, 3 + 8, 19 + 8,
26093 2 + 12, 18 + 12, 3 + 12, 19 + 12],
26094 )
26095 }
26096}
26097
26098/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26099///
26100/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_ps&expand=6058)
26101#[inline]
26102#[target_feature(enable = "avx512f")]
26103#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26104#[cfg_attr(test, assert_instr(vunpckhps))]
26105pub fn _mm512_mask_unpackhi_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
26106 unsafe {
26107 let unpackhi: f32x16 = _mm512_unpackhi_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, unpackhi, src.as_f32x16()))
26109 }
26110}
26111
26112/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26113///
26114/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_ps&expand=6059)
26115#[inline]
26116#[target_feature(enable = "avx512f")]
26117#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26118#[cfg_attr(test, assert_instr(vunpckhps))]
26119pub fn _mm512_maskz_unpackhi_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
26120 unsafe {
26121 let unpackhi: f32x16 = _mm512_unpackhi_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, unpackhi, f32x16::ZERO))
26123 }
26124}
26125
26126/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26127///
26128/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_ps&expand=6055)
26129#[inline]
26130#[target_feature(enable = "avx512f,avx512vl")]
26131#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26132#[cfg_attr(test, assert_instr(vunpckhps))]
26133pub fn _mm256_mask_unpackhi_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
26134 unsafe {
26135 let unpackhi: f32x8 = _mm256_unpackhi_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, unpackhi, src.as_f32x8()))
26137 }
26138}
26139
26140/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26141///
26142/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_ps&expand=6056)
26143#[inline]
26144#[target_feature(enable = "avx512f,avx512vl")]
26145#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26146#[cfg_attr(test, assert_instr(vunpckhps))]
26147pub fn _mm256_maskz_unpackhi_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
26148 unsafe {
26149 let unpackhi: f32x8 = _mm256_unpackhi_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, unpackhi, f32x8::ZERO))
26151 }
26152}
26153
26154/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26155///
26156/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_ps&expand=6052)
26157#[inline]
26158#[target_feature(enable = "avx512f,avx512vl")]
26159#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26160#[cfg_attr(test, assert_instr(vunpckhps))]
26161pub fn _mm_mask_unpackhi_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
26162 unsafe {
26163 let unpackhi: f32x4 = _mm_unpackhi_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, unpackhi, src.as_f32x4()))
26165 }
26166}
26167
26168/// Unpack and interleave single-precision (32-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26169///
26170/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_ps&expand=6053)
26171#[inline]
26172#[target_feature(enable = "avx512f,avx512vl")]
26173#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26174#[cfg_attr(test, assert_instr(vunpckhps))]
26175pub fn _mm_maskz_unpackhi_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
26176 unsafe {
26177 let unpackhi: f32x4 = _mm_unpackhi_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, unpackhi, f32x4::ZERO))
26179 }
26180}
26181
26182/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst.
26183///
26184/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpackhi_pd&expand=6048)
26185#[inline]
26186#[target_feature(enable = "avx512f")]
26187#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26188#[cfg_attr(test, assert_instr(vunpckhpd))]
26189pub fn _mm512_unpackhi_pd(a: __m512d, b: __m512d) -> __m512d {
26190 unsafe { simd_shuffle!(a, b, [1, 9, 1 + 2, 9 + 2, 1 + 4, 9 + 4, 1 + 6, 9 + 6]) }
26191}
26192
26193/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26194///
26195/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpackhi_pd&expand=6046)
26196#[inline]
26197#[target_feature(enable = "avx512f")]
26198#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26199#[cfg_attr(test, assert_instr(vunpckhpd))]
26200pub fn _mm512_mask_unpackhi_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
26201 unsafe {
26202 let unpackhi: f64x8 = _mm512_unpackhi_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, unpackhi, src.as_f64x8()))
26204 }
26205}
26206
26207/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26208///
26209/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpackhi_pd&expand=6047)
26210#[inline]
26211#[target_feature(enable = "avx512f")]
26212#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26213#[cfg_attr(test, assert_instr(vunpckhpd))]
26214pub fn _mm512_maskz_unpackhi_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
26215 unsafe {
26216 let unpackhi: f64x8 = _mm512_unpackhi_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, unpackhi, f64x8::ZERO))
26218 }
26219}
26220
26221/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26222///
26223/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpackhi_pd&expand=6043)
26224#[inline]
26225#[target_feature(enable = "avx512f,avx512vl")]
26226#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26227#[cfg_attr(test, assert_instr(vunpckhpd))]
26228pub fn _mm256_mask_unpackhi_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
26229 unsafe {
26230 let unpackhi: f64x4 = _mm256_unpackhi_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, unpackhi, src.as_f64x4()))
26232 }
26233}
26234
26235/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26236///
26237/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpackhi_pd&expand=6044)
26238#[inline]
26239#[target_feature(enable = "avx512f,avx512vl")]
26240#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26241#[cfg_attr(test, assert_instr(vunpckhpd))]
26242pub fn _mm256_maskz_unpackhi_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
26243 unsafe {
26244 let unpackhi: f64x4 = _mm256_unpackhi_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, unpackhi, f64x4::ZERO))
26246 }
26247}
26248
26249/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26250///
26251/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpackhi_pd&expand=6040)
26252#[inline]
26253#[target_feature(enable = "avx512f,avx512vl")]
26254#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26255#[cfg_attr(test, assert_instr(vunpckhpd))]
26256pub fn _mm_mask_unpackhi_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
26257 unsafe {
26258 let unpackhi: f64x2 = _mm_unpackhi_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, unpackhi, src.as_f64x2()))
26260 }
26261}
26262
26263/// Unpack and interleave double-precision (64-bit) floating-point elements from the high half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26264///
26265/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpackhi_pd&expand=6041)
26266#[inline]
26267#[target_feature(enable = "avx512f,avx512vl")]
26268#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26269#[cfg_attr(test, assert_instr(vunpckhpd))]
26270pub fn _mm_maskz_unpackhi_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
26271 unsafe {
26272 let unpackhi: f64x2 = _mm_unpackhi_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, unpackhi, f64x2::ZERO))
26274 }
26275}
26276
26277/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst.
26278///
26279/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_epi32&expand=6078)
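///
/// A minimal sketch, mirroring the `unpackhi` example but for the low half of
/// each 128-bit lane (illustrative only, not run as a doctest):
///
/// ```ignore
/// let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
/// let b = _mm512_setr_epi32(16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
/// // [0, 16, 1, 17, 4, 20, 5, 21, 8, 24, 9, 25, 12, 28, 13, 29]
/// let r = _mm512_unpacklo_epi32(a, b);
/// ```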
26280#[inline]
26281#[target_feature(enable = "avx512f")]
26282#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26283#[cfg_attr(test, assert_instr(vunpcklps))] //should be vpunpckldq
26284pub fn _mm512_unpacklo_epi32(a: __m512i, b: __m512i) -> __m512i {
26285 unsafe {
26286 let a: i32x16 = a.as_i32x16();
26287 let b: i32x16 = b.as_i32x16();
26288 #[rustfmt::skip]
26289 let r: i32x16 = simd_shuffle!(
26290 a, b,
26291 [ 0, 16, 1, 17,
26292 0 + 4, 16 + 4, 1 + 4, 17 + 4,
26293 0 + 8, 16 + 8, 1 + 8, 17 + 8,
26294 0 + 12, 16 + 12, 1 + 12, 17 + 12],
26295 );
        transmute(r)
26297 }
26298}
26299
26300/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26301///
26302/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_epi32&expand=6076)
26303#[inline]
26304#[target_feature(enable = "avx512f")]
26305#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26306#[cfg_attr(test, assert_instr(vpunpckldq))]
26307pub fn _mm512_mask_unpacklo_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
26308 unsafe {
26309 let unpacklo: i32x16 = _mm512_unpacklo_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, unpacklo, src.as_i32x16()))
26311 }
26312}
26313
26314/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26315///
26316/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_epi32&expand=6077)
26317#[inline]
26318#[target_feature(enable = "avx512f")]
26319#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26320#[cfg_attr(test, assert_instr(vpunpckldq))]
26321pub fn _mm512_maskz_unpacklo_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
26322 unsafe {
26323 let unpacklo: i32x16 = _mm512_unpacklo_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, unpacklo, i32x16::ZERO))
26325 }
26326}
26327
26328/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26329///
26330/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_epi32&expand=6073)
26331#[inline]
26332#[target_feature(enable = "avx512f,avx512vl")]
26333#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26334#[cfg_attr(test, assert_instr(vpunpckldq))]
26335pub fn _mm256_mask_unpacklo_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
26336 unsafe {
26337 let unpacklo: i32x8 = _mm256_unpacklo_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, unpacklo, src.as_i32x8()))
26339 }
26340}
26341
26342/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26343///
26344/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_epi32&expand=6074)
26345#[inline]
26346#[target_feature(enable = "avx512f,avx512vl")]
26347#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26348#[cfg_attr(test, assert_instr(vpunpckldq))]
26349pub fn _mm256_maskz_unpacklo_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
26350 unsafe {
26351 let unpacklo: i32x8 = _mm256_unpacklo_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, unpacklo, i32x8::ZERO))
26353 }
26354}
26355
26356/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26357///
26358/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_epi32&expand=6070)
26359#[inline]
26360#[target_feature(enable = "avx512f,avx512vl")]
26361#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26362#[cfg_attr(test, assert_instr(vpunpckldq))]
26363pub fn _mm_mask_unpacklo_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26364 unsafe {
26365 let unpacklo: i32x4 = _mm_unpacklo_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, unpacklo, src.as_i32x4()))
26367 }
26368}
26369
26370/// Unpack and interleave 32-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26371///
26372/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_epi32&expand=6071)
26373#[inline]
26374#[target_feature(enable = "avx512f,avx512vl")]
26375#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26376#[cfg_attr(test, assert_instr(vpunpckldq))]
26377pub fn _mm_maskz_unpacklo_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26378 unsafe {
26379 let unpacklo: i32x4 = _mm_unpacklo_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, unpacklo, i32x4::ZERO))
26381 }
26382}
26383
26384/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst.
26385///
26386/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_epi64&expand=6087)
26387#[inline]
26388#[target_feature(enable = "avx512f")]
26389#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26390#[cfg_attr(test, assert_instr(vunpcklpd))] //should be vpunpcklqdq
26391pub fn _mm512_unpacklo_epi64(a: __m512i, b: __m512i) -> __m512i {
26392 unsafe { simd_shuffle!(a, b, [0, 8, 0 + 2, 8 + 2, 0 + 4, 8 + 4, 0 + 6, 8 + 6]) }
26393}
26394
26395/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26396///
26397/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_epi64&expand=6085)
26398#[inline]
26399#[target_feature(enable = "avx512f")]
26400#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26401#[cfg_attr(test, assert_instr(vpunpcklqdq))]
26402pub fn _mm512_mask_unpacklo_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
26403 unsafe {
26404 let unpacklo: i64x8 = _mm512_unpacklo_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, unpacklo, src.as_i64x8()))
26406 }
26407}
26408
26409/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26410///
26411/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_epi64&expand=6086)
26412#[inline]
26413#[target_feature(enable = "avx512f")]
26414#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26415#[cfg_attr(test, assert_instr(vpunpcklqdq))]
26416pub fn _mm512_maskz_unpacklo_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
26417 unsafe {
26418 let unpacklo: i64x8 = _mm512_unpacklo_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, unpacklo, i64x8::ZERO))
26420 }
26421}
26422
26423/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26424///
26425/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_epi64&expand=6082)
26426#[inline]
26427#[target_feature(enable = "avx512f,avx512vl")]
26428#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26429#[cfg_attr(test, assert_instr(vpunpcklqdq))]
26430pub fn _mm256_mask_unpacklo_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
26431 unsafe {
26432 let unpacklo: i64x4 = _mm256_unpacklo_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, unpacklo, src.as_i64x4()))
26434 }
26435}
26436
26437/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26438///
26439/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_epi64&expand=6083)
26440#[inline]
26441#[target_feature(enable = "avx512f,avx512vl")]
26442#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26443#[cfg_attr(test, assert_instr(vpunpcklqdq))]
26444pub fn _mm256_maskz_unpacklo_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
26445 unsafe {
26446 let unpacklo: i64x4 = _mm256_unpacklo_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, unpacklo, i64x4::ZERO))
26448 }
26449}
26450
26451/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26452///
26453/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_epi64&expand=6079)
26454#[inline]
26455#[target_feature(enable = "avx512f,avx512vl")]
26456#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26457#[cfg_attr(test, assert_instr(vpunpcklqdq))]
26458pub fn _mm_mask_unpacklo_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26459 unsafe {
26460 let unpacklo: i64x2 = _mm_unpacklo_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, unpacklo, src.as_i64x2()))
26462 }
26463}
26464
26465/// Unpack and interleave 64-bit integers from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26466///
26467/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_epi64&expand=6080)
26468#[inline]
26469#[target_feature(enable = "avx512f,avx512vl")]
26470#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26471#[cfg_attr(test, assert_instr(vpunpcklqdq))]
26472pub fn _mm_maskz_unpacklo_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
26473 unsafe {
26474 let unpacklo: i64x2 = _mm_unpacklo_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, unpacklo, i64x2::ZERO))
26476 }
26477}
26478
26479/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst.
26480///
26481/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_ps&expand=6117)
26482#[inline]
26483#[target_feature(enable = "avx512f")]
26484#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26485#[cfg_attr(test, assert_instr(vunpcklps))]
26486pub fn _mm512_unpacklo_ps(a: __m512, b: __m512) -> __m512 {
26487 unsafe {
26488 #[rustfmt::skip]
26489 simd_shuffle!(a, b,
26490 [ 0, 16, 1, 17,
26491 0 + 4, 16 + 4, 1 + 4, 17 + 4,
26492 0 + 8, 16 + 8, 1 + 8, 17 + 8,
26493 0 + 12, 16 + 12, 1 + 12, 17 + 12],
26494 )
26495 }
26496}
26497
26498/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26499///
26500/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_ps&expand=6115)
26501#[inline]
26502#[target_feature(enable = "avx512f")]
26503#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26504#[cfg_attr(test, assert_instr(vunpcklps))]
26505pub fn _mm512_mask_unpacklo_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
26506 unsafe {
26507 let unpacklo: f32x16 = _mm512_unpacklo_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, unpacklo, src.as_f32x16()))
26509 }
26510}
26511
26512/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26513///
26514/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_ps&expand=6116)
26515#[inline]
26516#[target_feature(enable = "avx512f")]
26517#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26518#[cfg_attr(test, assert_instr(vunpcklps))]
26519pub fn _mm512_maskz_unpacklo_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
26520 unsafe {
26521 let unpacklo: f32x16 = _mm512_unpacklo_ps(a, b).as_f32x16();
        transmute(simd_select_bitmask(k, unpacklo, f32x16::ZERO))
26523 }
26524}
26525
26526/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26527///
26528/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_ps&expand=6112)
26529#[inline]
26530#[target_feature(enable = "avx512f,avx512vl")]
26531#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26532#[cfg_attr(test, assert_instr(vunpcklps))]
26533pub fn _mm256_mask_unpacklo_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
26534 unsafe {
26535 let unpacklo: f32x8 = _mm256_unpacklo_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, unpacklo, src.as_f32x8()))
26537 }
26538}
26539
26540/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26541///
26542/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_ps&expand=6113)
26543#[inline]
26544#[target_feature(enable = "avx512f,avx512vl")]
26545#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26546#[cfg_attr(test, assert_instr(vunpcklps))]
26547pub fn _mm256_maskz_unpacklo_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
26548 unsafe {
26549 let unpacklo: f32x8 = _mm256_unpacklo_ps(a, b).as_f32x8();
        transmute(simd_select_bitmask(k, unpacklo, f32x8::ZERO))
26551 }
26552}
26553
26554/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26555///
26556/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_ps&expand=6109)
26557#[inline]
26558#[target_feature(enable = "avx512f,avx512vl")]
26559#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26560#[cfg_attr(test, assert_instr(vunpcklps))]
26561pub fn _mm_mask_unpacklo_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
26562 unsafe {
26563 let unpacklo: f32x4 = _mm_unpacklo_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, unpacklo, src.as_f32x4()))
26565 }
26566}
26567
26568/// Unpack and interleave single-precision (32-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26569///
26570/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_ps&expand=6110)
26571#[inline]
26572#[target_feature(enable = "avx512f,avx512vl")]
26573#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26574#[cfg_attr(test, assert_instr(vunpcklps))]
26575pub fn _mm_maskz_unpacklo_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
26576 unsafe {
26577 let unpacklo: f32x4 = _mm_unpacklo_ps(a, b).as_f32x4();
        transmute(simd_select_bitmask(k, unpacklo, f32x4::ZERO))
26579 }
26580}
26581
26582/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst.
26583///
26584/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_unpacklo_pd&expand=6105)
26585#[inline]
26586#[target_feature(enable = "avx512f")]
26587#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26588#[cfg_attr(test, assert_instr(vunpcklpd))]
26589pub fn _mm512_unpacklo_pd(a: __m512d, b: __m512d) -> __m512d {
26590 unsafe { simd_shuffle!(a, b, [0, 8, 0 + 2, 8 + 2, 0 + 4, 8 + 4, 0 + 6, 8 + 6]) }
26591}
26592
26593/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26594///
26595/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_unpacklo_pd&expand=6103)
26596#[inline]
26597#[target_feature(enable = "avx512f")]
26598#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26599#[cfg_attr(test, assert_instr(vunpcklpd))]
26600pub fn _mm512_mask_unpacklo_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
26601 unsafe {
26602 let unpacklo: f64x8 = _mm512_unpacklo_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, unpacklo, src.as_f64x8()))
26604 }
26605}
26606
26607/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26608///
26609/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_unpacklo_pd&expand=6104)
26610#[inline]
26611#[target_feature(enable = "avx512f")]
26612#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26613#[cfg_attr(test, assert_instr(vunpcklpd))]
26614pub fn _mm512_maskz_unpacklo_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
26615 unsafe {
26616 let unpacklo: f64x8 = _mm512_unpacklo_pd(a, b).as_f64x8();
        transmute(simd_select_bitmask(k, unpacklo, f64x8::ZERO))
26618 }
26619}
26620
26621/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26622///
26623/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_unpacklo_pd&expand=6100)
26624#[inline]
26625#[target_feature(enable = "avx512f,avx512vl")]
26626#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26627#[cfg_attr(test, assert_instr(vunpcklpd))]
26628pub fn _mm256_mask_unpacklo_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
26629 unsafe {
26630 let unpacklo: f64x4 = _mm256_unpacklo_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, unpacklo, src.as_f64x4()))
26632 }
26633}
26634
26635/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26636///
26637/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_unpacklo_pd&expand=6101)
26638#[inline]
26639#[target_feature(enable = "avx512f,avx512vl")]
26640#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26641#[cfg_attr(test, assert_instr(vunpcklpd))]
26642pub fn _mm256_maskz_unpacklo_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
26643 unsafe {
26644 let unpacklo: f64x4 = _mm256_unpacklo_pd(a, b).as_f64x4();
        transmute(simd_select_bitmask(k, unpacklo, f64x4::ZERO))
26646 }
26647}
26648
26649/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26650///
26651/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_unpacklo_pd&expand=6097)
26652#[inline]
26653#[target_feature(enable = "avx512f,avx512vl")]
26654#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26655#[cfg_attr(test, assert_instr(vunpcklpd))]
26656pub fn _mm_mask_unpacklo_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
26657 unsafe {
26658 let unpacklo: f64x2 = _mm_unpacklo_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, unpacklo, src.as_f64x2()))
26660 }
26661}
26662
26663/// Unpack and interleave double-precision (64-bit) floating-point elements from the low half of each 128-bit lane in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
26664///
26665/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_unpacklo_pd&expand=6098)
26666#[inline]
26667#[target_feature(enable = "avx512f,avx512vl")]
26668#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26669#[cfg_attr(test, assert_instr(vunpcklpd))]
26670pub fn _mm_maskz_unpacklo_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
26671 unsafe {
26672 let unpacklo: f64x2 = _mm_unpacklo_pd(a, b).as_f64x2();
        transmute(simd_select_bitmask(k, unpacklo, f64x2::ZERO))
26674 }
26675}
26676
26677/// Cast vector of type __m128 to type __m512; the upper 384 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26678///
26679/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps128_ps512&expand=621)
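///
/// A hedged sketch (illustrative only, not run as a doctest). Only the low four
/// lanes of the result are meaningful; the upper lanes must not be inspected:
///
/// ```ignore
/// let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
/// let wide = _mm512_castps128_ps512(a);
/// // Recovering the low 128 bits gives back [1.0, 2.0, 3.0, 4.0]; the other
/// // twelve lanes of `wide` hold unspecified values.
/// let low = _mm512_castps512_ps128(wide);
/// ```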
26680#[inline]
26681#[target_feature(enable = "avx512f")]
26682#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26683pub fn _mm512_castps128_ps512(a: __m128) -> __m512 {
26684 unsafe {
26685 simd_shuffle!(
26686 a,
26687 _mm_undefined_ps(),
26688 [0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4],
26689 )
26690 }
26691}
26692
26693/// Cast vector of type __m256 to type __m512; the upper 256 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26694///
26695/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps256_ps512&expand=623)
26696#[inline]
26697#[target_feature(enable = "avx512f")]
26698#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26699pub fn _mm512_castps256_ps512(a: __m256) -> __m512 {
26700 unsafe {
26701 simd_shuffle!(
26702 a,
26703 _mm256_undefined_ps(),
26704 [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
26705 )
26706 }
26707}
26708
26709/// Cast vector of type __m128 to type __m512; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26710///
26711/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextps128_ps512&expand=6196)
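///
/// A minimal, illustrative sketch contrasting this intrinsic with
/// `_mm512_castps128_ps512`, whose upper lanes are left undefined (the `demo`
/// helper and its values are made up, not part of Intel's documentation). It
/// assumes a nightly toolchain with `#![feature(stdarch_x86_avx512)]` and is
/// marked `ignore`, so it is not compiled as a doctest:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() -> [f32; 16] {
///     let lo = _mm_set_ps(4.0, 3.0, 2.0, 1.0); // lanes [1.0, 2.0, 3.0, 4.0]
///     let wide = _mm512_zextps128_ps512(lo);
///     // Lanes 4..16 are guaranteed to be zero.
///     unsafe { core::mem::transmute(wide) } // => [1.0, 2.0, 3.0, 4.0, 0.0, ...]
/// }
/// ```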
26712#[inline]
26713#[target_feature(enable = "avx512f")]
26714#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26715pub fn _mm512_zextps128_ps512(a: __m128) -> __m512 {
26716 unsafe {
26717 simd_shuffle!(
26718 a,
26719 _mm_set1_ps(0.),
26720 [0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4],
26721 )
26722 }
26723}
26724
26725/// Cast vector of type __m256 to type __m512; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26726///
26727/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextps256_ps512&expand=6197)
26728#[inline]
26729#[target_feature(enable = "avx512f")]
26730#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26731pub fn _mm512_zextps256_ps512(a: __m256) -> __m512 {
26732 unsafe {
26733 simd_shuffle!(
26734 a,
26735 _mm256_set1_ps(0.),
26736 [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
26737 )
26738 }
26739}
26740
26741/// Cast vector of type __m512 to type __m128. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26742///
26743/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps512_ps128&expand=624)
26744#[inline]
26745#[target_feature(enable = "avx512f")]
26746#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26747pub fn _mm512_castps512_ps128(a: __m512) -> __m128 {
26748 unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) }
26749}
26750
26751/// Cast vector of type __m512 to type __m256. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26752///
26753/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps512_ps256&expand=625)
26754#[inline]
26755#[target_feature(enable = "avx512f")]
26756#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26757pub fn _mm512_castps512_ps256(a: __m512) -> __m256 {
26758 unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) }
26759}
26760
26761/// Cast vector of type __m512 to type __m512d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26762///
26763/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps_pd&expand=616)
26764#[inline]
26765#[target_feature(enable = "avx512f")]
26766#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26767pub fn _mm512_castps_pd(a: __m512) -> __m512d {
    unsafe { transmute(a) }
26769}
26770
26771/// Cast vector of type __m512 to type __m512i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26772///
26773/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castps_si512&expand=619)
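///
/// A minimal, illustrative sketch showing that the cast only reinterprets bits
/// and performs no numeric conversion (the `demo` helper and its values are
/// made up, not part of Intel's documentation). It assumes a nightly toolchain
/// with `#![feature(stdarch_x86_avx512)]` and is marked `ignore`, so it is not
/// compiled as a doctest:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() -> i32 {
///     let ones = _mm512_set1_ps(1.0);
///     // Each lane now holds the IEEE-754 bit pattern of 1.0f32.
///     let bits = _mm512_castps_si512(ones);
///     _mm512_cvtsi512_si32(bits) // => 0x3F80_0000
/// }
/// ```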
26774#[inline]
26775#[target_feature(enable = "avx512f")]
26776#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26777pub fn _mm512_castps_si512(a: __m512) -> __m512i {
    unsafe { transmute(a) }
26779}
26780
26781/// Cast vector of type __m128d to type __m512d; the upper 384 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26782///
26783/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd128_pd512&expand=609)
26784#[inline]
26785#[target_feature(enable = "avx512f")]
26786#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26787pub fn _mm512_castpd128_pd512(a: __m128d) -> __m512d {
26788 unsafe { simd_shuffle!(a, _mm_undefined_pd(), [0, 1, 2, 2, 2, 2, 2, 2]) }
26789}
26790
26791/// Cast vector of type __m256d to type __m512d; the upper 256 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26792///
26793/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd256_pd512&expand=611)
26794#[inline]
26795#[target_feature(enable = "avx512f")]
26796#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26797pub fn _mm512_castpd256_pd512(a: __m256d) -> __m512d {
26798 unsafe { simd_shuffle!(a, _mm256_undefined_pd(), [0, 1, 2, 3, 4, 4, 4, 4]) }
26799}
26800
26801/// Cast vector of type __m128d to type __m512d; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26802///
26803/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextpd128_pd512&expand=6193)
26804#[inline]
26805#[target_feature(enable = "avx512f")]
26806#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26807pub fn _mm512_zextpd128_pd512(a: __m128d) -> __m512d {
26808 unsafe { simd_shuffle!(a, _mm_set1_pd(0.), [0, 1, 2, 2, 2, 2, 2, 2]) }
26809}
26810
26811/// Cast vector of type __m256d to type __m512d; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26812///
26813/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextpd256_pd512&expand=6194)
26814#[inline]
26815#[target_feature(enable = "avx512f")]
26816#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26817pub fn _mm512_zextpd256_pd512(a: __m256d) -> __m512d {
26818 unsafe { simd_shuffle!(a, _mm256_set1_pd(0.), [0, 1, 2, 3, 4, 4, 4, 4]) }
26819}
26820
26821/// Cast vector of type __m512d to type __m128d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26822///
26823/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd512_pd128&expand=612)
26824#[inline]
26825#[target_feature(enable = "avx512f")]
26826#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26827pub fn _mm512_castpd512_pd128(a: __m512d) -> __m128d {
26828 unsafe { simd_shuffle!(a, a, [0, 1]) }
26829}
26830
26831/// Cast vector of type __m512d to type __m256d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26832///
26833/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd512_pd256&expand=613)
26834#[inline]
26835#[target_feature(enable = "avx512f")]
26836#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26837pub fn _mm512_castpd512_pd256(a: __m512d) -> __m256d {
26838 unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) }
26839}
26840
26841/// Cast vector of type __m512d to type __m512. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26842///
26843/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd_ps&expand=604)
26844#[inline]
26845#[target_feature(enable = "avx512f")]
26846#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26847pub fn _mm512_castpd_ps(a: __m512d) -> __m512 {
    unsafe { transmute(a) }
26849}
26850
26851/// Cast vector of type __m512d to type __m512i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26852///
26853/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castpd_si512&expand=607)
26854#[inline]
26855#[target_feature(enable = "avx512f")]
26856#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26857pub fn _mm512_castpd_si512(a: __m512d) -> __m512i {
    unsafe { transmute(a) }
26859}
26860
26861/// Cast vector of type __m128i to type __m512i; the upper 384 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26862///
26863/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi128_si512&expand=629)
26864#[inline]
26865#[target_feature(enable = "avx512f")]
26866#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26867pub fn _mm512_castsi128_si512(a: __m128i) -> __m512i {
26868 unsafe { simd_shuffle!(a, _mm_undefined_si128(), [0, 1, 2, 2, 2, 2, 2, 2]) }
26869}
26870
26871/// Cast vector of type __m256i to type __m512i; the upper 256 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26872///
26873/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi256_si512&expand=633)
26874#[inline]
26875#[target_feature(enable = "avx512f")]
26876#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26877pub fn _mm512_castsi256_si512(a: __m256i) -> __m512i {
26878 unsafe { simd_shuffle!(a, _mm256_undefined_si256(), [0, 1, 2, 3, 4, 4, 4, 4]) }
26879}
26880
26881/// Cast vector of type __m128i to type __m512i; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26882///
26883/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextsi128_si512&expand=6199)
26884#[inline]
26885#[target_feature(enable = "avx512f")]
26886#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26887pub fn _mm512_zextsi128_si512(a: __m128i) -> __m512i {
26888 unsafe { simd_shuffle!(a, _mm_setzero_si128(), [0, 1, 2, 2, 2, 2, 2, 2]) }
26889}
26890
26891/// Cast vector of type __m256i to type __m512i; the upper 256 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26892///
26893/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_zextsi256_si512&expand=6200)
26894#[inline]
26895#[target_feature(enable = "avx512f")]
26896#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26897pub fn _mm512_zextsi256_si512(a: __m256i) -> __m512i {
26898 unsafe { simd_shuffle!(a, _mm256_setzero_si256(), [0, 1, 2, 3, 4, 4, 4, 4]) }
26899}
26900
26901/// Cast vector of type __m512i to type __m128i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26902///
26903/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_si128&expand=636)
26904#[inline]
26905#[target_feature(enable = "avx512f")]
26906#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26907pub fn _mm512_castsi512_si128(a: __m512i) -> __m128i {
26908 unsafe { simd_shuffle!(a, a, [0, 1]) }
26909}
26910
26911/// Cast vector of type __m512i to type __m256i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26912///
26913/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_si256&expand=637)
26914#[inline]
26915#[target_feature(enable = "avx512f")]
26916#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26917pub fn _mm512_castsi512_si256(a: __m512i) -> __m256i {
26918 unsafe { simd_shuffle!(a, a, [0, 1, 2, 3]) }
26919}
26920
26921/// Cast vector of type __m512i to type __m512. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26922///
26923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_ps&expand=635)
26924#[inline]
26925#[target_feature(enable = "avx512f")]
26926#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26927pub fn _mm512_castsi512_ps(a: __m512i) -> __m512 {
    unsafe { transmute(a) }
26929}
26930
26931/// Cast vector of type __m512i to type __m512d. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency.
26932///
26933/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_castsi512_pd&expand=634)
26934#[inline]
26935#[target_feature(enable = "avx512f")]
26936#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26937pub fn _mm512_castsi512_pd(a: __m512i) -> __m512d {
    unsafe { transmute(a) }
26939}
26940
26941/// Copy the lower 32-bit integer in a to dst.
26942///
26943/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsi512_si32&expand=1882)
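///
/// A minimal, illustrative sketch (the `demo` helper and its values are made
/// up, not part of Intel's documentation). It assumes a nightly toolchain with
/// `#![feature(stdarch_x86_avx512)]` and is marked `ignore`, so it is not
/// compiled as a doctest:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() -> i32 {
///     let v = _mm512_setr_epi32(7, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16);
///     // Only the lowest 32-bit lane is returned.
///     _mm512_cvtsi512_si32(v) // => 7
/// }
/// ```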
26944#[inline]
26945#[target_feature(enable = "avx512f")]
26946#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26947#[cfg_attr(all(test, not(target_env = "msvc")), assert_instr(vmovd))]
26948pub fn _mm512_cvtsi512_si32(a: __m512i) -> i32 {
26949 unsafe { simd_extract!(a.as_i32x16(), 0) }
26950}
26951
26952/// Copy the lower single-precision (32-bit) floating-point element of a to dst.
26953///
26954/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtss_f32)
26955#[inline]
26956#[target_feature(enable = "avx512f")]
26957#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26958pub fn _mm512_cvtss_f32(a: __m512) -> f32 {
26959 unsafe { simd_extract!(a, 0) }
26960}
26961
26962/// Copy the lower double-precision (64-bit) floating-point element of a to dst.
26963///
26964/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cvtsd_f64)
26965#[inline]
26966#[target_feature(enable = "avx512f")]
26967#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26968pub fn _mm512_cvtsd_f64(a: __m512d) -> f64 {
26969 unsafe { simd_extract!(a, 0) }
26970}
26971
26972/// Broadcast the low packed 32-bit integer from a to all elements of dst.
26973///
26974/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastd_epi32&expand=545)
26975#[inline]
26976#[target_feature(enable = "avx512f")]
26977#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26978#[cfg_attr(test, assert_instr(vbroadcast))] //should be vpbroadcastd
26979pub fn _mm512_broadcastd_epi32(a: __m128i) -> __m512i {
26980 unsafe {
26981 let a: i32x16 = _mm512_castsi128_si512(a).as_i32x16();
26982 let ret: i32x16 = simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
        transmute(ret)
26984 }
26985}
26986
26987/// Broadcast the low packed 32-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
26988///
26989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastd_epi32&expand=546)
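///
/// A minimal, illustrative sketch of the writemask behaviour (the `demo`
/// helper and its values are made up, not part of Intel's documentation). It
/// assumes a nightly toolchain with `#![feature(stdarch_x86_avx512)]` and is
/// marked `ignore`, so it is not compiled as a doctest:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() -> __m512i {
///     let src = _mm512_set1_epi32(-1);
///     let a = _mm_set1_epi32(7);
///     // Even-numbered lanes receive the broadcast value 7; odd-numbered
///     // lanes keep the corresponding lane of `src` (-1).
///     _mm512_mask_broadcastd_epi32(src, 0b0101_0101_0101_0101, a)
/// }
/// ```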
26990#[inline]
26991#[target_feature(enable = "avx512f")]
26992#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
26993#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
26994pub fn _mm512_mask_broadcastd_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
26995 unsafe {
26996 let broadcast: i32x16 = _mm512_broadcastd_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, broadcast, src.as_i32x16()))
26998 }
26999}
27000
27001/// Broadcast the low packed 32-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27002///
27003/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastd_epi32&expand=547)
27004#[inline]
27005#[target_feature(enable = "avx512f")]
27006#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27007#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
27008pub fn _mm512_maskz_broadcastd_epi32(k: __mmask16, a: __m128i) -> __m512i {
27009 unsafe {
27010 let broadcast: i32x16 = _mm512_broadcastd_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, broadcast, i32x16::ZERO))
27012 }
27013}
27014
27015/// Broadcast the low packed 32-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27016///
27017/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastd_epi32&expand=543)
27018#[inline]
27019#[target_feature(enable = "avx512f,avx512vl")]
27020#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27021#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
27022pub fn _mm256_mask_broadcastd_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
27023 unsafe {
27024 let broadcast: i32x8 = _mm256_broadcastd_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, broadcast, src.as_i32x8()))
27026 }
27027}
27028
27029/// Broadcast the low packed 32-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27030///
27031/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastd_epi32&expand=544)
27032#[inline]
27033#[target_feature(enable = "avx512f,avx512vl")]
27034#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27035#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
27036pub fn _mm256_maskz_broadcastd_epi32(k: __mmask8, a: __m128i) -> __m256i {
27037 unsafe {
27038 let broadcast: i32x8 = _mm256_broadcastd_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, broadcast, i32x8::ZERO))
27040 }
27041}
27042
27043/// Broadcast the low packed 32-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27044///
27045/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastd_epi32&expand=540)
27046#[inline]
27047#[target_feature(enable = "avx512f,avx512vl")]
27048#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27049#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
27050pub fn _mm_mask_broadcastd_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
27051 unsafe {
27052 let broadcast: i32x4 = _mm_broadcastd_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, broadcast, src.as_i32x4()))
27054 }
27055}
27056
27057/// Broadcast the low packed 32-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27058///
27059/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastd_epi32&expand=541)
27060#[inline]
27061#[target_feature(enable = "avx512f,avx512vl")]
27062#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27063#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastd
27064pub fn _mm_maskz_broadcastd_epi32(k: __mmask8, a: __m128i) -> __m128i {
27065 unsafe {
27066 let broadcast: i32x4 = _mm_broadcastd_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, broadcast, i32x4::ZERO))
27068 }
27069}
27070
27071/// Broadcast the low packed 64-bit integer from a to all elements of dst.
27072///
27073/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastq_epi64&expand=560)
27074#[inline]
27075#[target_feature(enable = "avx512f")]
27076#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27077#[cfg_attr(test, assert_instr(vbroadcast))] //should be vpbroadcastq
27078pub fn _mm512_broadcastq_epi64(a: __m128i) -> __m512i {
27079 unsafe { simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0]) }
27080}
27081
27082/// Broadcast the low packed 64-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27083///
27084/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastq_epi64&expand=561)
27085#[inline]
27086#[target_feature(enable = "avx512f")]
27087#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27088#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
27089pub fn _mm512_mask_broadcastq_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
27090 unsafe {
27091 let broadcast: i64x8 = _mm512_broadcastq_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, broadcast, src.as_i64x8()))
27093 }
27094}
27095
27096/// Broadcast the low packed 64-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27097///
27098/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastq_epi64&expand=562)
27099#[inline]
27100#[target_feature(enable = "avx512f")]
27101#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27102#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
27103pub fn _mm512_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m512i {
27104 unsafe {
27105 let broadcast: i64x8 = _mm512_broadcastq_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, broadcast, i64x8::ZERO))
27107 }
27108}
27109
27110/// Broadcast the low packed 64-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27111///
27112/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastq_epi64&expand=558)
27113#[inline]
27114#[target_feature(enable = "avx512f,avx512vl")]
27115#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27116#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
27117pub fn _mm256_mask_broadcastq_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
27118 unsafe {
27119 let broadcast: i64x4 = _mm256_broadcastq_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, broadcast, src.as_i64x4()))
27121 }
27122}
27123
27124/// Broadcast the low packed 64-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27125///
27126/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastq_epi64&expand=559)
27127#[inline]
27128#[target_feature(enable = "avx512f,avx512vl")]
27129#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27130#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
27131pub fn _mm256_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m256i {
27132 unsafe {
27133 let broadcast: i64x4 = _mm256_broadcastq_epi64(a).as_i64x4();
        transmute(simd_select_bitmask(k, broadcast, i64x4::ZERO))
27135 }
27136}
27137
27138/// Broadcast the low packed 64-bit integer from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27139///
27140/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastq_epi64&expand=555)
27141#[inline]
27142#[target_feature(enable = "avx512f,avx512vl")]
27143#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27144#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
27145pub fn _mm_mask_broadcastq_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
27146 unsafe {
27147 let broadcast: i64x2 = _mm_broadcastq_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, broadcast, src.as_i64x2()))
27149 }
27150}
27151
27152/// Broadcast the low packed 64-bit integer from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27153///
27154/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastq_epi64&expand=556)
27155#[inline]
27156#[target_feature(enable = "avx512f,avx512vl")]
27157#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27158#[cfg_attr(test, assert_instr(vpbroadcast))] //should be vpbroadcastq
27159pub fn _mm_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m128i {
27160 unsafe {
27161 let broadcast: i64x2 = _mm_broadcastq_epi64(a).as_i64x2();
        transmute(simd_select_bitmask(k, broadcast, i64x2::ZERO))
27163 }
27164}
27165
27166/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst.
27167///
27168/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastss_ps&expand=578)
27169#[inline]
27170#[target_feature(enable = "avx512f")]
27171#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27172#[cfg_attr(test, assert_instr(vbroadcastss))]
27173pub fn _mm512_broadcastss_ps(a: __m128) -> __m512 {
27174 unsafe { simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) }
27175}
27176
27177/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27178///
27179/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastss_ps&expand=579)
27180#[inline]
27181#[target_feature(enable = "avx512f")]
27182#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27183#[cfg_attr(test, assert_instr(vbroadcastss))]
27184pub fn _mm512_mask_broadcastss_ps(src: __m512, k: __mmask16, a: __m128) -> __m512 {
27185 unsafe {
27186 let broadcast: f32x16 = _mm512_broadcastss_ps(a).as_f32x16();
        transmute(simd_select_bitmask(k, broadcast, src.as_f32x16()))
27188 }
27189}
27190
27191/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27192///
27193/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastss_ps&expand=580)
27194#[inline]
27195#[target_feature(enable = "avx512f")]
27196#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27197#[cfg_attr(test, assert_instr(vbroadcastss))]
27198pub fn _mm512_maskz_broadcastss_ps(k: __mmask16, a: __m128) -> __m512 {
27199 unsafe {
27200 let broadcast: f32x16 = _mm512_broadcastss_ps(a).as_f32x16();
        transmute(simd_select_bitmask(k, broadcast, f32x16::ZERO))
27202 }
27203}
27204
27205/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27206///
27207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastss_ps&expand=576)
27208#[inline]
27209#[target_feature(enable = "avx512f,avx512vl")]
27210#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27211#[cfg_attr(test, assert_instr(vbroadcastss))]
27212pub fn _mm256_mask_broadcastss_ps(src: __m256, k: __mmask8, a: __m128) -> __m256 {
27213 unsafe {
27214 let broadcast: f32x8 = _mm256_broadcastss_ps(a).as_f32x8();
        transmute(simd_select_bitmask(k, broadcast, src.as_f32x8()))
27216 }
27217}
27218
27219/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27220///
27221/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastss_ps&expand=577)
27222#[inline]
27223#[target_feature(enable = "avx512f,avx512vl")]
27224#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27225#[cfg_attr(test, assert_instr(vbroadcastss))]
27226pub fn _mm256_maskz_broadcastss_ps(k: __mmask8, a: __m128) -> __m256 {
27227 unsafe {
27228 let broadcast: f32x8 = _mm256_broadcastss_ps(a).as_f32x8();
        transmute(simd_select_bitmask(k, broadcast, f32x8::ZERO))
27230 }
27231}
27232
27233/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27234///
27235/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_broadcastss_ps&expand=573)
27236#[inline]
27237#[target_feature(enable = "avx512f,avx512vl")]
27238#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27239#[cfg_attr(test, assert_instr(vbroadcastss))]
27240pub fn _mm_mask_broadcastss_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
27241 unsafe {
27242 let broadcast: f32x4 = _mm_broadcastss_ps(a).as_f32x4();
        transmute(simd_select_bitmask(k, broadcast, src.as_f32x4()))
27244 }
27245}
27246
27247/// Broadcast the low single-precision (32-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27248///
27249/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_broadcastss_ps&expand=574)
27250#[inline]
27251#[target_feature(enable = "avx512f,avx512vl")]
27252#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27253#[cfg_attr(test, assert_instr(vbroadcastss))]
27254pub fn _mm_maskz_broadcastss_ps(k: __mmask8, a: __m128) -> __m128 {
27255 unsafe {
27256 let broadcast: f32x4 = _mm_broadcastss_ps(a).as_f32x4();
        transmute(simd_select_bitmask(k, broadcast, f32x4::ZERO))
27258 }
27259}
27260
27261/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst.
27262///
27263/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcastsd_pd&expand=567)
27264#[inline]
27265#[target_feature(enable = "avx512f")]
27266#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27267#[cfg_attr(test, assert_instr(vbroadcastsd))]
27268pub fn _mm512_broadcastsd_pd(a: __m128d) -> __m512d {
27269 unsafe { simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0]) }
27270}
27271
27272/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27273///
27274/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcastsd_pd&expand=568)
27275#[inline]
27276#[target_feature(enable = "avx512f")]
27277#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27278#[cfg_attr(test, assert_instr(vbroadcastsd))]
27279pub fn _mm512_mask_broadcastsd_pd(src: __m512d, k: __mmask8, a: __m128d) -> __m512d {
27280 unsafe {
27281 let broadcast: f64x8 = _mm512_broadcastsd_pd(a).as_f64x8();
        transmute(simd_select_bitmask(k, broadcast, src.as_f64x8()))
27283 }
27284}
27285
27286/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27287///
27288/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcastsd_pd&expand=569)
27289#[inline]
27290#[target_feature(enable = "avx512f")]
27291#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27292#[cfg_attr(test, assert_instr(vbroadcastsd))]
27293pub fn _mm512_maskz_broadcastsd_pd(k: __mmask8, a: __m128d) -> __m512d {
27294 unsafe {
27295 let broadcast: f64x8 = _mm512_broadcastsd_pd(a).as_f64x8();
        transmute(simd_select_bitmask(k, broadcast, f64x8::ZERO))
27297 }
27298}
27299
27300/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27301///
27302/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcastsd_pd&expand=565)
27303#[inline]
27304#[target_feature(enable = "avx512f,avx512vl")]
27305#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27306#[cfg_attr(test, assert_instr(vbroadcastsd))]
27307pub fn _mm256_mask_broadcastsd_pd(src: __m256d, k: __mmask8, a: __m128d) -> __m256d {
27308 unsafe {
27309 let broadcast: f64x4 = _mm256_broadcastsd_pd(a).as_f64x4();
        transmute(simd_select_bitmask(k, broadcast, src.as_f64x4()))
27311 }
27312}
27313
27314/// Broadcast the low double-precision (64-bit) floating-point element from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27315///
27316/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcastsd_pd&expand=566)
27317#[inline]
27318#[target_feature(enable = "avx512f,avx512vl")]
27319#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27320#[cfg_attr(test, assert_instr(vbroadcastsd))]
27321pub fn _mm256_maskz_broadcastsd_pd(k: __mmask8, a: __m128d) -> __m256d {
27322 unsafe {
27323 let broadcast: f64x4 = _mm256_broadcastsd_pd(a).as_f64x4();
        transmute(simd_select_bitmask(k, broadcast, f64x4::ZERO))
27325 }
27326}
27327
27328/// Broadcast the 4 packed 32-bit integers from a to all elements of dst.
27329///
27330/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i32x4&expand=510)
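///
/// A minimal, illustrative sketch (the `demo` helper and its values are made
/// up, not part of Intel's documentation). It assumes a nightly toolchain with
/// `#![feature(stdarch_x86_avx512)]` and is marked `ignore`, so it is not
/// compiled as a doctest:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() -> [i32; 16] {
///     let a = _mm_setr_epi32(0, 1, 2, 3);
///     // The 128-bit source is repeated into all four 128-bit lanes.
///     let r = _mm512_broadcast_i32x4(a);
///     unsafe { core::mem::transmute(r) } // => [0, 1, 2, 3, 0, 1, 2, 3, ...]
/// }
/// ```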
27331#[inline]
27332#[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf
27333#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27334pub fn _mm512_broadcast_i32x4(a: __m128i) -> __m512i {
27335 unsafe {
27336 let a: i32x4 = a.as_i32x4();
27337 let ret: i32x16 = simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]);
        transmute(ret)
27339 }
27340}
27341
27342/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27343///
27344/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i32x4&expand=511)
27345#[inline]
27346#[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf
27347#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27348pub fn _mm512_mask_broadcast_i32x4(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
27349 unsafe {
27350 let broadcast: i32x16 = _mm512_broadcast_i32x4(a).as_i32x16();
        transmute(simd_select_bitmask(k, broadcast, src.as_i32x16()))
27352 }
27353}
27354
27355/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27356///
27357/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i32x4&expand=512)
27358#[inline]
27359#[target_feature(enable = "avx512f")] //msvc: vbroadcasti32x4, linux: vshuf
27360#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27361pub fn _mm512_maskz_broadcast_i32x4(k: __mmask16, a: __m128i) -> __m512i {
27362 unsafe {
27363 let broadcast: i32x16 = _mm512_broadcast_i32x4(a).as_i32x16();
        transmute(simd_select_bitmask(k, broadcast, i32x16::ZERO))
27365 }
27366}
27367
27368/// Broadcast the 4 packed 32-bit integers from a to all elements of dst.
27369///
27370/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_i32x4&expand=507)
27371#[inline]
27372#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcasti32x4, linux: vshuf
27373#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27374pub fn _mm256_broadcast_i32x4(a: __m128i) -> __m256i {
27375 unsafe {
27376 let a: i32x4 = a.as_i32x4();
27377 let ret: i32x8 = simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]);
        transmute(ret)
27379 }
27380}
27381
27382/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27383///
27384/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_i32x4&expand=508)
27385#[inline]
27386#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcasti32x4, linux: vshuf
27387#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27388pub fn _mm256_mask_broadcast_i32x4(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
27389 unsafe {
27390 let broadcast: i32x8 = _mm256_broadcast_i32x4(a).as_i32x8();
        transmute(simd_select_bitmask(k, broadcast, src.as_i32x8()))
27392 }
27393}
27394
27395/// Broadcast the 4 packed 32-bit integers from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27396///
27397/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_i32x4&expand=509)
27398#[inline]
27399#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcasti32x4, linux: vshuf
27400#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27401pub fn _mm256_maskz_broadcast_i32x4(k: __mmask8, a: __m128i) -> __m256i {
27402 unsafe {
27403 let broadcast: i32x8 = _mm256_broadcast_i32x4(a).as_i32x8();
        transmute(simd_select_bitmask(k, broadcast, i32x8::ZERO))
27405 }
27406}
27407
27408/// Broadcast the 4 packed 64-bit integers from a to all elements of dst.
27409///
27410/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_i64x4&expand=522)
27411#[inline]
27412#[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm
27413#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27414pub fn _mm512_broadcast_i64x4(a: __m256i) -> __m512i {
27415 unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]) }
27416}
27417
27418/// Broadcast the 4 packed 64-bit integers from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27419///
27420/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_i64x4&expand=523)
27421#[inline]
27422#[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm
27423#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27424pub fn _mm512_mask_broadcast_i64x4(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
27425 unsafe {
27426 let broadcast: i64x8 = _mm512_broadcast_i64x4(a).as_i64x8();
        transmute(simd_select_bitmask(k, broadcast, src.as_i64x8()))
27428 }
27429}
27430
27431/// Broadcast the 4 packed 64-bit integers from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27432///
27433/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_i64x4&expand=524)
27434#[inline]
27435#[target_feature(enable = "avx512f")] //msvc: vbroadcasti64x4, linux: vperm
27436#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27437pub fn _mm512_maskz_broadcast_i64x4(k: __mmask8, a: __m256i) -> __m512i {
27438 unsafe {
27439 let broadcast: i64x8 = _mm512_broadcast_i64x4(a).as_i64x8();
        transmute(simd_select_bitmask(k, broadcast, i64x8::ZERO))
27441 }
27442}
27443
27444/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst.
27445///
27446/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f32x4&expand=483)
27447#[inline]
27448#[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshuf
27449#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27450pub fn _mm512_broadcast_f32x4(a: __m128) -> __m512 {
27451 unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]) }
27452}
27453
27454/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27455///
27456/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f32x4&expand=484)
27457#[inline]
#[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshuf
27459#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27460pub fn _mm512_mask_broadcast_f32x4(src: __m512, k: __mmask16, a: __m128) -> __m512 {
27461 unsafe {
27462 let broadcast: f32x16 = _mm512_broadcast_f32x4(a).as_f32x16();
        transmute(simd_select_bitmask(k, broadcast, src.as_f32x16()))
27464 }
27465}
27466
27467/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27468///
27469/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f32x4&expand=485)
27470#[inline]
#[target_feature(enable = "avx512f")] //msvc: vbroadcastf32x4, linux: vshuf
27472#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27473pub fn _mm512_maskz_broadcast_f32x4(k: __mmask16, a: __m128) -> __m512 {
27474 unsafe {
27475 let broadcast: f32x16 = _mm512_broadcast_f32x4(a).as_f32x16();
        transmute(simd_select_bitmask(k, broadcast, f32x16::ZERO))
27477 }
27478}
27479
27480/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst.
27481///
27482/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_broadcast_f32x4&expand=480)
27483#[inline]
27484#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcastf32x4, linux: vshuf
27485#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27486pub fn _mm256_broadcast_f32x4(a: __m128) -> __m256 {
27487 unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]) }
27488}
27489
27490/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27491///
27492/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_broadcast_f32x4&expand=481)
27493#[inline]
#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcastf32x4, linux: vshuf
27495#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27496pub fn _mm256_mask_broadcast_f32x4(src: __m256, k: __mmask8, a: __m128) -> __m256 {
27497 unsafe {
27498 let broadcast: f32x8 = _mm256_broadcast_f32x4(a).as_f32x8();
        transmute(simd_select_bitmask(k, broadcast, src.as_f32x8()))
27500 }
27501}
27502
27503/// Broadcast the 4 packed single-precision (32-bit) floating-point elements from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27504///
27505/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_broadcast_f32x4&expand=482)
27506#[inline]
#[target_feature(enable = "avx512f,avx512vl")] //msvc: vbroadcastf32x4, linux: vshuf
27508#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27509pub fn _mm256_maskz_broadcast_f32x4(k: __mmask8, a: __m128) -> __m256 {
27510 unsafe {
27511 let broadcast: f32x8 = _mm256_broadcast_f32x4(a).as_f32x8();
        transmute(simd_select_bitmask(k, broadcast, f32x8::ZERO))
27513 }
27514}
27515
27516/// Broadcast the 4 packed double-precision (64-bit) floating-point elements from a to all elements of dst.
27517///
27518/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_broadcast_f64x4&expand=495)
27519#[inline]
27520#[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vperm
27521#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27522pub fn _mm512_broadcast_f64x4(a: __m256d) -> __m512d {
27523 unsafe { simd_shuffle!(a, a, [0, 1, 2, 3, 0, 1, 2, 3]) }
27524}
27525
27526/// Broadcast the 4 packed double-precision (64-bit) floating-point elements from a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27527///
27528/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_broadcast_f64x4&expand=496)
27529#[inline]
#[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vperm
27531#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27532pub fn _mm512_mask_broadcast_f64x4(src: __m512d, k: __mmask8, a: __m256d) -> __m512d {
27533 unsafe {
27534 let broadcast: f64x8 = _mm512_broadcast_f64x4(a).as_f64x8();
        transmute(simd_select_bitmask(k, broadcast, src.as_f64x8()))
27536 }
27537}
27538
27539/// Broadcast the 4 packed double-precision (64-bit) floating-point elements from a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27540///
27541/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_broadcast_f64x4&expand=497)
27542#[inline]
#[target_feature(enable = "avx512f")] //msvc: vbroadcastf64x4, linux: vperm
27544#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27545pub fn _mm512_maskz_broadcast_f64x4(k: __mmask8, a: __m256d) -> __m512d {
27546 unsafe {
27547 let broadcast: f64x8 = _mm512_broadcast_f64x4(a).as_f64x8();
        transmute(simd_select_bitmask(k, broadcast, f64x8::ZERO))
27549 }
27550}
27551
27552/// Blend packed 32-bit integers from a and b using control mask k, and store the results in dst.
27553///
27554/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_epi32&expand=435)
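///
/// A minimal, illustrative sketch (the `demo` helper and its values are made
/// up, not part of Intel's documentation). It assumes a nightly toolchain with
/// `#![feature(stdarch_x86_avx512)]` and is marked `ignore`, so it is not
/// compiled as a doctest:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() -> __m512i {
///     let a = _mm512_set1_epi32(0);
///     let b = _mm512_set1_epi32(1);
///     // Bit i of the mask selects lane i: set bits take the lane from `b`,
///     // clear bits take it from `a`.
///     _mm512_mask_blend_epi32(0xFF00, a, b) // lanes 0..8 from a, lanes 8..16 from b
/// }
/// ```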
27555#[inline]
27556#[target_feature(enable = "avx512f")]
27557#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27558#[cfg_attr(test, assert_instr(vmovdqa32))] //should be vpblendmd
27559pub fn _mm512_mask_blend_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_select_bitmask(k, b.as_i32x16(), a.as_i32x16())) }
27561}
27562
27563/// Blend packed 32-bit integers from a and b using control mask k, and store the results in dst.
27564///
27565/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_epi32&expand=434)
27566#[inline]
27567#[target_feature(enable = "avx512f,avx512vl")]
27568#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27569#[cfg_attr(test, assert_instr(vmovdqa32))] //should be vpblendmd
27570pub fn _mm256_mask_blend_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_select_bitmask(k, b.as_i32x8(), a.as_i32x8())) }
27572}
27573
27574/// Blend packed 32-bit integers from a and b using control mask k, and store the results in dst.
27575///
27576/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_epi32&expand=432)
27577#[inline]
27578#[target_feature(enable = "avx512f,avx512vl")]
27579#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27580#[cfg_attr(test, assert_instr(vmovdqa32))] //should be vpblendmd
27581pub fn _mm_mask_blend_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_select_bitmask(k, b.as_i32x4(), a.as_i32x4())) }
27583}
27584
27585/// Blend packed 64-bit integers from a and b using control mask k, and store the results in dst.
27586///
27587/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_epi64&expand=438)
27588#[inline]
27589#[target_feature(enable = "avx512f")]
27590#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27591#[cfg_attr(test, assert_instr(vmovdqa64))] //should be vpblendmq
27592pub fn _mm512_mask_blend_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_select_bitmask(k, b.as_i64x8(), a.as_i64x8())) }
27594}
27595
27596/// Blend packed 64-bit integers from a and b using control mask k, and store the results in dst.
27597///
27598/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_epi64&expand=437)
27599#[inline]
27600#[target_feature(enable = "avx512f,avx512vl")]
27601#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27602#[cfg_attr(test, assert_instr(vmovdqa64))] //should be vpblendmq
27603pub fn _mm256_mask_blend_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_select_bitmask(k, b.as_i64x4(), a.as_i64x4())) }
27605}
27606
27607/// Blend packed 64-bit integers from a and b using control mask k, and store the results in dst.
27608///
27609/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_epi64&expand=436)
27610#[inline]
27611#[target_feature(enable = "avx512f,avx512vl")]
27612#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27613#[cfg_attr(test, assert_instr(vmovdqa64))] //should be vpblendmq
27614pub fn _mm_mask_blend_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_select_bitmask(k, b.as_i64x2(), a.as_i64x2())) }
27616}
27617
27618/// Blend packed single-precision (32-bit) floating-point elements from a and b using control mask k, and store the results in dst.
27619///
27620/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_ps&expand=451)
27621#[inline]
27622#[target_feature(enable = "avx512f")]
27623#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27624#[cfg_attr(test, assert_instr(vmovaps))] //should be vpblendmps
27625pub fn _mm512_mask_blend_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
    unsafe { transmute(simd_select_bitmask(k, b.as_f32x16(), a.as_f32x16())) }
27627}
27628
27629/// Blend packed single-precision (32-bit) floating-point elements from a and b using control mask k, and store the results in dst.
27630///
27631/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_ps&expand=450)
27632#[inline]
27633#[target_feature(enable = "avx512f,avx512vl")]
27634#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27635#[cfg_attr(test, assert_instr(vmovaps))] //should be vpblendmps
27636pub fn _mm256_mask_blend_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
    unsafe { transmute(simd_select_bitmask(k, b.as_f32x8(), a.as_f32x8())) }
27638}
27639
27640/// Blend packed single-precision (32-bit) floating-point elements from a and b using control mask k, and store the results in dst.
27641///
27642/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_ps&expand=448)
27643#[inline]
27644#[target_feature(enable = "avx512f,avx512vl")]
27645#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27646#[cfg_attr(test, assert_instr(vmovaps))] //should be vpblendmps
27647pub fn _mm_mask_blend_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe { transmute(simd_select_bitmask(k, b.as_f32x4(), a.as_f32x4())) }
27649}
27650
27651/// Blend packed double-precision (64-bit) floating-point elements from a and b using control mask k, and store the results in dst.
27652///
27653/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_blend_pd&expand=446)
27654#[inline]
27655#[target_feature(enable = "avx512f")]
27656#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27657#[cfg_attr(test, assert_instr(vmovapd))] //should be vpblendmpd
27658pub fn _mm512_mask_blend_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
    unsafe { transmute(simd_select_bitmask(k, b.as_f64x8(), a.as_f64x8())) }
27660}
27661
27662/// Blend packed double-precision (64-bit) floating-point elements from a and b using control mask k, and store the results in dst.
27663///
27664/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_blend_pd&expand=445)
27665#[inline]
27666#[target_feature(enable = "avx512f,avx512vl")]
27667#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27668#[cfg_attr(test, assert_instr(vmovapd))] //should be vpblendmpd
27669pub fn _mm256_mask_blend_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
    unsafe { transmute(simd_select_bitmask(k, b.as_f64x4(), a.as_f64x4())) }
27671}
27672
27673/// Blend packed double-precision (64-bit) floating-point elements from a and b using control mask k, and store the results in dst.
27674///
27675/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_blend_pd&expand=443)
27676#[inline]
27677#[target_feature(enable = "avx512f,avx512vl")]
27678#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27679#[cfg_attr(test, assert_instr(vmovapd))] //should be vpblendmpd
27680pub fn _mm_mask_blend_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe { transmute(simd_select_bitmask(k, b.as_f64x2(), a.as_f64x2())) }
27682}
27683
27684/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 64 bytes (16 elements) in dst.
27685///
/// <div class="warning">Only the lowest <strong>4 bits</strong> of the immediate IMM8 are used (shift at maximum by 15 elements, i.e. 60 bytes)!</div>
27687///
27688/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_alignr_epi32&expand=245)
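///
/// A minimal, illustrative sketch (the `demo` helper and its values are made
/// up, not part of Intel's documentation). It assumes a nightly toolchain with
/// `#![feature(stdarch_x86_avx512)]` and is marked `ignore`, so it is not
/// compiled as a doctest:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[target_feature(enable = "avx512f")]
/// fn demo() -> [i32; 16] {
///     let a = _mm512_setr_epi32(16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31);
///     let b = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
///     // Conceptually [a:b] is a 32-element vector with `b` in the low half;
///     // shifting right by 3 elements keeps elements 3..19 of that vector.
///     let r = _mm512_alignr_epi32::<3>(a, b);
///     unsafe { core::mem::transmute(r) } // => [3, 4, ..., 15, 16, 17, 18]
/// }
/// ```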
27689#[inline]
27690#[target_feature(enable = "avx512f")]
27691#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27692#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27693#[rustc_legacy_const_generics(2)]
27694pub fn _mm512_alignr_epi32<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
27695 unsafe {
27696 static_assert_uimm_bits!(IMM8, 8);
27697 let a = a.as_i32x16();
27698 let b = b.as_i32x16();
27699 let imm8: i32 = IMM8 % 16;
27700 let r: i32x16 = match imm8 {
27701 0 => simd_shuffle!(
27702 a,
27703 b,
27704 [
27705 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
27706 ],
27707 ),
27708 1 => simd_shuffle!(
27709 a,
27710 b,
27711 [
27712 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0,
27713 ],
27714 ),
27715 2 => simd_shuffle!(
27716 a,
27717 b,
27718 [18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1],
27719 ),
27720 3 => simd_shuffle!(
27721 a,
27722 b,
27723 [19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2],
27724 ),
27725 4 => simd_shuffle!(
27726 a,
27727 b,
27728 [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3],
27729 ),
27730 5 => simd_shuffle!(
27731 a,
27732 b,
27733 [21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4],
27734 ),
27735 6 => simd_shuffle!(
27736 a,
27737 b,
27738 [22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5],
27739 ),
27740 7 => simd_shuffle!(
27741 a,
27742 b,
27743 [23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6],
27744 ),
27745 8 => simd_shuffle!(
27746 a,
27747 b,
27748 [24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7],
27749 ),
27750 9 => simd_shuffle!(
27751 a,
27752 b,
27753 [25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8],
27754 ),
27755 10 => simd_shuffle!(a, b, [26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
27756 11 => simd_shuffle!(a, b, [27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]),
27757 12 => simd_shuffle!(a, b, [28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]),
27758 13 => simd_shuffle!(a, b, [29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]),
27759 14 => simd_shuffle!(a, b, [30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]),
27760 15 => simd_shuffle!(a, b, [31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]),
27761 _ => unreachable_unchecked(),
27762 };
27763 transmute(r)
27764 }
27765}
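
// Illustrative sketch, not part of the original source: for IMM8 = 1 the concatenation
// `a:b` (with `a` in the upper half) is shifted right by one 32-bit element, so the result
// is b[1..=15] followed by a[0]. The helper name and values are ours; compiled only for
// tests and assumes AVX-512F at runtime.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_alignr_epi32() {
    unsafe {
        let a = _mm512_setr_epi32(
            100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115,
        );
        let b = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
        let lanes: [i32; 16] = mem::transmute(_mm512_alignr_epi32::<1>(a, b));
        assert_eq!(lanes, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 100]);
    }
}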
27766
27767/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 64 bytes (16 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27768///
27769/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_alignr_epi32&expand=246)
27770#[inline]
27771#[target_feature(enable = "avx512f")]
27772#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27773#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27774#[rustc_legacy_const_generics(4)]
27775pub fn _mm512_mask_alignr_epi32<const IMM8: i32>(
27776 src: __m512i,
27777 k: __mmask16,
27778 a: __m512i,
27779 b: __m512i,
27780) -> __m512i {
27781 unsafe {
27782 static_assert_uimm_bits!(IMM8, 8);
27783 let r: __m512i = _mm512_alignr_epi32::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16()))
27785 }
27786}
27787
/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 64 bytes (16 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27789///
27790/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_alignr_epi32&expand=247)
27791#[inline]
27792#[target_feature(enable = "avx512f")]
27793#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27794#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27795#[rustc_legacy_const_generics(3)]
27796pub fn _mm512_maskz_alignr_epi32<const IMM8: i32>(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
27797 unsafe {
27798 static_assert_uimm_bits!(IMM8, 8);
27799 let r: __m512i = _mm512_alignr_epi32::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x16(), i32x16::ZERO))
27801 }
27802}
27803
27804/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 32 bytes (8 elements) in dst.
27805///
/// <div class="warning">Only the lowest <strong>3 bits</strong> of imm8 are used (shift by at most 28 bytes)!</div>
27807///
27808/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_alignr_epi32&expand=242)
27809#[inline]
27810#[target_feature(enable = "avx512f,avx512vl")]
27811#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27812#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27813#[rustc_legacy_const_generics(2)]
27814pub fn _mm256_alignr_epi32<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
27815 unsafe {
27816 static_assert_uimm_bits!(IMM8, 8);
27817 let a: i32x8 = a.as_i32x8();
27818 let b: i32x8 = b.as_i32x8();
27819 let imm8: i32 = IMM8 % 8;
27820 let r: i32x8 = match imm8 {
27821 0 => simd_shuffle!(a, b, [8, 9, 10, 11, 12, 13, 14, 15]),
27822 1 => simd_shuffle!(a, b, [9, 10, 11, 12, 13, 14, 15, 0]),
27823 2 => simd_shuffle!(a, b, [10, 11, 12, 13, 14, 15, 0, 1]),
27824 3 => simd_shuffle!(a, b, [11, 12, 13, 14, 15, 0, 1, 2]),
27825 4 => simd_shuffle!(a, b, [12, 13, 14, 15, 0, 1, 2, 3]),
27826 5 => simd_shuffle!(a, b, [13, 14, 15, 0, 1, 2, 3, 4]),
27827 6 => simd_shuffle!(a, b, [14, 15, 0, 1, 2, 3, 4, 5]),
27828 7 => simd_shuffle!(a, b, [15, 0, 1, 2, 3, 4, 5, 6]),
27829 _ => unreachable_unchecked(),
27830 };
        transmute(r)
27832 }
27833}
27834
27835/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 32 bytes (8 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27836///
27837/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_alignr_epi32&expand=243)
27838#[inline]
27839#[target_feature(enable = "avx512f,avx512vl")]
27840#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27841#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27842#[rustc_legacy_const_generics(4)]
27843pub fn _mm256_mask_alignr_epi32<const IMM8: i32>(
27844 src: __m256i,
27845 k: __mmask8,
27846 a: __m256i,
27847 b: __m256i,
27848) -> __m256i {
27849 unsafe {
27850 static_assert_uimm_bits!(IMM8, 8);
27851 let r: __m256i = _mm256_alignr_epi32::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8()))
27853 }
27854}
27855
27856/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 32 bytes (8 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27857///
27858/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_alignr_epi32&expand=244)
27859#[inline]
27860#[target_feature(enable = "avx512f,avx512vl")]
27861#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27862#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27863#[rustc_legacy_const_generics(3)]
27864pub fn _mm256_maskz_alignr_epi32<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
27865 unsafe {
27866 static_assert_uimm_bits!(IMM8, 8);
27867 let r: __m256i = _mm256_alignr_epi32::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x8(), i32x8::ZERO))
27869 }
27870}
27871
27872/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 16 bytes (4 elements) in dst.
27873///
/// <div class="warning">Only the lowest <strong>2 bits</strong> of imm8 are used (shift by at most 12 bytes)!</div>
27875///
27876/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_alignr_epi32&expand=239)
27877#[inline]
27878#[target_feature(enable = "avx512f,avx512vl")]
27879#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27880#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))] //should be valignd
27881#[rustc_legacy_const_generics(2)]
27882pub fn _mm_alignr_epi32<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
27883 unsafe {
27884 static_assert_uimm_bits!(IMM8, 8);
27885 let a: i32x4 = a.as_i32x4();
27886 let b: i32x4 = b.as_i32x4();
27887 let imm8: i32 = IMM8 % 4;
27888 let r: i32x4 = match imm8 {
27889 0 => simd_shuffle!(a, b, [4, 5, 6, 7]),
27890 1 => simd_shuffle!(a, b, [5, 6, 7, 0]),
27891 2 => simd_shuffle!(a, b, [6, 7, 0, 1]),
27892 3 => simd_shuffle!(a, b, [7, 0, 1, 2]),
27893 _ => unreachable_unchecked(),
27894 };
        transmute(r)
27896 }
27897}
27898
27899/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 16 bytes (4 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27900///
27901/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_alignr_epi32&expand=240)
27902#[inline]
27903#[target_feature(enable = "avx512f,avx512vl")]
27904#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27905#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27906#[rustc_legacy_const_generics(4)]
27907pub fn _mm_mask_alignr_epi32<const IMM8: i32>(
27908 src: __m128i,
27909 k: __mmask8,
27910 a: __m128i,
27911 b: __m128i,
27912) -> __m128i {
27913 unsafe {
27914 static_assert_uimm_bits!(IMM8, 8);
27915 let r: __m128i = _mm_alignr_epi32::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4()))
27917 }
27918}
27919
27920/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 32-bit elements, and store the low 16 bytes (4 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27921///
27922/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_alignr_epi32&expand=241)
27923#[inline]
27924#[target_feature(enable = "avx512f,avx512vl")]
27925#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27926#[cfg_attr(test, assert_instr(valignd, IMM8 = 1))]
27927#[rustc_legacy_const_generics(3)]
27928pub fn _mm_maskz_alignr_epi32<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
27929 unsafe {
27930 static_assert_uimm_bits!(IMM8, 8);
27931 let r: __m128i = _mm_alignr_epi32::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i32x4(), i32x4::ZERO))
27933 }
27934}
27935
27936/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 64 bytes (8 elements) in dst.
27937///
/// <div class="warning">Only the lowest <strong>3 bits</strong> of imm8 are used (shift by at most 56 bytes)!</div>
27939///
27940/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_alignr_epi64&expand=254)
27941#[inline]
27942#[target_feature(enable = "avx512f")]
27943#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27944#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
27945#[rustc_legacy_const_generics(2)]
27946pub fn _mm512_alignr_epi64<const IMM8: i32>(a: __m512i, b: __m512i) -> __m512i {
27947 unsafe {
27948 static_assert_uimm_bits!(IMM8, 8);
27949 let imm8: i32 = IMM8 % 8;
27950 let r: i64x8 = match imm8 {
27951 0 => simd_shuffle!(a, b, [8, 9, 10, 11, 12, 13, 14, 15]),
27952 1 => simd_shuffle!(a, b, [9, 10, 11, 12, 13, 14, 15, 0]),
27953 2 => simd_shuffle!(a, b, [10, 11, 12, 13, 14, 15, 0, 1]),
27954 3 => simd_shuffle!(a, b, [11, 12, 13, 14, 15, 0, 1, 2]),
27955 4 => simd_shuffle!(a, b, [12, 13, 14, 15, 0, 1, 2, 3]),
27956 5 => simd_shuffle!(a, b, [13, 14, 15, 0, 1, 2, 3, 4]),
27957 6 => simd_shuffle!(a, b, [14, 15, 0, 1, 2, 3, 4, 5]),
27958 7 => simd_shuffle!(a, b, [15, 0, 1, 2, 3, 4, 5, 6]),
27959 _ => unreachable_unchecked(),
27960 };
        transmute(r)
27962 }
27963}
27964
27965/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 64 bytes (8 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
27966///
27967/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_alignr_epi64&expand=255)
27968#[inline]
27969#[target_feature(enable = "avx512f")]
27970#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27971#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
27972#[rustc_legacy_const_generics(4)]
27973pub fn _mm512_mask_alignr_epi64<const IMM8: i32>(
27974 src: __m512i,
27975 k: __mmask8,
27976 a: __m512i,
27977 b: __m512i,
27978) -> __m512i {
27979 unsafe {
27980 static_assert_uimm_bits!(IMM8, 8);
27981 let r: __m512i = _mm512_alignr_epi64::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8()))
27983 }
27984}
27985
/// Concatenate a and b into a 128-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 64 bytes (8 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
27987///
27988/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_alignr_epi64&expand=256)
27989#[inline]
27990#[target_feature(enable = "avx512f")]
27991#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
27992#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
27993#[rustc_legacy_const_generics(3)]
27994pub fn _mm512_maskz_alignr_epi64<const IMM8: i32>(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
27995 unsafe {
27996 static_assert_uimm_bits!(IMM8, 8);
27997 let r: __m512i = _mm512_alignr_epi64::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x8(), i64x8::ZERO))
27999 }
28000}
28001
28002/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 32 bytes (4 elements) in dst.
28003///
/// <div class="warning">Only the lowest <strong>2 bits</strong> of imm8 are used (shift by at most 24 bytes)!</div>
28005///
28006/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_alignr_epi64&expand=251)
28007#[inline]
28008#[target_feature(enable = "avx512f,avx512vl")]
28009#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28010#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
28011#[rustc_legacy_const_generics(2)]
28012pub fn _mm256_alignr_epi64<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
28013 unsafe {
28014 static_assert_uimm_bits!(IMM8, 8);
28015 let imm8: i32 = IMM8 % 4;
28016 let r: i64x4 = match imm8 {
28017 0 => simd_shuffle!(a, b, [4, 5, 6, 7]),
28018 1 => simd_shuffle!(a, b, [5, 6, 7, 0]),
28019 2 => simd_shuffle!(a, b, [6, 7, 0, 1]),
28020 3 => simd_shuffle!(a, b, [7, 0, 1, 2]),
28021 _ => unreachable_unchecked(),
28022 };
        transmute(r)
28024 }
28025}
28026
28027/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 32 bytes (4 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28028///
28029/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_alignr_epi64&expand=252)
28030#[inline]
28031#[target_feature(enable = "avx512f,avx512vl")]
28032#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28033#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
28034#[rustc_legacy_const_generics(4)]
28035pub fn _mm256_mask_alignr_epi64<const IMM8: i32>(
28036 src: __m256i,
28037 k: __mmask8,
28038 a: __m256i,
28039 b: __m256i,
28040) -> __m256i {
28041 unsafe {
28042 static_assert_uimm_bits!(IMM8, 8);
28043 let r: __m256i = _mm256_alignr_epi64::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4()))
28045 }
28046}
28047
28048/// Concatenate a and b into a 64-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 32 bytes (4 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28049///
28050/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_alignr_epi64&expand=253)
28051#[inline]
28052#[target_feature(enable = "avx512f,avx512vl")]
28053#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28054#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
28055#[rustc_legacy_const_generics(3)]
28056pub fn _mm256_maskz_alignr_epi64<const IMM8: i32>(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28057 unsafe {
28058 static_assert_uimm_bits!(IMM8, 8);
28059 let r: __m256i = _mm256_alignr_epi64::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x4(), i64x4::ZERO))
28061 }
28062}
28063
28064/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 16 bytes (2 elements) in dst.
28065///
/// <div class="warning">Only the lowest <strong>bit</strong> of imm8 is used (shift by at most 8 bytes)!</div>
28067///
28068/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_alignr_epi64&expand=248)
28069#[inline]
28070#[target_feature(enable = "avx512f,avx512vl")]
28071#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28072#[cfg_attr(test, assert_instr(vpalignr, IMM8 = 1))] //should be valignq
28073#[rustc_legacy_const_generics(2)]
28074pub fn _mm_alignr_epi64<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
28075 unsafe {
28076 static_assert_uimm_bits!(IMM8, 8);
28077 let imm8: i32 = IMM8 % 2;
28078 let r: i64x2 = match imm8 {
28079 0 => simd_shuffle!(a, b, [2, 3]),
28080 1 => simd_shuffle!(a, b, [3, 0]),
28081 _ => unreachable_unchecked(),
28082 };
        transmute(r)
28084 }
28085}
28086
28087/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 16 bytes (2 elements) in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28088///
28089/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_alignr_epi64&expand=249)
28090#[inline]
28091#[target_feature(enable = "avx512f,avx512vl")]
28092#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28093#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
28094#[rustc_legacy_const_generics(4)]
28095pub fn _mm_mask_alignr_epi64<const IMM8: i32>(
28096 src: __m128i,
28097 k: __mmask8,
28098 a: __m128i,
28099 b: __m128i,
28100) -> __m128i {
28101 unsafe {
28102 static_assert_uimm_bits!(IMM8, 8);
28103 let r: __m128i = _mm_alignr_epi64::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x2(), src.as_i64x2()))
28105 }
28106}
28107
28108/// Concatenate a and b into a 32-byte immediate result, shift the result right by imm8 64-bit elements, and store the low 16 bytes (2 elements) in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28109///
28110/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_alignr_epi64&expand=250)
28111#[inline]
28112#[target_feature(enable = "avx512f,avx512vl")]
28113#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28114#[cfg_attr(test, assert_instr(valignq, IMM8 = 1))]
28115#[rustc_legacy_const_generics(3)]
28116pub fn _mm_maskz_alignr_epi64<const IMM8: i32>(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28117 unsafe {
28118 static_assert_uimm_bits!(IMM8, 8);
28119 let r: __m128i = _mm_alignr_epi64::<IMM8>(a, b);
        transmute(simd_select_bitmask(k, r.as_i64x2(), i64x2::ZERO))
28121 }
28122}
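
// Illustrative sketch, not part of the original source: because `_mm256_alignr_epi64` only
// uses IMM8 % 4, an immediate of 5 behaves exactly like 1, yielding b[1..=3] followed by
// a[0]. The helper name and values are ours; compiled only for tests and assumes AVX-512F
// and AVX-512VL at runtime.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
unsafe fn example_alignr_epi64_wraps() {
    unsafe {
        let a = _mm256_setr_epi64x(100, 101, 102, 103);
        let b = _mm256_setr_epi64x(0, 1, 2, 3);
        let by_one: [i64; 4] = mem::transmute(_mm256_alignr_epi64::<1>(a, b));
        let by_five: [i64; 4] = mem::transmute(_mm256_alignr_epi64::<5>(a, b));
        assert_eq!(by_one, [1, 2, 3, 100]);
        assert_eq!(by_one, by_five);
    }
}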
28123
28124/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst.
28125///
28126/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_epi32&expand=272)
28127#[inline]
28128#[target_feature(enable = "avx512f")]
28129#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpandq))] // should be vpandd, but the compiler generates vpandq
28131pub fn _mm512_and_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_and(a.as_i32x16(), b.as_i32x16())) }
28133}
28134
28135/// Performs element-by-element bitwise AND between packed 32-bit integer elements of a and b, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28136///
28137/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_and_epi32&expand=273)
28138#[inline]
28139#[target_feature(enable = "avx512f")]
28140#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28141#[cfg_attr(test, assert_instr(vpandd))]
28142pub fn _mm512_mask_and_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28143 unsafe {
28144 let and: i32x16 = _mm512_and_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, and, src.as_i32x16()))
28146 }
28147}
28148
28149/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28150///
28151/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_and_epi32&expand=274)
28152#[inline]
28153#[target_feature(enable = "avx512f")]
28154#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28155#[cfg_attr(test, assert_instr(vpandd))]
28156pub fn _mm512_maskz_and_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28157 unsafe {
28158 let and: i32x16 = _mm512_and_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, and, i32x16::ZERO))
28160 }
28161}
28162
28163/// Performs element-by-element bitwise AND between packed 32-bit integer elements of a and b, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28164///
28165/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_and_epi32&expand=270)
28166#[inline]
28167#[target_feature(enable = "avx512f,avx512vl")]
28168#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28169#[cfg_attr(test, assert_instr(vpandd))]
28170pub fn _mm256_mask_and_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28171 unsafe {
        let and: i32x8 = simd_and(a.as_i32x8(), b.as_i32x8());
        transmute(simd_select_bitmask(k, and, src.as_i32x8()))
28174 }
28175}
28176
28177/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28178///
28179/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_and_epi32&expand=271)
28180#[inline]
28181#[target_feature(enable = "avx512f,avx512vl")]
28182#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28183#[cfg_attr(test, assert_instr(vpandd))]
28184pub fn _mm256_maskz_and_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28185 unsafe {
        let and: i32x8 = simd_and(a.as_i32x8(), b.as_i32x8());
        transmute(simd_select_bitmask(k, and, i32x8::ZERO))
28188 }
28189}
28190
28191/// Performs element-by-element bitwise AND between packed 32-bit integer elements of a and b, storing the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28192///
28193/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_and_epi32&expand=268)
28194#[inline]
28195#[target_feature(enable = "avx512f,avx512vl")]
28196#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28197#[cfg_attr(test, assert_instr(vpandd))]
28198pub fn _mm_mask_and_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28199 unsafe {
        let and: i32x4 = simd_and(a.as_i32x4(), b.as_i32x4());
        transmute(simd_select_bitmask(k, and, src.as_i32x4()))
28202 }
28203}
28204
28205/// Compute the bitwise AND of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28206///
28207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_and_epi32&expand=269)
28208#[inline]
28209#[target_feature(enable = "avx512f,avx512vl")]
28210#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28211#[cfg_attr(test, assert_instr(vpandd))]
28212pub fn _mm_maskz_and_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28213 unsafe {
        let and: i32x4 = simd_and(a.as_i32x4(), b.as_i32x4());
        transmute(simd_select_bitmask(k, and, i32x4::ZERO))
28216 }
28217}
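
// Illustrative sketch, not part of the original source: it contrasts the writemask and
// zeromask forms of the masked AND. Where a mask bit is clear, `_mm512_mask_and_epi32`
// keeps the lane from `src`, while `_mm512_maskz_and_epi32` zeroes it. The helper name and
// values are ours; compiled only for tests and assumes AVX-512F at runtime.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_masked_and_epi32() {
    unsafe {
        let src = _mm512_set1_epi32(-1);
        let a = _mm512_set1_epi32(0b1100);
        let b = _mm512_set1_epi32(0b1010);
        let k: __mmask16 = 0b0000_0000_1111_1111; // only the low 8 lanes receive a & b
        let masked: [i32; 16] = mem::transmute(_mm512_mask_and_epi32(src, k, a, b));
        let zeroed: [i32; 16] = mem::transmute(_mm512_maskz_and_epi32(k, a, b));
        assert_eq!(masked[0], 0b1000); // 0b1100 & 0b1010
        assert_eq!(masked[15], -1); // copied from src
        assert_eq!(zeroed[15], 0); // zeroed out
    }
}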
28218
28219/// Compute the bitwise AND of 512 bits (composed of packed 64-bit integers) in a and b, and store the results in dst.
28220///
28221/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_epi64&expand=279)
28222#[inline]
28223#[target_feature(enable = "avx512f")]
28224#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28225#[cfg_attr(test, assert_instr(vpandq))]
28226pub fn _mm512_and_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_and(a.as_i64x8(), b.as_i64x8())) }
28228}
28229
28230/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28231///
28232/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_and_epi64&expand=280)
28233#[inline]
28234#[target_feature(enable = "avx512f")]
28235#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28236#[cfg_attr(test, assert_instr(vpandq))]
28237pub fn _mm512_mask_and_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28238 unsafe {
28239 let and: i64x8 = _mm512_and_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, and, src.as_i64x8()))
28241 }
28242}
28243
28244/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28245///
28246/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_and_epi64&expand=281)
28247#[inline]
28248#[target_feature(enable = "avx512f")]
28249#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28250#[cfg_attr(test, assert_instr(vpandq))]
28251pub fn _mm512_maskz_and_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28252 unsafe {
28253 let and: i64x8 = _mm512_and_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, and, i64x8::ZERO))
28255 }
28256}
28257
28258/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28259///
28260/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_and_epi64&expand=277)
28261#[inline]
28262#[target_feature(enable = "avx512f,avx512vl")]
28263#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28264#[cfg_attr(test, assert_instr(vpandq))]
28265pub fn _mm256_mask_and_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28266 unsafe {
        let and: i64x4 = simd_and(a.as_i64x4(), b.as_i64x4());
        transmute(simd_select_bitmask(k, and, src.as_i64x4()))
28269 }
28270}
28271
28272/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28273///
28274/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_and_epi64&expand=278)
28275#[inline]
28276#[target_feature(enable = "avx512f,avx512vl")]
28277#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28278#[cfg_attr(test, assert_instr(vpandq))]
28279pub fn _mm256_maskz_and_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28280 unsafe {
        let and: i64x4 = simd_and(a.as_i64x4(), b.as_i64x4());
        transmute(simd_select_bitmask(k, and, i64x4::ZERO))
28283 }
28284}
28285
28286/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28287///
28288/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_and_epi64&expand=275)
28289#[inline]
28290#[target_feature(enable = "avx512f,avx512vl")]
28291#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28292#[cfg_attr(test, assert_instr(vpandq))]
28293pub fn _mm_mask_and_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28294 unsafe {
        let and: i64x2 = simd_and(a.as_i64x2(), b.as_i64x2());
        transmute(simd_select_bitmask(k, and, src.as_i64x2()))
28297 }
28298}
28299
28300/// Compute the bitwise AND of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28301///
28302/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_and_epi64&expand=276)
28303#[inline]
28304#[target_feature(enable = "avx512f,avx512vl")]
28305#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28306#[cfg_attr(test, assert_instr(vpandq))]
28307pub fn _mm_maskz_and_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28308 unsafe {
        let and: i64x2 = simd_and(a.as_i64x2(), b.as_i64x2());
        transmute(simd_select_bitmask(k, and, i64x2::ZERO))
28311 }
28312}
28313
28314/// Compute the bitwise AND of 512 bits (representing integer data) in a and b, and store the result in dst.
28315///
28316/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_and_si512&expand=302)
28317#[inline]
28318#[target_feature(enable = "avx512f")]
28319#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28320#[cfg_attr(test, assert_instr(vpandq))]
28321pub fn _mm512_and_si512(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_and(a.as_i32x16(), b.as_i32x16())) }
28323}
28324
28325/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst.
28326///
28327/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_epi32&expand=4042)
28328#[inline]
28329#[target_feature(enable = "avx512f")]
28330#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28331#[cfg_attr(test, assert_instr(vporq))]
28332pub fn _mm512_or_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_or(a.as_i32x16(), b.as_i32x16())) }
28334}
28335
28336/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28337///
28338/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_or_epi32&expand=4040)
28339#[inline]
28340#[target_feature(enable = "avx512f")]
28341#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28342#[cfg_attr(test, assert_instr(vpord))]
28343pub fn _mm512_mask_or_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28344 unsafe {
28345 let or: i32x16 = _mm512_or_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, or, src.as_i32x16()))
28347 }
28348}
28349
28350/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28351///
28352/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_or_epi32&expand=4041)
28353#[inline]
28354#[target_feature(enable = "avx512f")]
28355#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28356#[cfg_attr(test, assert_instr(vpord))]
28357pub fn _mm512_maskz_or_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28358 unsafe {
28359 let or: i32x16 = _mm512_or_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, or, i32x16::ZERO))
28361 }
28362}
28363
28364/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst.
28365///
28366/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_or_epi32&expand=4039)
28367#[inline]
28368#[target_feature(enable = "avx512f,avx512vl")]
28369#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28370#[cfg_attr(test, assert_instr(vor))] //should be vpord
28371pub fn _mm256_or_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_or(a.as_i32x8(), b.as_i32x8())) }
28373}
28374
28375/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28376///
28377/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_or_epi32&expand=4037)
28378#[inline]
28379#[target_feature(enable = "avx512f,avx512vl")]
28380#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28381#[cfg_attr(test, assert_instr(vpord))]
28382pub fn _mm256_mask_or_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28383 unsafe {
28384 let or: i32x8 = _mm256_or_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, or, src.as_i32x8()))
28386 }
28387}
28388
28389/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28390///
28391/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_or_epi32&expand=4038)
28392#[inline]
28393#[target_feature(enable = "avx512f,avx512vl")]
28394#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28395#[cfg_attr(test, assert_instr(vpord))]
28396pub fn _mm256_maskz_or_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28397 unsafe {
28398 let or: i32x8 = _mm256_or_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, or, i32x8::ZERO))
28400 }
28401}
28402
28403/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst.
28404///
28405/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_or_epi32&expand=4036)
28406#[inline]
28407#[target_feature(enable = "avx512f,avx512vl")]
28408#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28409#[cfg_attr(test, assert_instr(vor))] //should be vpord
28410pub fn _mm_or_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_or(a.as_i32x4(), b.as_i32x4())) }
28412}
28413
28414/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28415///
28416/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_or_epi32&expand=4034)
28417#[inline]
28418#[target_feature(enable = "avx512f,avx512vl")]
28419#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28420#[cfg_attr(test, assert_instr(vpord))]
28421pub fn _mm_mask_or_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28422 unsafe {
28423 let or: i32x4 = _mm_or_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, or, src.as_i32x4()))
28425 }
28426}
28427
28428/// Compute the bitwise OR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28429///
28430/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_or_epi32&expand=4035)
28431#[inline]
28432#[target_feature(enable = "avx512f,avx512vl")]
28433#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28434#[cfg_attr(test, assert_instr(vpord))]
28435pub fn _mm_maskz_or_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28436 unsafe {
28437 let or: i32x4 = _mm_or_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, or, i32x4::ZERO))
28439 }
28440}
28441
/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst.
28443///
28444/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_epi64&expand=4051)
28445#[inline]
28446#[target_feature(enable = "avx512f")]
28447#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28448#[cfg_attr(test, assert_instr(vporq))]
28449pub fn _mm512_or_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_or(a.as_i64x8(), b.as_i64x8())) }
28451}
28452
28453/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28454///
28455/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_or_epi64&expand=4049)
28456#[inline]
28457#[target_feature(enable = "avx512f")]
28458#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28459#[cfg_attr(test, assert_instr(vporq))]
28460pub fn _mm512_mask_or_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28461 unsafe {
28462 let or: i64x8 = _mm512_or_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, or, src.as_i64x8()))
28464 }
28465}
28466
28467/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28468///
28469/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_or_epi64&expand=4050)
28470#[inline]
28471#[target_feature(enable = "avx512f")]
28472#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28473#[cfg_attr(test, assert_instr(vporq))]
28474pub fn _mm512_maskz_or_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28475 unsafe {
28476 let or: i64x8 = _mm512_or_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, or, i64x8::ZERO))
28478 }
28479}
28480
/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst.
28482///
28483/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_or_epi64&expand=4048)
28484#[inline]
28485#[target_feature(enable = "avx512f,avx512vl")]
28486#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28487#[cfg_attr(test, assert_instr(vor))] //should be vporq
28488pub fn _mm256_or_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_or(a.as_i64x4(), b.as_i64x4())) }
28490}
28491
28492/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28493///
28494/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_or_epi64&expand=4046)
28495#[inline]
28496#[target_feature(enable = "avx512f,avx512vl")]
28497#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28498#[cfg_attr(test, assert_instr(vporq))]
28499pub fn _mm256_mask_or_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28500 unsafe {
28501 let or: i64x4 = _mm256_or_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, or, src.as_i64x4()))
28503 }
28504}
28505
28506/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28507///
28508/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_or_epi64&expand=4047)
28509#[inline]
28510#[target_feature(enable = "avx512f,avx512vl")]
28511#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28512#[cfg_attr(test, assert_instr(vporq))]
28513pub fn _mm256_maskz_or_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28514 unsafe {
28515 let or: i64x4 = _mm256_or_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, or, i64x4::ZERO))
28517 }
28518}
28519
/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst.
28521///
28522/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_or_epi64&expand=4045)
28523#[inline]
28524#[target_feature(enable = "avx512f,avx512vl")]
28525#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28526#[cfg_attr(test, assert_instr(vor))] //should be vporq
28527pub fn _mm_or_epi64(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_or(a.as_i64x2(), b.as_i64x2())) }
28529}
28530
28531/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28532///
28533/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_or_epi64&expand=4043)
28534#[inline]
28535#[target_feature(enable = "avx512f,avx512vl")]
28536#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28537#[cfg_attr(test, assert_instr(vporq))]
28538pub fn _mm_mask_or_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28539 unsafe {
28540 let or: i64x2 = _mm_or_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, or, src.as_i64x2()))
28542 }
28543}
28544
28545/// Compute the bitwise OR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28546///
28547/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_or_epi64&expand=4044)
28548#[inline]
28549#[target_feature(enable = "avx512f,avx512vl")]
28550#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28551#[cfg_attr(test, assert_instr(vporq))]
28552pub fn _mm_maskz_or_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28553 unsafe {
28554 let or: i64x2 = _mm_or_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, or, i64x2::ZERO))
28556 }
28557}
28558
28559/// Compute the bitwise OR of 512 bits (representing integer data) in a and b, and store the result in dst.
28560///
28561/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_si512&expand=4072)
28562#[inline]
28563#[target_feature(enable = "avx512f")]
28564#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28565#[cfg_attr(test, assert_instr(vporq))]
28566pub fn _mm512_or_si512(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_or(a.as_i32x16(), b.as_i32x16())) }
28568}
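
// Illustrative sketch, not part of the original source: `_mm512_or_si512` treats its
// operands purely as 512 bits, so it produces the same bit pattern as the per-element
// `_mm512_or_epi32` variant. The helper name and values are ours; compiled only for tests
// and assumes AVX-512F at runtime.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn example_or_si512_is_bitwise() {
    unsafe {
        let a = _mm512_set1_epi32(0x0f0f_0f0f);
        let b = _mm512_set1_epi32(0x3030_3030);
        let whole: [i32; 16] = mem::transmute(_mm512_or_si512(a, b));
        let per_lane: [i32; 16] = mem::transmute(_mm512_or_epi32(a, b));
        assert_eq!(whole, per_lane);
        assert_eq!(whole[0], 0x3f3f_3f3f);
    }
}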
28569
28570/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst.
28571///
28572/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_epi32&expand=6142)
28573#[inline]
28574#[target_feature(enable = "avx512f")]
28575#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28576#[cfg_attr(test, assert_instr(vpxorq))] //should be vpxord
28577pub fn _mm512_xor_epi32(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_xor(a.as_i32x16(), b.as_i32x16())) }
28579}
28580
28581/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28582///
28583/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_xor_epi32&expand=6140)
28584#[inline]
28585#[target_feature(enable = "avx512f")]
28586#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28587#[cfg_attr(test, assert_instr(vpxord))]
28588pub fn _mm512_mask_xor_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28589 unsafe {
28590 let xor: i32x16 = _mm512_xor_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, xor, src.as_i32x16()))
28592 }
28593}
28594
28595/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28596///
28597/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_xor_epi32&expand=6141)
28598#[inline]
28599#[target_feature(enable = "avx512f")]
28600#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28601#[cfg_attr(test, assert_instr(vpxord))]
28602pub fn _mm512_maskz_xor_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28603 unsafe {
28604 let xor: i32x16 = _mm512_xor_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, xor, i32x16::ZERO))
28606 }
28607}
28608
28609/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst.
28610///
28611/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_xor_epi32&expand=6139)
28612#[inline]
28613#[target_feature(enable = "avx512f,avx512vl")]
28614#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28615#[cfg_attr(test, assert_instr(vxor))] //should be vpxord
28616pub fn _mm256_xor_epi32(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_xor(a.as_i32x8(), b.as_i32x8())) }
28618}
28619
28620/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28621///
28622/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_xor_epi32&expand=6137)
28623#[inline]
28624#[target_feature(enable = "avx512f,avx512vl")]
28625#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28626#[cfg_attr(test, assert_instr(vpxord))]
28627pub fn _mm256_mask_xor_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28628 unsafe {
28629 let xor: i32x8 = _mm256_xor_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, xor, src.as_i32x8()))
28631 }
28632}
28633
28634/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28635///
28636/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_xor_epi32&expand=6138)
28637#[inline]
28638#[target_feature(enable = "avx512f,avx512vl")]
28639#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28640#[cfg_attr(test, assert_instr(vpxord))]
28641pub fn _mm256_maskz_xor_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28642 unsafe {
28643 let xor: i32x8 = _mm256_xor_epi32(a, b).as_i32x8();
        transmute(simd_select_bitmask(k, xor, i32x8::ZERO))
28645 }
28646}
28647
28648/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst.
28649///
28650/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_xor_epi32&expand=6136)
28651#[inline]
28652#[target_feature(enable = "avx512f,avx512vl")]
28653#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28654#[cfg_attr(test, assert_instr(vxor))] //should be vpxord
28655pub fn _mm_xor_epi32(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_xor(a.as_i32x4(), b.as_i32x4())) }
28657}
28658
28659/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28660///
28661/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_xor_epi32&expand=6134)
28662#[inline]
28663#[target_feature(enable = "avx512f,avx512vl")]
28664#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28665#[cfg_attr(test, assert_instr(vpxord))]
28666pub fn _mm_mask_xor_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28667 unsafe {
28668 let xor: i32x4 = _mm_xor_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, xor, src.as_i32x4()))
28670 }
28671}
28672
28673/// Compute the bitwise XOR of packed 32-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28674///
28675/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_xor_epi32&expand=6135)
28676#[inline]
28677#[target_feature(enable = "avx512f,avx512vl")]
28678#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28679#[cfg_attr(test, assert_instr(vpxord))]
28680pub fn _mm_maskz_xor_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28681 unsafe {
28682 let xor: i32x4 = _mm_xor_epi32(a, b).as_i32x4();
        transmute(simd_select_bitmask(k, xor, i32x4::ZERO))
28684 }
28685}
28686
28687/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst.
28688///
28689/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_epi64&expand=6151)
28690#[inline]
28691#[target_feature(enable = "avx512f")]
28692#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28693#[cfg_attr(test, assert_instr(vpxorq))]
28694pub fn _mm512_xor_epi64(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_xor(a.as_i64x8(), b.as_i64x8())) }
28696}
28697
28698/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28699///
28700/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_xor_epi64&expand=6149)
28701#[inline]
28702#[target_feature(enable = "avx512f")]
28703#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28704#[cfg_attr(test, assert_instr(vpxorq))]
28705pub fn _mm512_mask_xor_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28706 unsafe {
28707 let xor: i64x8 = _mm512_xor_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, xor, src.as_i64x8()))
28709 }
28710}
28711
28712/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28713///
28714/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_xor_epi64&expand=6150)
28715#[inline]
28716#[target_feature(enable = "avx512f")]
28717#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28718#[cfg_attr(test, assert_instr(vpxorq))]
28719pub fn _mm512_maskz_xor_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28720 unsafe {
28721 let xor: i64x8 = _mm512_xor_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, xor, i64x8::ZERO))
28723 }
28724}
28725
28726/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst.
28727///
28728/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_xor_epi64&expand=6148)
28729#[inline]
28730#[target_feature(enable = "avx512f,avx512vl")]
28731#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28732#[cfg_attr(test, assert_instr(vxor))] //should be vpxorq
28733pub fn _mm256_xor_epi64(a: __m256i, b: __m256i) -> __m256i {
    unsafe { transmute(simd_xor(a.as_i64x4(), b.as_i64x4())) }
28735}
28736
28737/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28738///
28739/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_xor_epi64&expand=6146)
28740#[inline]
28741#[target_feature(enable = "avx512f,avx512vl")]
28742#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28743#[cfg_attr(test, assert_instr(vpxorq))]
28744pub fn _mm256_mask_xor_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28745 unsafe {
28746 let xor: i64x4 = _mm256_xor_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, xor, src.as_i64x4()))
28748 }
28749}
28750
28751/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28752///
28753/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_xor_epi64&expand=6147)
28754#[inline]
28755#[target_feature(enable = "avx512f,avx512vl")]
28756#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28757#[cfg_attr(test, assert_instr(vpxorq))]
28758pub fn _mm256_maskz_xor_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28759 unsafe {
28760 let xor: i64x4 = _mm256_xor_epi64(a, b).as_i64x4();
        transmute(simd_select_bitmask(k, xor, i64x4::ZERO))
28762 }
28763}
28764
28765/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst.
28766///
28767/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_xor_epi64&expand=6145)
28768#[inline]
28769#[target_feature(enable = "avx512f,avx512vl")]
28770#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28771#[cfg_attr(test, assert_instr(vxor))] //should be vpxorq
28772pub fn _mm_xor_epi64(a: __m128i, b: __m128i) -> __m128i {
    unsafe { transmute(simd_xor(a.as_i64x2(), b.as_i64x2())) }
28774}
28775
28776/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28777///
28778/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_xor_epi64&expand=6143)
28779#[inline]
28780#[target_feature(enable = "avx512f,avx512vl")]
28781#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28782#[cfg_attr(test, assert_instr(vpxorq))]
28783pub fn _mm_mask_xor_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28784 unsafe {
28785 let xor: i64x2 = _mm_xor_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, xor, src.as_i64x2()))
28787 }
28788}
28789
28790/// Compute the bitwise XOR of packed 64-bit integers in a and b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28791///
28792/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_xor_epi64&expand=6144)
28793#[inline]
28794#[target_feature(enable = "avx512f,avx512vl")]
28795#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28796#[cfg_attr(test, assert_instr(vpxorq))]
28797pub fn _mm_maskz_xor_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28798 unsafe {
28799 let xor: i64x2 = _mm_xor_epi64(a, b).as_i64x2();
        transmute(simd_select_bitmask(k, xor, i64x2::ZERO))
28801 }
28802}
28803
28804/// Compute the bitwise XOR of 512 bits (representing integer data) in a and b, and store the result in dst.
28805///
28806/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_si512&expand=6172)
28807#[inline]
28808#[target_feature(enable = "avx512f")]
28809#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28810#[cfg_attr(test, assert_instr(vpxorq))]
28811pub fn _mm512_xor_si512(a: __m512i, b: __m512i) -> __m512i {
    unsafe { transmute(simd_xor(a.as_i32x16(), b.as_i32x16())) }
28813}
28814
28815/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst.
28816///
28817/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_epi32&expand=310)
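///
/// A small sketch (not part of Intel's documentation) of the NOT-then-AND
/// behavior, assuming a nightly toolchain with `stdarch_x86_avx512` and an
/// AVX-512F CPU:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi32(0b1100);
///     let b = _mm512_set1_epi32(0b1010);
///     // Every lane computes (!a) & b = 0b0010.
///     let r = _mm512_andnot_epi32(a, b);
///     assert_eq!(_mm512_cmpeq_epi32_mask(r, _mm512_set1_epi32(0b0010)), 0xffff);
/// }
/// ```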
28818#[inline]
28819#[target_feature(enable = "avx512f")]
28820#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28821#[cfg_attr(test, assert_instr(vpandnq))] //should be vpandnd
28822pub fn _mm512_andnot_epi32(a: __m512i, b: __m512i) -> __m512i {
    _mm512_and_epi32(_mm512_xor_epi32(a, _mm512_set1_epi32(u32::MAX as i32)), b)
28824}
28825
28826/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28827///
28828/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_andnot_epi32&expand=311)
28829#[inline]
28830#[target_feature(enable = "avx512f")]
28831#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28832#[cfg_attr(test, assert_instr(vpandnd))]
28833pub fn _mm512_mask_andnot_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28834 unsafe {
28835 let andnot: i32x16 = _mm512_andnot_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, andnot, src.as_i32x16()))
28837 }
28838}
28839
28840/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28841///
28842/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_andnot_epi32&expand=312)
28843#[inline]
28844#[target_feature(enable = "avx512f")]
28845#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28846#[cfg_attr(test, assert_instr(vpandnd))]
28847pub fn _mm512_maskz_andnot_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
28848 unsafe {
28849 let andnot: i32x16 = _mm512_andnot_epi32(a, b).as_i32x16();
        transmute(simd_select_bitmask(k, andnot, i32x16::ZERO))
28851 }
28852}
28853
28854/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28855///
28856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_andnot_epi32&expand=308)
28857#[inline]
28858#[target_feature(enable = "avx512f,avx512vl")]
28859#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28860#[cfg_attr(test, assert_instr(vpandnd))]
28861pub fn _mm256_mask_andnot_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28862 unsafe {
        let not: __m256i = _mm256_xor_epi32(a, _mm256_set1_epi32(u32::MAX as i32));
        let andnot: i32x8 = simd_and(not.as_i32x8(), b.as_i32x8());
        transmute(simd_select_bitmask(k, andnot, src.as_i32x8()))
28866 }
28867}
28868
28869/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28870///
28871/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_andnot_epi32&expand=309)
28872#[inline]
28873#[target_feature(enable = "avx512f,avx512vl")]
28874#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28875#[cfg_attr(test, assert_instr(vpandnd))]
28876pub fn _mm256_maskz_andnot_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28877 unsafe {
        let not: __m256i = _mm256_xor_epi32(a, _mm256_set1_epi32(u32::MAX as i32));
        let andnot: i32x8 = simd_and(not.as_i32x8(), b.as_i32x8());
        transmute(simd_select_bitmask(k, andnot, i32x8::ZERO))
28881 }
28882}
28883
28884/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28885///
28886/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_andnot_epi32&expand=306)
28887#[inline]
28888#[target_feature(enable = "avx512f,avx512vl")]
28889#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28890#[cfg_attr(test, assert_instr(vpandnd))]
28891pub fn _mm_mask_andnot_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28892 unsafe {
        let not: __m128i = _mm_xor_epi32(a, _mm_set1_epi32(u32::MAX as i32));
        let andnot: i32x4 = simd_and(not.as_i32x4(), b.as_i32x4());
        transmute(simd_select_bitmask(k, andnot, src.as_i32x4()))
28896 }
28897}
28898
28899/// Compute the bitwise NOT of packed 32-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28900///
28901/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_andnot_epi32&expand=307)
28902#[inline]
28903#[target_feature(enable = "avx512f,avx512vl")]
28904#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28905#[cfg_attr(test, assert_instr(vpandnd))]
28906pub fn _mm_maskz_andnot_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28907 unsafe {
        let not: __m128i = _mm_xor_epi32(a, _mm_set1_epi32(u32::MAX as i32));
        let andnot: i32x4 = simd_and(not.as_i32x4(), b.as_i32x4());
        transmute(simd_select_bitmask(k, andnot, i32x4::ZERO))
28911 }
28912}
28913
28914/// Compute the bitwise NOT of 512 bits (composed of packed 64-bit integers) in a and then AND with b, and store the results in dst.
28915///
28916/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_epi64&expand=317)
28917#[inline]
28918#[target_feature(enable = "avx512f")]
28919#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpandnq))]
28921pub fn _mm512_andnot_epi64(a: __m512i, b: __m512i) -> __m512i {
    _mm512_and_epi64(_mm512_xor_epi64(a, _mm512_set1_epi64(u64::MAX as i64)), b)
28923}
28924
28925/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28926///
28927/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_andnot_epi64&expand=318)
28928#[inline]
28929#[target_feature(enable = "avx512f")]
28930#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28931#[cfg_attr(test, assert_instr(vpandnq))]
28932pub fn _mm512_mask_andnot_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28933 unsafe {
28934 let andnot: i64x8 = _mm512_andnot_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, andnot, src.as_i64x8()))
28936 }
28937}
28938
28939/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28940///
28941/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_andnot_epi64&expand=319)
28942#[inline]
28943#[target_feature(enable = "avx512f")]
28944#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28945#[cfg_attr(test, assert_instr(vpandnq))]
28946pub fn _mm512_maskz_andnot_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
28947 unsafe {
28948 let andnot: i64x8 = _mm512_andnot_epi64(a, b).as_i64x8();
        transmute(simd_select_bitmask(k, andnot, i64x8::ZERO))
28950 }
28951}
28952
28953/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28954///
28955/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_andnot_epi64&expand=315)
28956#[inline]
28957#[target_feature(enable = "avx512f,avx512vl")]
28958#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28959#[cfg_attr(test, assert_instr(vpandnq))]
28960pub fn _mm256_mask_andnot_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28961 unsafe {
        let not: __m256i = _mm256_xor_epi64(a, _mm256_set1_epi64x(u64::MAX as i64));
        let andnot: i64x4 = simd_and(not.as_i64x4(), b.as_i64x4());
        transmute(simd_select_bitmask(k, andnot, src.as_i64x4()))
28965 }
28966}
28967
28968/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28969///
28970/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_andnot_epi64&expand=316)
28971#[inline]
28972#[target_feature(enable = "avx512f,avx512vl")]
28973#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28974#[cfg_attr(test, assert_instr(vpandnq))]
28975pub fn _mm256_maskz_andnot_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
28976 unsafe {
        let not: __m256i = _mm256_xor_epi64(a, _mm256_set1_epi64x(u64::MAX as i64));
        let andnot: i64x4 = simd_and(not.as_i64x4(), b.as_i64x4());
        transmute(simd_select_bitmask(k, andnot, i64x4::ZERO))
28980 }
28981}
28982
28983/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
28984///
28985/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_andnot_epi64&expand=313)
28986#[inline]
28987#[target_feature(enable = "avx512f,avx512vl")]
28988#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
28989#[cfg_attr(test, assert_instr(vpandnq))]
28990pub fn _mm_mask_andnot_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
28991 unsafe {
        let not: __m128i = _mm_xor_epi64(a, _mm_set1_epi64x(u64::MAX as i64));
        let andnot: i64x2 = simd_and(not.as_i64x2(), b.as_i64x2());
        transmute(simd_select_bitmask(k, andnot, src.as_i64x2()))
28995 }
28996}
28997
28998/// Compute the bitwise NOT of packed 64-bit integers in a and then AND with b, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
28999///
29000/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_andnot_epi64&expand=314)
29001#[inline]
29002#[target_feature(enable = "avx512f,avx512vl")]
29003#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29004#[cfg_attr(test, assert_instr(vpandnq))]
29005pub fn _mm_maskz_andnot_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
29006 unsafe {
        let not: __m128i = _mm_xor_epi64(a, _mm_set1_epi64x(u64::MAX as i64));
        let andnot: i64x2 = simd_and(not.as_i64x2(), b.as_i64x2());
        transmute(simd_select_bitmask(k, andnot, i64x2::ZERO))
29010 }
29011}
29012
29013/// Compute the bitwise NOT of 512 bits (representing integer data) in a and then AND with b, and store the result in dst.
29014///
29015/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_si512&expand=340)
29016#[inline]
29017#[target_feature(enable = "avx512f")]
29018#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29019#[cfg_attr(test, assert_instr(vpandnq))]
29020pub fn _mm512_andnot_si512(a: __m512i, b: __m512i) -> __m512i {
    _mm512_and_epi64(_mm512_xor_epi64(a, _mm512_set1_epi64(u64::MAX as i64)), b)
29022}
29023
29024/// Convert 16-bit mask a into an integer value, and store the result in dst.
29025///
29026/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtmask16_u32)
29027#[inline]
29028#[target_feature(enable = "avx512f")]
29029#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29030pub fn _cvtmask16_u32(a: __mmask16) -> u32 {
29031 a as u32
29032}
29033
/// Convert 32-bit integer value a to a 16-bit mask and store the result in dst.
29035///
29036/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_cvtu32_mask16)
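///
/// A round-trip sketch (not part of Intel's documentation); the conversion is a
/// plain truncation to 16 bits:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let k = _cvtu32_mask16(0x0001_00ff); // bits above 15 are discarded
///     assert_eq!(k, 0x00ff);
///     assert_eq!(_cvtmask16_u32(k), 0x00ff_u32);
/// }
/// ```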
29037#[inline]
29038#[target_feature(enable = "avx512f")]
29039#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29040pub fn _cvtu32_mask16(a: u32) -> __mmask16 {
29041 a as __mmask16
29042}
29043
29044/// Compute the bitwise AND of 16-bit masks a and b, and store the result in k.
29045///
29046/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kand_mask16&expand=3212)
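///
/// A small illustration (not part of Intel's documentation); the mask intrinsics
/// behave like ordinary 16-bit bitwise operations:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a: __mmask16 = 0b1111_0000_1111_0000;
///     let b: __mmask16 = 0b1010_1010_1010_1010;
///     assert_eq!(_kand_mask16(a, b), 0b1010_0000_1010_0000);
/// }
/// ```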
29047#[inline]
29048#[target_feature(enable = "avx512f")]
29049#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29050#[cfg_attr(test, assert_instr(and))] // generate normal and code instead of kandw
29051pub fn _kand_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
29052 a & b
29053}
29054
29055/// Compute the bitwise AND of 16-bit masks a and b, and store the result in k.
29056///
29057/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kand&expand=3210)
29058#[inline]
29059#[target_feature(enable = "avx512f")]
29060#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29061#[cfg_attr(test, assert_instr(and))] // generate normal and code instead of kandw
29062pub fn _mm512_kand(a: __mmask16, b: __mmask16) -> __mmask16 {
29063 a & b
29064}
29065
29066/// Compute the bitwise OR of 16-bit masks a and b, and store the result in k.
29067///
29068/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kor_mask16&expand=3239)
29069#[inline]
29070#[target_feature(enable = "avx512f")]
29071#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29072#[cfg_attr(test, assert_instr(or))] // generate normal or code instead of korw
29073pub fn _kor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
29074 a | b
29075}
29076
29077/// Compute the bitwise OR of 16-bit masks a and b, and store the result in k.
29078///
29079/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kor&expand=3237)
29080#[inline]
29081#[target_feature(enable = "avx512f")]
29082#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29083#[cfg_attr(test, assert_instr(or))] // generate normal or code instead of korw
29084pub fn _mm512_kor(a: __mmask16, b: __mmask16) -> __mmask16 {
29085 a | b
29086}
29087
29088/// Compute the bitwise XOR of 16-bit masks a and b, and store the result in k.
29089///
29090/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kxor_mask16&expand=3291)
29091#[inline]
29092#[target_feature(enable = "avx512f")]
29093#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29094#[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kxorw
29095pub fn _kxor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
29096 a ^ b
29097}
29098
29099/// Compute the bitwise XOR of 16-bit masks a and b, and store the result in k.
29100///
29101/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kxor&expand=3289)
29102#[inline]
29103#[target_feature(enable = "avx512f")]
29104#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29105#[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kxorw
29106pub fn _mm512_kxor(a: __mmask16, b: __mmask16) -> __mmask16 {
29107 a ^ b
29108}
29109
29110/// Compute the bitwise NOT of 16-bit mask a, and store the result in k.
29111///
29112/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=knot_mask16&expand=3233)
29113#[inline]
29114#[target_feature(enable = "avx512f")]
29115#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29116pub fn _knot_mask16(a: __mmask16) -> __mmask16 {
29117 a ^ 0b11111111_11111111
29118}
29119
29120/// Compute the bitwise NOT of 16-bit mask a, and store the result in k.
29121///
29122/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_knot&expand=3231)
29123#[inline]
29124#[target_feature(enable = "avx512f")]
29125#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29126pub fn _mm512_knot(a: __mmask16) -> __mmask16 {
29127 a ^ 0b11111111_11111111
29128}
29129
29130/// Compute the bitwise NOT of 16-bit masks a and then AND with b, and store the result in k.
29131///
29132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kandn_mask16&expand=3218)
29133#[inline]
29134#[target_feature(enable = "avx512f")]
29135#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29136#[cfg_attr(test, assert_instr(not))] // generate normal and, not code instead of kandnw
29137pub fn _kandn_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
    _mm512_kand(_mm512_knot(a), b)
29139}
29140
29141/// Compute the bitwise NOT of 16-bit masks a and then AND with b, and store the result in k.
29142///
29143/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kandn&expand=3216)
29144#[inline]
29145#[target_feature(enable = "avx512f")]
29146#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(not))] // generate normal not, and code instead of kandnw
29148pub fn _mm512_kandn(a: __mmask16, b: __mmask16) -> __mmask16 {
    _mm512_kand(_mm512_knot(a), b)
29150}
29151
29152/// Compute the bitwise XNOR of 16-bit masks a and b, and store the result in k.
29153///
29154/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=kxnor_mask16&expand=3285)
29155#[inline]
29156#[target_feature(enable = "avx512f")]
29157#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29158#[cfg_attr(test, assert_instr(xor))] // generate normal xor, not code instead of kxnorw
29159pub fn _kxnor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
29160 _mm512_knot(_mm512_kxor(a, b))
29161}
29162
29163/// Compute the bitwise XNOR of 16-bit masks a and b, and store the result in k.
29164///
29165/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kxnor&expand=3283)
29166#[inline]
29167#[target_feature(enable = "avx512f")]
29168#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(xor))] // generate normal xor, not code instead of kxnorw
29170pub fn _mm512_kxnor(a: __mmask16, b: __mmask16) -> __mmask16 {
29171 _mm512_knot(_mm512_kxor(a, b))
29172}
29173
29174/// Compute the bitwise OR of 16-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
29175/// store 0 in dst. If the result is all ones, store 1 in all_ones, otherwise store 0 in all_ones.
29176///
29177/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortest_mask16_u8)
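///
/// A usage sketch (not part of Intel's documentation) showing both outputs:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let mut all_ones = 0u8;
///     // The OR of the two masks is 0xffff, so `all_ones` is set and the
///     // returned "all zeros" flag is 0.
///     let zf = _kortest_mask16_u8(0xff00, 0x00ff, &mut all_ones);
///     assert_eq!((zf, all_ones), (0, 1));
/// }
/// ```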
29178#[inline]
29179#[target_feature(enable = "avx512f")]
29180#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29181pub unsafe fn _kortest_mask16_u8(a: __mmask16, b: __mmask16, all_ones: *mut u8) -> u8 {
29182 let tmp: u16 = _kor_mask16(a, b);
29183 *all_ones = (tmp == 0xffff) as u8;
29184 (tmp == 0) as u8
29185}
29186
29187/// Compute the bitwise OR of 16-bit masks a and b. If the result is all ones, store 1 in dst, otherwise
29188/// store 0 in dst.
29189///
29190/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestc_mask16_u8)
29191#[inline]
29192#[target_feature(enable = "avx512f")]
29193#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29194pub fn _kortestc_mask16_u8(a: __mmask16, b: __mmask16) -> u8 {
29195 (_kor_mask16(a, b) == 0xffff) as u8
29196}
29197
29198/// Compute the bitwise OR of 16-bit masks a and b. If the result is all zeros, store 1 in dst, otherwise
29199/// store 0 in dst.
29200///
29201/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kortestz_mask16_u8)
29202#[inline]
29203#[target_feature(enable = "avx512f")]
29204#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29205pub fn _kortestz_mask16_u8(a: __mmask16, b: __mmask16) -> u8 {
29206 (_kor_mask16(a, b) == 0) as u8
29207}
29208
29209/// Shift 16-bit mask a left by count bits while shifting in zeros, and store the result in dst.
29210///
29211/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftli_mask16)
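///
/// A minimal sketch (not part of Intel's documentation); `COUNT` is a const
/// generic, so the shift amount must be known at compile time:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let k: __mmask16 = 0b0000_0000_0000_1111;
///     assert_eq!(_kshiftli_mask16::<4>(k), 0b0000_0000_1111_0000);
/// }
/// ```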
29212#[inline]
29213#[target_feature(enable = "avx512f")]
29214#[rustc_legacy_const_generics(1)]
29215#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29216pub fn _kshiftli_mask16<const COUNT: u32>(a: __mmask16) -> __mmask16 {
29217 a << COUNT
29218}
29219
29220/// Shift 16-bit mask a right by count bits while shifting in zeros, and store the result in dst.
29221///
29222/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_kshiftri_mask16)
29223#[inline]
29224#[target_feature(enable = "avx512f")]
29225#[rustc_legacy_const_generics(1)]
29226#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29227pub fn _kshiftri_mask16<const COUNT: u32>(a: __mmask16) -> __mmask16 {
29228 a >> COUNT
29229}
29230
/// Load 16-bit mask from memory.
29232///
29233/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_load_mask16)
29234#[inline]
29235#[target_feature(enable = "avx512f")]
29236#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29237pub unsafe fn _load_mask16(mem_addr: *const __mmask16) -> __mmask16 {
29238 *mem_addr
29239}
29240
/// Store 16-bit mask to memory.
29242///
29243/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_store_mask16)
29244#[inline]
29245#[target_feature(enable = "avx512f")]
29246#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29247pub unsafe fn _store_mask16(mem_addr: *mut __mmask16, a: __mmask16) {
29248 *mem_addr = a;
29249}
29250
29251/// Copy 16-bit mask a to k.
29252///
29253/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm512_kmov&expand=3228)
29254#[inline]
29255#[target_feature(enable = "avx512f")]
29256#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(mov))] // generate normal mov code instead of kmovw
29258pub fn _mm512_kmov(a: __mmask16) -> __mmask16 {
29259 a
29260}
29261
29262/// Converts integer mask into bitmask, storing the result in dst.
29263///
29264/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_int2mask&expand=3189)
29265#[inline]
29266#[target_feature(enable = "avx512f")] // generate normal and code instead of kmovw
29267#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29268pub fn _mm512_int2mask(mask: i32) -> __mmask16 {
29269 mask as u16
29270}
29271
/// Converts bit mask k1 into an integer value, storing the result in dst.
29273///
29274/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask2int&expand=3544)
29275#[inline]
29276#[target_feature(enable = "avx512f")]
29277#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(mov))] // generate normal mov code instead of kmovw
29279pub fn _mm512_mask2int(k1: __mmask16) -> i32 {
29280 k1 as i32
29281}
29282
29283/// Unpack and interleave 8 bits from masks a and b, and store the 16-bit result in k.
29284///
29285/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kunpackb&expand=3280)
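///
/// A small sketch (not part of Intel's documentation): the low byte of `a`
/// becomes the high byte of the result and the low byte of `b` becomes the low
/// byte:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a: __mmask16 = 0xff12;
///     let b: __mmask16 = 0xff34;
///     assert_eq!(_mm512_kunpackb(a, b), 0x1234);
/// }
/// ```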
29286#[inline]
29287#[target_feature(enable = "avx512f")]
29288#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(mov))] // generate normal mov code instead of kunpckbw
29290pub fn _mm512_kunpackb(a: __mmask16, b: __mmask16) -> __mmask16 {
29291 ((a & 0xff) << 8) | (b & 0xff)
29292}
29293
29294/// Performs bitwise OR between k1 and k2, storing the result in dst. CF flag is set if dst consists of all 1's.
29295///
29296/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kortestc&expand=3247)
29297#[inline]
29298#[target_feature(enable = "avx512f")]
29299#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(cmp))] // generate normal cmp code instead of kortestw
29301pub fn _mm512_kortestc(a: __mmask16, b: __mmask16) -> i32 {
29302 let r: bool = (a | b) == 0b11111111_11111111;
29303 r as i32
29304}
29305
29306/// Performs bitwise OR between k1 and k2, storing the result in dst. ZF flag is set if dst is 0.
29307///
29308/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_kortestz)
29309#[inline]
29310#[target_feature(enable = "avx512f")]
29311#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(xor))] // generate normal xor code instead of kortestw
29313pub fn _mm512_kortestz(a: __mmask16, b: __mmask16) -> i32 {
29314 let r: bool = (a | b) == 0;
29315 r as i32
29316}
29317
29318/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
29319///
29320/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_test_epi32_mask&expand=5890)
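///
/// A minimal sketch (not part of Intel's documentation); a lane's mask bit is
/// set exactly when `a AND b` is non-zero in that lane:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi32(0b0110);
///     let b = _mm512_setr_epi32(0b0001, 0b0010, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
///     // Only lane 1 overlaps with `a`, so only bit 1 of the mask is set.
///     assert_eq!(_mm512_test_epi32_mask(a, b), 0b10);
/// }
/// ```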
29321#[inline]
29322#[target_feature(enable = "avx512f")]
29323#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29324#[cfg_attr(test, assert_instr(vptestmd))]
29325pub fn _mm512_test_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
29326 let and: __m512i = _mm512_and_epi32(a, b);
29327 let zero: __m512i = _mm512_setzero_si512();
    _mm512_cmpneq_epi32_mask(and, zero)
29329}
29330
29331/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
29332///
29333/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_test_epi32_mask&expand=5889)
29334#[inline]
29335#[target_feature(enable = "avx512f")]
29336#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29337#[cfg_attr(test, assert_instr(vptestmd))]
29338pub fn _mm512_mask_test_epi32_mask(k: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
29339 let and: __m512i = _mm512_and_epi32(a, b);
29340 let zero: __m512i = _mm512_setzero_si512();
    _mm512_mask_cmpneq_epi32_mask(k, and, zero)
29342}
29343
29344/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
29345///
29346/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_test_epi32_mask&expand=5888)
29347#[inline]
29348#[target_feature(enable = "avx512f,avx512vl")]
29349#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29350#[cfg_attr(test, assert_instr(vptestmd))]
29351pub fn _mm256_test_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
29352 let and: __m256i = _mm256_and_si256(a, b);
29353 let zero: __m256i = _mm256_setzero_si256();
    _mm256_cmpneq_epi32_mask(and, zero)
29355}
29356
29357/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
29358///
29359/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_test_epi32_mask&expand=5887)
29360#[inline]
29361#[target_feature(enable = "avx512f,avx512vl")]
29362#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29363#[cfg_attr(test, assert_instr(vptestmd))]
29364pub fn _mm256_mask_test_epi32_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
29365 let and: __m256i = _mm256_and_si256(a, b);
29366 let zero: __m256i = _mm256_setzero_si256();
    _mm256_mask_cmpneq_epi32_mask(k, and, zero)
29368}
29369
29370/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
29371///
29372/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_epi32_mask&expand=5886)
29373#[inline]
29374#[target_feature(enable = "avx512f,avx512vl")]
29375#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29376#[cfg_attr(test, assert_instr(vptestmd))]
29377pub fn _mm_test_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
29378 let and: __m128i = _mm_and_si128(a, b);
29379 let zero: __m128i = _mm_setzero_si128();
    _mm_cmpneq_epi32_mask(and, zero)
29381}
29382
29383/// Compute the bitwise AND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
29384///
29385/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_test_epi32_mask&expand=5885)
29386#[inline]
29387#[target_feature(enable = "avx512f,avx512vl")]
29388#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29389#[cfg_attr(test, assert_instr(vptestmd))]
29390pub fn _mm_mask_test_epi32_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
29391 let and: __m128i = _mm_and_si128(a, b);
29392 let zero: __m128i = _mm_setzero_si128();
    _mm_mask_cmpneq_epi32_mask(k, and, zero)
29394}
29395
29396/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
29397///
29398/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_test_epi64_mask&expand=5896)
29399#[inline]
29400#[target_feature(enable = "avx512f")]
29401#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29402#[cfg_attr(test, assert_instr(vptestmq))]
29403pub fn _mm512_test_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
29404 let and: __m512i = _mm512_and_epi64(a, b);
29405 let zero: __m512i = _mm512_setzero_si512();
    _mm512_cmpneq_epi64_mask(and, zero)
29407}
29408
29409/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
29410///
29411/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_test_epi64_mask&expand=5895)
29412#[inline]
29413#[target_feature(enable = "avx512f")]
29414#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29415#[cfg_attr(test, assert_instr(vptestmq))]
29416pub fn _mm512_mask_test_epi64_mask(k: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
29417 let and: __m512i = _mm512_and_epi64(a, b);
29418 let zero: __m512i = _mm512_setzero_si512();
    _mm512_mask_cmpneq_epi64_mask(k, and, zero)
29420}
29421
29422/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
29423///
29424/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_test_epi64_mask&expand=5894)
29425#[inline]
29426#[target_feature(enable = "avx512f,avx512vl")]
29427#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29428#[cfg_attr(test, assert_instr(vptestmq))]
29429pub fn _mm256_test_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
29430 let and: __m256i = _mm256_and_si256(a, b);
29431 let zero: __m256i = _mm256_setzero_si256();
    _mm256_cmpneq_epi64_mask(and, zero)
29433}
29434
29435/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
29436///
29437/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_test_epi64_mask&expand=5893)
29438#[inline]
29439#[target_feature(enable = "avx512f,avx512vl")]
29440#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29441#[cfg_attr(test, assert_instr(vptestmq))]
29442pub fn _mm256_mask_test_epi64_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
29443 let and: __m256i = _mm256_and_si256(a, b);
29444 let zero: __m256i = _mm256_setzero_si256();
    _mm256_mask_cmpneq_epi64_mask(k, and, zero)
29446}
29447
29448/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is non-zero.
29449///
29450/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_test_epi64_mask&expand=5892)
29451#[inline]
29452#[target_feature(enable = "avx512f,avx512vl")]
29453#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29454#[cfg_attr(test, assert_instr(vptestmq))]
29455pub fn _mm_test_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
29456 let and: __m128i = _mm_and_si128(a, b);
29457 let zero: __m128i = _mm_setzero_si128();
    _mm_cmpneq_epi64_mask(and, zero)
29459}
29460
29461/// Compute the bitwise AND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is non-zero.
29462///
29463/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_test_epi64_mask&expand=5891)
29464#[inline]
29465#[target_feature(enable = "avx512f,avx512vl")]
29466#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29467#[cfg_attr(test, assert_instr(vptestmq))]
29468pub fn _mm_mask_test_epi64_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
29469 let and: __m128i = _mm_and_si128(a, b);
29470 let zero: __m128i = _mm_setzero_si128();
    _mm_mask_cmpneq_epi64_mask(k, and, zero)
29472}
29473
29474/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
29475///
29476/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_testn_epi32_mask&expand=5921)
29477#[inline]
29478#[target_feature(enable = "avx512f")]
29479#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29480#[cfg_attr(test, assert_instr(vptestnmd))]
29481pub fn _mm512_testn_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
29482 let and: __m512i = _mm512_and_epi32(a, b);
29483 let zero: __m512i = _mm512_setzero_si512();
    _mm512_cmpeq_epi32_mask(and, zero)
29485}
29486
29487/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
29488///
29489/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_testn_epi32_mask&expand=5920)
29490#[inline]
29491#[target_feature(enable = "avx512f")]
29492#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29493#[cfg_attr(test, assert_instr(vptestnmd))]
29494pub fn _mm512_mask_testn_epi32_mask(k: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
29495 let and: __m512i = _mm512_and_epi32(a, b);
29496 let zero: __m512i = _mm512_setzero_si512();
    _mm512_mask_cmpeq_epi32_mask(k, and, zero)
29498}
29499
29500/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
29501///
29502/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testn_epi32_mask&expand=5919)
29503#[inline]
29504#[target_feature(enable = "avx512f,avx512vl")]
29505#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29506#[cfg_attr(test, assert_instr(vptestnmd))]
29507pub fn _mm256_testn_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
29508 let and: __m256i = _mm256_and_si256(a, b);
29509 let zero: __m256i = _mm256_setzero_si256();
    _mm256_cmpeq_epi32_mask(and, zero)
29511}
29512
29513/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
29514///
29515/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_testn_epi32_mask&expand=5918)
29516#[inline]
29517#[target_feature(enable = "avx512f,avx512vl")]
29518#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29519#[cfg_attr(test, assert_instr(vptestnmd))]
29520pub fn _mm256_mask_testn_epi32_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
29521 let and: __m256i = _mm256_and_si256(a, b);
29522 let zero: __m256i = _mm256_setzero_si256();
    _mm256_mask_cmpeq_epi32_mask(k, and, zero)
29524}
29525
29526/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
29527///
29528/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testn_epi32_mask&expand=5917)
29529#[inline]
29530#[target_feature(enable = "avx512f,avx512vl")]
29531#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29532#[cfg_attr(test, assert_instr(vptestnmd))]
29533pub fn _mm_testn_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
29534 let and: __m128i = _mm_and_si128(a, b);
29535 let zero: __m128i = _mm_setzero_si128();
    _mm_cmpeq_epi32_mask(and, zero)
29537}
29538
29539/// Compute the bitwise NAND of packed 32-bit integers in a and b, producing intermediate 32-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
29540///
29541/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_testn_epi32_mask&expand=5916)
29542#[inline]
29543#[target_feature(enable = "avx512f,avx512vl")]
29544#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29545#[cfg_attr(test, assert_instr(vptestnmd))]
29546pub fn _mm_mask_testn_epi32_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
29547 let and: __m128i = _mm_and_si128(a, b);
29548 let zero: __m128i = _mm_setzero_si128();
    _mm_mask_cmpeq_epi32_mask(k, and, zero)
29550}
29551
29552/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
29553///
29554/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_testn_epi64_mask&expand=5927)
29555#[inline]
29556#[target_feature(enable = "avx512f")]
29557#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29558#[cfg_attr(test, assert_instr(vptestnmq))]
29559pub fn _mm512_testn_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
29560 let and: __m512i = _mm512_and_epi64(a, b);
29561 let zero: __m512i = _mm512_setzero_si512();
    _mm512_cmpeq_epi64_mask(and, zero)
29563}
29564
29565/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
29566///
29567/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_testn_epi64_mask&expand=5926)
29568#[inline]
29569#[target_feature(enable = "avx512f")]
29570#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29571#[cfg_attr(test, assert_instr(vptestnmq))]
29572pub fn _mm512_mask_testn_epi64_mask(k: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
29573 let and: __m512i = _mm512_and_epi64(a, b);
29574 let zero: __m512i = _mm512_setzero_si512();
    _mm512_mask_cmpeq_epi64_mask(k, and, zero)
29576}
29577
29578/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
29579///
29580/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_testn_epi64_mask&expand=5925)
29581#[inline]
29582#[target_feature(enable = "avx512f,avx512vl")]
29583#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29584#[cfg_attr(test, assert_instr(vptestnmq))]
29585pub fn _mm256_testn_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
29586 let and: __m256i = _mm256_and_si256(a, b);
29587 let zero: __m256i = _mm256_setzero_si256();
    _mm256_cmpeq_epi64_mask(and, zero)
29589}
29590
29591/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
29592///
29593/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_testn_epi64_mask&expand=5924)
29594#[inline]
29595#[target_feature(enable = "avx512f,avx512vl")]
29596#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29597#[cfg_attr(test, assert_instr(vptestnmq))]
29598pub fn _mm256_mask_testn_epi64_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
29599 let and: __m256i = _mm256_and_si256(a, b);
29600 let zero: __m256i = _mm256_setzero_si256();
    _mm256_mask_cmpeq_epi64_mask(k, and, zero)
29602}
29603
29604/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k if the intermediate value is zero.
29605///
29606/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_testn_epi64_mask&expand=5923)
29607#[inline]
29608#[target_feature(enable = "avx512f,avx512vl")]
29609#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29610#[cfg_attr(test, assert_instr(vptestnmq))]
29611pub fn _mm_testn_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
29612 let and: __m128i = _mm_and_si128(a, b);
29613 let zero: __m128i = _mm_setzero_si128();
    _mm_cmpeq_epi64_mask(and, zero)
29615}
29616
29617/// Compute the bitwise NAND of packed 64-bit integers in a and b, producing intermediate 64-bit values, and set the corresponding bit in result mask k (subject to writemask k) if the intermediate value is zero.
29618///
29619/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_testn_epi64_mask&expand=5922)
29620#[inline]
29621#[target_feature(enable = "avx512f,avx512vl")]
29622#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29623#[cfg_attr(test, assert_instr(vptestnmq))]
29624pub fn _mm_mask_testn_epi64_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
29625 let and: __m128i = _mm_and_si128(a, b);
29626 let zero: __m128i = _mm_setzero_si128();
    _mm_mask_cmpeq_epi64_mask(k, and, zero)
29628}
29629
29630/// Store 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
29631///
29632/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_ps&expand=5671)
29633///
29634/// # Safety of non-temporal stores
29635///
29636/// After using this intrinsic, but before any other access to the memory that this intrinsic
29637/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
29638/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
29639/// return.
29640///
29641/// See [`_mm_sfence`] for details.
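///
/// A hedged usage sketch (not part of Intel's documentation) showing the store
/// followed by the required fence; it assumes a 64-byte-aligned buffer and an
/// AVX-512F CPU:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[repr(align(64))]
/// struct Aligned([f32; 16]);
///
/// unsafe {
///     let mut buf = Aligned([0.0; 16]);
///     let v = _mm512_set1_ps(1.0);
///     _mm512_stream_ps(buf.0.as_mut_ptr(), v);
///     // Make the non-temporal store visible before any other access.
///     _mm_sfence();
///     assert_eq!(buf.0, [1.0; 16]);
/// }
/// ```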
29642#[inline]
29643#[target_feature(enable = "avx512f")]
29644#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29645#[cfg_attr(test, assert_instr(vmovntps))]
29646#[allow(clippy::cast_ptr_alignment)]
29647pub unsafe fn _mm512_stream_ps(mem_addr: *mut f32, a: __m512) {
29648 crate::arch::asm!(
29649 vps!("vmovntps", ",{a}"),
29650 p = in(reg) mem_addr,
29651 a = in(zmm_reg) a,
29652 options(nostack, preserves_flags),
29653 );
29654}
29655
29656/// Store 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
29657///
29658/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_pd&expand=5667)
29659///
29660/// # Safety of non-temporal stores
29661///
29662/// After using this intrinsic, but before any other access to the memory that this intrinsic
29663/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
29664/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
29665/// return.
29666///
29667/// See [`_mm_sfence`] for details.
29668#[inline]
29669#[target_feature(enable = "avx512f")]
29670#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29671#[cfg_attr(test, assert_instr(vmovntpd))]
29672#[allow(clippy::cast_ptr_alignment)]
29673pub unsafe fn _mm512_stream_pd(mem_addr: *mut f64, a: __m512d) {
29674 crate::arch::asm!(
29675 vps!("vmovntpd", ",{a}"),
29676 p = in(reg) mem_addr,
29677 a = in(zmm_reg) a,
29678 options(nostack, preserves_flags),
29679 );
29680}
29681
29682/// Store 512-bits of integer data from a into memory using a non-temporal memory hint. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
29683///
29684/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_si512&expand=5675)
29685///
29686/// # Safety of non-temporal stores
29687///
29688/// After using this intrinsic, but before any other access to the memory that this intrinsic
29689/// mutates, a call to [`_mm_sfence`] must be performed by the thread that used the intrinsic. In
29690/// particular, functions that call this intrinsic should generally call `_mm_sfence` before they
29691/// return.
29692///
29693/// See [`_mm_sfence`] for details.
29694#[inline]
29695#[target_feature(enable = "avx512f")]
29696#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29697#[cfg_attr(test, assert_instr(vmovntdq))]
29698#[allow(clippy::cast_ptr_alignment)]
29699pub unsafe fn _mm512_stream_si512(mem_addr: *mut i32, a: __m512i) {
29700 crate::arch::asm!(
29701 vps!("vmovntdq", ",{a}"),
29702 p = in(reg) mem_addr,
29703 a = in(zmm_reg) a,
29704 options(nostack, preserves_flags),
29705 );
29706}
29707
29708/// Load 512-bits of integer data from memory into dst using a non-temporal memory hint. mem_addr
29709/// must be aligned on a 64-byte boundary or a general-protection exception may be generated. To
29710/// minimize caching, the data is flagged as non-temporal (unlikely to be used again soon)
29711///
29712/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_stream_load_si512)
29713#[inline]
29714#[target_feature(enable = "avx512f")]
29715#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29716pub unsafe fn _mm512_stream_load_si512(mem_addr: *const __m512i) -> __m512i {
29717 let dst: __m512i;
29718 crate::arch::asm!(
29719 vpl!("vmovntdqa {a}"),
29720 a = out(zmm_reg) dst,
29721 p = in(reg) mem_addr,
29722 options(pure, readonly, nostack, preserves_flags),
29723 );
29724 dst
29725}
29726
/// Sets packed single-precision (32-bit) floating-point elements in `dst` with the supplied values.
29728///
29729/// [Intel's documentation]( https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_ps&expand=4931)
29730#[inline]
29731#[target_feature(enable = "avx512f")]
29732#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29733pub fn _mm512_set_ps(
29734 e0: f32,
29735 e1: f32,
29736 e2: f32,
29737 e3: f32,
29738 e4: f32,
29739 e5: f32,
29740 e6: f32,
29741 e7: f32,
29742 e8: f32,
29743 e9: f32,
29744 e10: f32,
29745 e11: f32,
29746 e12: f32,
29747 e13: f32,
29748 e14: f32,
29749 e15: f32,
29750) -> __m512 {
29751 _mm512_setr_ps(
        e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0,
29753 )
29754}
29755
/// Sets packed single-precision (32-bit) floating-point elements in `dst` with
/// the supplied values in reverse order.
29758///
29759/// [Intel's documentation]( https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_ps&expand=5008)
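///
/// A small sketch (not part of Intel's documentation) contrasting the argument
/// order of `_mm512_setr_ps` with `_mm512_set_ps`:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let r = _mm512_setr_ps(
///         0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
///     );
///     let mut out = [0.0f32; 16];
///     _mm512_storeu_ps(out.as_mut_ptr(), r);
///     // `setr` places its first argument in the lowest lane.
///     assert_eq!(out[0], 0.0);
///     assert_eq!(out[15], 15.0);
/// }
/// ```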
29760#[inline]
29761#[target_feature(enable = "avx512f")]
29762#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29763pub fn _mm512_setr_ps(
29764 e0: f32,
29765 e1: f32,
29766 e2: f32,
29767 e3: f32,
29768 e4: f32,
29769 e5: f32,
29770 e6: f32,
29771 e7: f32,
29772 e8: f32,
29773 e9: f32,
29774 e10: f32,
29775 e11: f32,
29776 e12: f32,
29777 e13: f32,
29778 e14: f32,
29779 e15: f32,
29780) -> __m512 {
29781 unsafe {
29782 let r: f32x16 = f32x16::new(
            e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
29784 );
        transmute(r)
29786 }
29787}
29788
29789/// Broadcast 64-bit float `a` to all elements of `dst`.
29790///
29791/// [Intel's documentation]( https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_pd&expand=4975)
29792#[inline]
29793#[target_feature(enable = "avx512f")]
29794#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29795pub fn _mm512_set1_pd(a: f64) -> __m512d {
    unsafe { transmute(f64x8::splat(a)) }
29797}
29798
29799/// Broadcast 32-bit float `a` to all elements of `dst`.
29800///
29801/// [Intel's documentation]( https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_ps&expand=4981)
29802#[inline]
29803#[target_feature(enable = "avx512f")]
29804#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29805pub fn _mm512_set1_ps(a: f32) -> __m512 {
    unsafe { transmute(f32x16::splat(a)) }
29807}
29808
29809/// Sets packed 32-bit integers in `dst` with the supplied values.
29810///
29811/// [Intel's documentation]( https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_epi32&expand=4908)
29812#[inline]
29813#[target_feature(enable = "avx512f")]
29814#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29815pub fn _mm512_set_epi32(
29816 e15: i32,
29817 e14: i32,
29818 e13: i32,
29819 e12: i32,
29820 e11: i32,
29821 e10: i32,
29822 e9: i32,
29823 e8: i32,
29824 e7: i32,
29825 e6: i32,
29826 e5: i32,
29827 e4: i32,
29828 e3: i32,
29829 e2: i32,
29830 e1: i32,
29831 e0: i32,
29832) -> __m512i {
29833 _mm512_setr_epi32(
        e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
29835 )
29836}
29837
29838/// Broadcast 8-bit integer a to all elements of dst.
29839///
29840/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_epi8&expand=4972)
29841#[inline]
29842#[target_feature(enable = "avx512f")]
29843#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29844pub fn _mm512_set1_epi8(a: i8) -> __m512i {
    unsafe { transmute(i8x64::splat(a)) }
29846}
29847
/// Broadcast 16-bit integer a to all elements of dst.
29849///
29850/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_epi16&expand=4944)
29851#[inline]
29852#[target_feature(enable = "avx512f")]
29853#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29854pub fn _mm512_set1_epi16(a: i16) -> __m512i {
    unsafe { transmute(i16x32::splat(a)) }
29856}
29857
29858/// Broadcast 32-bit integer `a` to all elements of `dst`.
29859///
29860/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set1_epi32)
29861#[inline]
29862#[target_feature(enable = "avx512f")]
29863#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29864pub fn _mm512_set1_epi32(a: i32) -> __m512i {
    unsafe { transmute(i32x16::splat(a)) }
29866}
29867
29868/// Broadcast 32-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29869///
29870/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_set1_epi32&expand=4951)
29871#[inline]
29872#[target_feature(enable = "avx512f")]
29873#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29874#[cfg_attr(test, assert_instr(vpbroadcastd))]
29875pub fn _mm512_mask_set1_epi32(src: __m512i, k: __mmask16, a: i32) -> __m512i {
29876 unsafe {
29877 let r: i32x16 = _mm512_set1_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, r, src.as_i32x16()))
29879 }
29880}
29881
29882/// Broadcast 32-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29883///
29884/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_set1_epi32&expand=4952)
29885#[inline]
29886#[target_feature(enable = "avx512f")]
29887#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29888#[cfg_attr(test, assert_instr(vpbroadcastd))]
29889pub fn _mm512_maskz_set1_epi32(k: __mmask16, a: i32) -> __m512i {
29890 unsafe {
29891 let r: i32x16 = _mm512_set1_epi32(a).as_i32x16();
        transmute(simd_select_bitmask(k, r, i32x16::ZERO))
29893 }
29894}
29895
29896/// Broadcast 32-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29897///
29898/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_set1_epi32&expand=4948)
29899#[inline]
29900#[target_feature(enable = "avx512f,avx512vl")]
29901#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29902#[cfg_attr(test, assert_instr(vpbroadcastd))]
29903pub fn _mm256_mask_set1_epi32(src: __m256i, k: __mmask8, a: i32) -> __m256i {
29904 unsafe {
29905 let r: i32x8 = _mm256_set1_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, r, src.as_i32x8()))
29907 }
29908}
29909
29910/// Broadcast 32-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29911///
29912/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_set1_epi32&expand=4949)
29913#[inline]
29914#[target_feature(enable = "avx512f,avx512vl")]
29915#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29916#[cfg_attr(test, assert_instr(vpbroadcastd))]
29917pub fn _mm256_maskz_set1_epi32(k: __mmask8, a: i32) -> __m256i {
29918 unsafe {
29919 let r: i32x8 = _mm256_set1_epi32(a).as_i32x8();
        transmute(simd_select_bitmask(k, r, i32x8::ZERO))
29921 }
29922}
29923
29924/// Broadcast 32-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29925///
29926/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_set1_epi32&expand=4945)
29927#[inline]
29928#[target_feature(enable = "avx512f,avx512vl")]
29929#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29930#[cfg_attr(test, assert_instr(vpbroadcastd))]
29931pub fn _mm_mask_set1_epi32(src: __m128i, k: __mmask8, a: i32) -> __m128i {
29932 unsafe {
29933 let r: i32x4 = _mm_set1_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, r, src.as_i32x4()))
29935 }
29936}
29937
29938/// Broadcast 32-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29939///
29940/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_set1_epi32&expand=4946)
29941#[inline]
29942#[target_feature(enable = "avx512f,avx512vl")]
29943#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29944#[cfg_attr(test, assert_instr(vpbroadcastd))]
29945pub fn _mm_maskz_set1_epi32(k: __mmask8, a: i32) -> __m128i {
29946 unsafe {
29947 let r: i32x4 = _mm_set1_epi32(a).as_i32x4();
        transmute(simd_select_bitmask(k, r, i32x4::ZERO))
29949 }
29950}
29951
29952/// Broadcast 64-bit integer `a` to all elements of `dst`.
29953///
29954/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set1_epi64&expand=4961)
29955#[inline]
29956#[target_feature(enable = "avx512f")]
29957#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29958pub fn _mm512_set1_epi64(a: i64) -> __m512i {
    unsafe { transmute(i64x8::splat(a)) }
29960}
29961
29962/// Broadcast 64-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29963///
29964/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_set1_epi64&expand=4959)
29965#[inline]
29966#[target_feature(enable = "avx512f")]
29967#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29968#[cfg_attr(test, assert_instr(vpbroadcastq))]
29969pub fn _mm512_mask_set1_epi64(src: __m512i, k: __mmask8, a: i64) -> __m512i {
29970 unsafe {
29971 let r: i64x8 = _mm512_set1_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, r, src.as_i64x8()))
29973 }
29974}
29975
29976/// Broadcast 64-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
29977///
29978/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_set1_epi64&expand=4960)
29979#[inline]
29980#[target_feature(enable = "avx512f")]
29981#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29982#[cfg_attr(test, assert_instr(vpbroadcastq))]
29983pub fn _mm512_maskz_set1_epi64(k: __mmask8, a: i64) -> __m512i {
29984 unsafe {
29985 let r: i64x8 = _mm512_set1_epi64(a).as_i64x8();
        transmute(simd_select_bitmask(k, r, i64x8::ZERO))
29987 }
29988}
29989
29990/// Broadcast 64-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
29991///
29992/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_set1_epi64&expand=4957)
29993#[inline]
29994#[target_feature(enable = "avx512f,avx512vl")]
29995#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
29996#[cfg_attr(test, assert_instr(vpbroadcastq))]
29997pub fn _mm256_mask_set1_epi64(src: __m256i, k: __mmask8, a: i64) -> __m256i {
29998 unsafe {
29999 let r: i64x4 = _mm256_set1_epi64x(a).as_i64x4();
        transmute(simd_select_bitmask(k, r, src.as_i64x4()))
30001 }
30002}
30003
30004/// Broadcast 64-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
30005///
30006/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_set1_epi64&expand=4958)
30007#[inline]
30008#[target_feature(enable = "avx512f,avx512vl")]
30009#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30010#[cfg_attr(test, assert_instr(vpbroadcastq))]
30011pub fn _mm256_maskz_set1_epi64(k: __mmask8, a: i64) -> __m256i {
30012 unsafe {
30013 let r: i64x4 = _mm256_set1_epi64x(a).as_i64x4();
        transmute(simd_select_bitmask(k, r, i64x4::ZERO))
30015 }
30016}
30017
30018/// Broadcast 64-bit integer a to all elements of dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
30019///
30020/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_set1_epi64&expand=4954)
30021#[inline]
30022#[target_feature(enable = "avx512f,avx512vl")]
30023#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30024#[cfg_attr(test, assert_instr(vpbroadcastq))]
30025pub fn _mm_mask_set1_epi64(src: __m128i, k: __mmask8, a: i64) -> __m128i {
30026 unsafe {
30027 let r: i64x2 = _mm_set1_epi64x(a).as_i64x2();
        transmute(simd_select_bitmask(k, r, src.as_i64x2()))
30029 }
30030}
30031
30032/// Broadcast 64-bit integer a to all elements of dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
30033///
30034/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_set1_epi64&expand=4955)
30035#[inline]
30036#[target_feature(enable = "avx512f,avx512vl")]
30037#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30038#[cfg_attr(test, assert_instr(vpbroadcastq))]
30039pub fn _mm_maskz_set1_epi64(k: __mmask8, a: i64) -> __m128i {
30040 unsafe {
30041 let r: i64x2 = _mm_set1_epi64x(a).as_i64x2();
        transmute(simd_select_bitmask(k, r, i64x2::ZERO))
30043 }
30044}
30045
30046/// Set packed 64-bit integers in dst with the repeated 4 element sequence.
30047///
30048/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set4_epi64&expand=4983)
30049#[inline]
30050#[target_feature(enable = "avx512f")]
30051#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30052pub fn _mm512_set4_epi64(d: i64, c: i64, b: i64, a: i64) -> __m512i {
    _mm512_set_epi64(d, c, b, a, d, c, b, a)
30054}
30055
30056/// Set packed 64-bit integers in dst with the repeated 4 element sequence in reverse order.
30057///
30058/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr4_epi64&expand=5010)
30059#[inline]
30060#[target_feature(enable = "avx512f")]
30061#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30062pub fn _mm512_setr4_epi64(d: i64, c: i64, b: i64, a: i64) -> __m512i {
    _mm512_set_epi64(a, b, c, d, a, b, c, d)
30064}
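
// Illustrative note (not from Intel's docs): the two helpers above differ only in lane order.
// `_mm512_set4_epi64(d, c, b, a)` places `a` in the lowest lane, while `_mm512_setr4_epi64`
// starts the repeated sequence with `d`:
//
//     let v = _mm512_set4_epi64(40, 30, 20, 10);
//     // lanes (low to high): 10, 20, 30, 40, 10, 20, 30, 40
//     let w = _mm512_setr4_epi64(40, 30, 20, 10);
//     // lanes (low to high): 40, 30, 20, 10, 40, 30, 20, 10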
30065
30066/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than, and store the results in mask vector k.
30067///
30068/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_ps_mask&expand=1074)
30069#[inline]
30070#[target_feature(enable = "avx512f")]
30071#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30072#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30073pub fn _mm512_cmplt_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30074 _mm512_cmp_ps_mask::<_CMP_LT_OS>(a, b)
30075}
30076
30077/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30078///
30079/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_ps_mask&expand=1075)
30080#[inline]
30081#[target_feature(enable = "avx512f")]
30082#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30083#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30084pub fn _mm512_mask_cmplt_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30085 _mm512_mask_cmp_ps_mask::<_CMP_LT_OS>(k1, a, b)
30086}
30087
30088/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k.
30089///
30090/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnlt_ps_mask&expand=1154)
30091#[inline]
30092#[target_feature(enable = "avx512f")]
30093#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30094#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30095pub fn _mm512_cmpnlt_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30096 _mm512_cmp_ps_mask::<_CMP_NLT_US>(a, b)
30097}
30098
30099/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30100///
30101/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnlt_ps_mask&expand=1155)
30102#[inline]
30103#[target_feature(enable = "avx512f")]
30104#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30105#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30106pub fn _mm512_mask_cmpnlt_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30107 _mm512_mask_cmp_ps_mask::<_CMP_NLT_US>(k1, a, b)
30108}
30109
30110/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k.
30111///
30112/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_ps_mask&expand=1013)
30113#[inline]
30114#[target_feature(enable = "avx512f")]
30115#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30116#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30117pub fn _mm512_cmple_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30118 _mm512_cmp_ps_mask::<_CMP_LE_OS>(a, b)
30119}
30120
30121/// Compare packed single-precision (32-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30122///
30123/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_ps_mask&expand=1014)
30124#[inline]
30125#[target_feature(enable = "avx512f")]
30126#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30127#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30128pub fn _mm512_mask_cmple_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30129 _mm512_mask_cmp_ps_mask::<_CMP_LE_OS>(k1, a, b)
30130}
30131
30132/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k.
30133///
30134/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnle_ps_mask&expand=1146)
30135#[inline]
30136#[target_feature(enable = "avx512f")]
30137#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30138#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30139pub fn _mm512_cmpnle_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30140 _mm512_cmp_ps_mask::<_CMP_NLE_US>(a, b)
30141}
30142
30143/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30144///
30145/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnle_ps_mask&expand=1147)
30146#[inline]
30147#[target_feature(enable = "avx512f")]
30148#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30149#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30150pub fn _mm512_mask_cmpnle_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30151 _mm512_mask_cmp_ps_mask::<_CMP_NLE_US>(k1, a, b)
30152}
30153
30154/// Compare packed single-precision (32-bit) floating-point elements in a and b for equality, and store the results in mask vector k.
30155///
30156/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_ps_mask&expand=828)
30157#[inline]
30158#[target_feature(enable = "avx512f")]
30159#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30160#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30161pub fn _mm512_cmpeq_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30162 _mm512_cmp_ps_mask::<_CMP_EQ_OQ>(a, b)
30163}
30164
30165/// Compare packed single-precision (32-bit) floating-point elements in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30166///
30167/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_ps_mask&expand=829)
30168#[inline]
30169#[target_feature(enable = "avx512f")]
30170#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30171#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30172pub fn _mm512_mask_cmpeq_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30173 _mm512_mask_cmp_ps_mask::<_CMP_EQ_OQ>(k1, a, b)
30174}
30175
30176/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k.
30177///
30178/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_ps_mask&expand=1130)
30179#[inline]
30180#[target_feature(enable = "avx512f")]
30181#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30182#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30183pub fn _mm512_cmpneq_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30184 _mm512_cmp_ps_mask::<_CMP_NEQ_UQ>(a, b)
30185}
30186
30187/// Compare packed single-precision (32-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30188///
30189/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_ps_mask&expand=1131)
30190#[inline]
30191#[target_feature(enable = "avx512f")]
30192#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30193#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30194pub fn _mm512_mask_cmpneq_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30195 _mm512_mask_cmp_ps_mask::<_CMP_NEQ_UQ>(k1, a, b)
30196}
30197
30198/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30199///
30200/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_ps_mask&expand=749)
30201#[inline]
30202#[target_feature(enable = "avx512f")]
30203#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30204#[rustc_legacy_const_generics(2)]
30205#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30206pub fn _mm512_cmp_ps_mask<const IMM8: i32>(a: __m512, b: __m512) -> __mmask16 {
30207 unsafe {
30208 static_assert_uimm_bits!(IMM8, 5);
30209 let neg_one: i16 = -1;
30210 let a: f32x16 = a.as_f32x16();
30211 let b: f32x16 = b.as_f32x16();
        let r: i16 = vcmpps(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION);
30213 r.cast_unsigned()
30214 }
30215}
30216
30217/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30218///
30219/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_ps_mask&expand=750)
30220#[inline]
30221#[target_feature(enable = "avx512f")]
30222#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30223#[rustc_legacy_const_generics(3)]
30224#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30225pub fn _mm512_mask_cmp_ps_mask<const IMM8: i32>(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30226 unsafe {
30227 static_assert_uimm_bits!(IMM8, 5);
30228 let a: f32x16 = a.as_f32x16();
30229 let b: f32x16 = b.as_f32x16();
        let r: i16 = vcmpps(a, b, IMM8, k1 as i16, _MM_FROUND_CUR_DIRECTION);
30231 r.cast_unsigned()
30232 }
30233}
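
// Illustrative sketch (not from Intel's docs): the named comparison wrappers above are thin
// aliases over `_mm512_cmp_ps_mask` with a fixed predicate constant, so the following pairs
// produce identical masks; `a` and `b` stand for any `__m512` values and AVX-512F is assumed:
//
//     let lt1 = _mm512_cmplt_ps_mask(a, b);
//     let lt2 = _mm512_cmp_ps_mask::<_CMP_LT_OS>(a, b);
//     assert_eq!(lt1, lt2);
//
//     // The masked form ANDs the comparison result with `k1`.
//     let k1: __mmask16 = 0x0F0F;
//     let lt3 = _mm512_mask_cmplt_ps_mask(k1, a, b);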
30234
30235/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30236///
30237/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_ps_mask&expand=747)
30238#[inline]
30239#[target_feature(enable = "avx512f,avx512vl")]
30240#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30241#[rustc_legacy_const_generics(2)]
30242#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30243pub fn _mm256_cmp_ps_mask<const IMM8: i32>(a: __m256, b: __m256) -> __mmask8 {
30244 unsafe {
30245 static_assert_uimm_bits!(IMM8, 5);
30246 let neg_one: i8 = -1;
30247 let a: f32x8 = a.as_f32x8();
30248 let b: f32x8 = b.as_f32x8();
        let r: i8 = vcmpps256(a, b, IMM8, neg_one);
30250 r.cast_unsigned()
30251 }
30252}
30253
30254/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30255///
30256/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_ps_mask&expand=748)
30257#[inline]
30258#[target_feature(enable = "avx512f,avx512vl")]
30259#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30260#[rustc_legacy_const_generics(3)]
30261#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30262pub fn _mm256_mask_cmp_ps_mask<const IMM8: i32>(k1: __mmask8, a: __m256, b: __m256) -> __mmask8 {
30263 unsafe {
30264 static_assert_uimm_bits!(IMM8, 5);
30265 let a: f32x8 = a.as_f32x8();
30266 let b: f32x8 = b.as_f32x8();
        let r: i8 = vcmpps256(a, b, IMM8, k1 as i8);
30268 r.cast_unsigned()
30269 }
30270}
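
// Illustrative note (not from Intel's docs): the 256-bit and 128-bit comparison-mask forms
// require AVX-512VL in addition to AVX-512F. A hedged sketch of a runtime-dispatch call site,
// assuming `a` and `b` are `__m256` values:
//
//     if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512vl") {
//         let k = unsafe { _mm256_cmp_ps_mask::<_CMP_LE_OS>(a, b) };
//     }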
30271
30272/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30273///
30274/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_ps_mask&expand=745)
30275#[inline]
30276#[target_feature(enable = "avx512f,avx512vl")]
30277#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30278#[rustc_legacy_const_generics(2)]
30279#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30280pub fn _mm_cmp_ps_mask<const IMM8: i32>(a: __m128, b: __m128) -> __mmask8 {
30281 unsafe {
30282 static_assert_uimm_bits!(IMM8, 5);
30283 let neg_one: i8 = -1;
30284 let a: f32x4 = a.as_f32x4();
30285 let b: f32x4 = b.as_f32x4();
        let r: i8 = vcmpps128(a, b, IMM8, neg_one);
30287 r.cast_unsigned()
30288 }
30289}
30290
30291/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30292///
30293/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_ps_mask&expand=746)
30294#[inline]
30295#[target_feature(enable = "avx512f,avx512vl")]
30296#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30297#[rustc_legacy_const_generics(3)]
30298#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30299pub fn _mm_mask_cmp_ps_mask<const IMM8: i32>(k1: __mmask8, a: __m128, b: __m128) -> __mmask8 {
30300 unsafe {
30301 static_assert_uimm_bits!(IMM8, 5);
30302 let a: f32x4 = a.as_f32x4();
30303 let b: f32x4 = b.as_f32x4();
        let r: i8 = vcmpps128(a, b, IMM8, k1 as i8);
30305 r.cast_unsigned()
30306 }
30307}
30308
30309/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.\
30310/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30311///
30312/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_round_ps_mask&expand=753)
30313#[inline]
30314#[target_feature(enable = "avx512f")]
30315#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30316#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30317#[rustc_legacy_const_generics(2, 3)]
30318pub fn _mm512_cmp_round_ps_mask<const IMM5: i32, const SAE: i32>(
30319 a: __m512,
30320 b: __m512,
30321) -> __mmask16 {
30322 unsafe {
30323 static_assert_uimm_bits!(IMM5, 5);
30324 static_assert_mantissas_sae!(SAE);
30325 let neg_one: i16 = -1;
30326 let a: f32x16 = a.as_f32x16();
30327 let b: f32x16 = b.as_f32x16();
        let r: i16 = vcmpps(a, b, IMM5, neg_one, SAE);
30329 r.cast_unsigned()
30330 }
30331}
30332
30333/// Compare packed single-precision (32-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).\
30334/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30335///
30336/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_round_ps_mask&expand=754)
30337#[inline]
30338#[target_feature(enable = "avx512f")]
30339#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30340#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30341#[rustc_legacy_const_generics(3, 4)]
30342pub fn _mm512_mask_cmp_round_ps_mask<const IMM5: i32, const SAE: i32>(
30343 m: __mmask16,
30344 a: __m512,
30345 b: __m512,
30346) -> __mmask16 {
30347 unsafe {
30348 static_assert_uimm_bits!(IMM5, 5);
30349 static_assert_mantissas_sae!(SAE);
30350 let a: f32x16 = a.as_f32x16();
30351 let b: f32x16 = b.as_f32x16();
30352 let r: i16 = vcmpps(a, b, IMM5, m as i16, SAE);
30353 r.cast_unsigned()
30354 }
30355}
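
// Illustrative sketch (not from Intel's docs): the `_round_` variants only add exception
// suppression; the SAE parameter is expected to be either `_MM_FROUND_CUR_DIRECTION` (default
// behaviour) or `_MM_FROUND_NO_EXC` (suppress floating-point exceptions):
//
//     let quiet = _mm512_cmp_round_ps_mask::<_CMP_LT_OS, _MM_FROUND_NO_EXC>(a, b);
//     let noisy = _mm512_cmp_round_ps_mask::<_CMP_LT_OS, _MM_FROUND_CUR_DIRECTION>(a, b);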
30356
30357/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k.
30358///
30359/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpord_ps_mask&expand=1162)
30360#[inline]
30361#[target_feature(enable = "avx512f")]
30362#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30364pub fn _mm512_cmpord_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30365 _mm512_cmp_ps_mask::<_CMP_ORD_Q>(a, b)
30366}
30367
30368/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30369///
30370/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpord_ps_mask&expand=1163)
30371#[inline]
30372#[target_feature(enable = "avx512f")]
30373#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30374#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30375pub fn _mm512_mask_cmpord_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30376 _mm512_mask_cmp_ps_mask::<_CMP_ORD_Q>(k1, a, b)
30377}
30378
30379/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k.
30380///
30381/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpunord_ps_mask&expand=1170)
30382#[inline]
30383#[target_feature(enable = "avx512f")]
30384#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30385#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30386pub fn _mm512_cmpunord_ps_mask(a: __m512, b: __m512) -> __mmask16 {
30387 _mm512_cmp_ps_mask::<_CMP_UNORD_Q>(a, b)
30388}
30389
30390/// Compare packed single-precision (32-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30391///
30392/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpunord_ps_mask&expand=1171)
30393#[inline]
30394#[target_feature(enable = "avx512f")]
30395#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30396#[cfg_attr(test, assert_instr(vcmp))] //should be vcmpps
30397pub fn _mm512_mask_cmpunord_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
30398 _mm512_mask_cmp_ps_mask::<_CMP_UNORD_Q>(k1, a, b)
30399}
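
// Illustrative note (not from Intel's docs): `cmpord` and `cmpunord` partition lanes by
// NaN-ness, so for any pair of inputs the two masks are bitwise complements of each other
// (`a` and `b` stand for any `__m512` values):
//
//     let ord = _mm512_cmpord_ps_mask(a, b);
//     let unord = _mm512_cmpunord_ps_mask(a, b);
//     assert_eq!(ord ^ unord, 0xFFFF);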
30400
30401/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than, and store the results in mask vector k.
30402///
30403/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_pd_mask&expand=1071)
30404#[inline]
30405#[target_feature(enable = "avx512f")]
30406#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30407#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30408pub fn _mm512_cmplt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30409 _mm512_cmp_pd_mask::<_CMP_LT_OS>(a, b)
30410}
30411
30412/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30413///
30414/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_pd_mask&expand=1072)
30415#[inline]
30416#[target_feature(enable = "avx512f")]
30417#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30418#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30419pub fn _mm512_mask_cmplt_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30420 _mm512_mask_cmp_pd_mask::<_CMP_LT_OS>(k1, a, b)
30421}
30422
30423/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k.
30424///
30425/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnlt_pd_mask&expand=1151)
30426#[inline]
30427#[target_feature(enable = "avx512f")]
30428#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30429#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30430pub fn _mm512_cmpnlt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30431 _mm512_cmp_pd_mask::<_CMP_NLT_US>(a, b)
30432}
30433
30434/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30435///
30436/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnlt_pd_mask&expand=1152)
30437#[inline]
30438#[target_feature(enable = "avx512f")]
30439#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30440#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30441pub fn _mm512_mask_cmpnlt_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
    _mm512_mask_cmp_pd_mask::<_CMP_NLT_US>(m, a, b)
30443}
30444
30445/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k.
30446///
30447/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_pd_mask&expand=1010)
30448#[inline]
30449#[target_feature(enable = "avx512f")]
30450#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30451#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30452pub fn _mm512_cmple_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30453 _mm512_cmp_pd_mask::<_CMP_LE_OS>(a, b)
30454}
30455
30456/// Compare packed double-precision (64-bit) floating-point elements in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30457///
30458/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_pd_mask&expand=1011)
30459#[inline]
30460#[target_feature(enable = "avx512f")]
30461#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30462#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30463pub fn _mm512_mask_cmple_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30464 _mm512_mask_cmp_pd_mask::<_CMP_LE_OS>(k1, a, b)
30465}
30466
30467/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k.
30468///
30469/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpnle_pd_mask&expand=1143)
30470#[inline]
30471#[target_feature(enable = "avx512f")]
30472#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30473#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30474pub fn _mm512_cmpnle_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30475 _mm512_cmp_pd_mask::<_CMP_NLE_US>(a, b)
30476}
30477
30478/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30479///
30480/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpnle_pd_mask&expand=1144)
30481#[inline]
30482#[target_feature(enable = "avx512f")]
30483#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30484#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30485pub fn _mm512_mask_cmpnle_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30486 _mm512_mask_cmp_pd_mask::<_CMP_NLE_US>(k1, a, b)
30487}
30488
30489/// Compare packed double-precision (64-bit) floating-point elements in a and b for equality, and store the results in mask vector k.
30490///
30491/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_pd_mask&expand=822)
30492#[inline]
30493#[target_feature(enable = "avx512f")]
30494#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30495#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30496pub fn _mm512_cmpeq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30497 _mm512_cmp_pd_mask::<_CMP_EQ_OQ>(a, b)
30498}
30499
30500/// Compare packed double-precision (64-bit) floating-point elements in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30501///
30502/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_pd_mask&expand=823)
30503#[inline]
30504#[target_feature(enable = "avx512f")]
30505#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30506#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30507pub fn _mm512_mask_cmpeq_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30508 _mm512_mask_cmp_pd_mask::<_CMP_EQ_OQ>(k1, a, b)
30509}
30510
30511/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k.
30512///
30513/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_pd_mask&expand=1127)
30514#[inline]
30515#[target_feature(enable = "avx512f")]
30516#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30517#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30518pub fn _mm512_cmpneq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30519 _mm512_cmp_pd_mask::<_CMP_NEQ_UQ>(a, b)
30520}
30521
30522/// Compare packed double-precision (64-bit) floating-point elements in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30523///
30524/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_pd_mask&expand=1128)
30525#[inline]
30526#[target_feature(enable = "avx512f")]
30527#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30528#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30529pub fn _mm512_mask_cmpneq_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30530 _mm512_mask_cmp_pd_mask::<_CMP_NEQ_UQ>(k1, a, b)
30531}
30532
30533/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30534///
30535/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_pd_mask&expand=741)
30536#[inline]
30537#[target_feature(enable = "avx512f")]
30538#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30539#[rustc_legacy_const_generics(2)]
30540#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30541pub fn _mm512_cmp_pd_mask<const IMM8: i32>(a: __m512d, b: __m512d) -> __mmask8 {
30542 unsafe {
30543 static_assert_uimm_bits!(IMM8, 5);
30544 let neg_one: i8 = -1;
30545 let a: f64x8 = a.as_f64x8();
30546 let b: f64x8 = b.as_f64x8();
        let r: i8 = vcmppd(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION);
30548 r.cast_unsigned()
30549 }
30550}
30551
30552/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30553///
30554/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_pd_mask&expand=742)
30555#[inline]
30556#[target_feature(enable = "avx512f")]
30557#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30558#[rustc_legacy_const_generics(3)]
30559#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30560pub fn _mm512_mask_cmp_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30561 unsafe {
30562 static_assert_uimm_bits!(IMM8, 5);
30563 let a: f64x8 = a.as_f64x8();
30564 let b: f64x8 = b.as_f64x8();
        let r: i8 = vcmppd(a, b, IMM8, k1 as i8, _MM_FROUND_CUR_DIRECTION);
30566 r.cast_unsigned()
30567 }
30568}
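
// Usage sketch for the double-precision comparison masks (illustrative; `a` and `b` stand for
// any `__m512d` values and AVX-512F support is assumed):
//
//     // _CMP_NEQ_UQ is an unordered predicate: NaN lanes compare as "not equal".
//     let ne = _mm512_cmp_pd_mask::<_CMP_NEQ_UQ>(a, b);
//     // Restrict the comparison to the lanes selected by `k1`; other mask bits come out 0.
//     let k1: __mmask8 = 0b0000_1111;
//     let ne_lo = _mm512_mask_cmp_pd_mask::<_CMP_NEQ_UQ>(k1, a, b);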
30569
30570/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30571///
30572/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_pd_mask&expand=739)
30573#[inline]
30574#[target_feature(enable = "avx512f,avx512vl")]
30575#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30576#[rustc_legacy_const_generics(2)]
30577#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30578pub fn _mm256_cmp_pd_mask<const IMM8: i32>(a: __m256d, b: __m256d) -> __mmask8 {
30579 unsafe {
30580 static_assert_uimm_bits!(IMM8, 5);
30581 let neg_one: i8 = -1;
30582 let a: f64x4 = a.as_f64x4();
30583 let b: f64x4 = b.as_f64x4();
        let r: i8 = vcmppd256(a, b, IMM8, neg_one);
30585 r.cast_unsigned()
30586 }
30587}
30588
30589/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30590///
30591/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_pd_mask&expand=740)
30592#[inline]
30593#[target_feature(enable = "avx512f,avx512vl")]
30594#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30595#[rustc_legacy_const_generics(3)]
30596#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30597pub fn _mm256_mask_cmp_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m256d, b: __m256d) -> __mmask8 {
30598 unsafe {
30599 static_assert_uimm_bits!(IMM8, 5);
30600 let a: f64x4 = a.as_f64x4();
30601 let b: f64x4 = b.as_f64x4();
        let r: i8 = vcmppd256(a, b, IMM8, k1 as i8);
30603 r.cast_unsigned()
30604 }
30605}
30606
30607/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
30608///
30609/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_pd_mask&expand=737)
30610#[inline]
30611#[target_feature(enable = "avx512f,avx512vl")]
30612#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30613#[rustc_legacy_const_generics(2)]
30614#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30615pub fn _mm_cmp_pd_mask<const IMM8: i32>(a: __m128d, b: __m128d) -> __mmask8 {
30616 unsafe {
30617 static_assert_uimm_bits!(IMM8, 5);
30618 let neg_one: i8 = -1;
30619 let a: f64x2 = a.as_f64x2();
30620 let b: f64x2 = b.as_f64x2();
        let r: i8 = vcmppd128(a, b, IMM8, neg_one);
30622 r.cast_unsigned()
30623 }
30624}
30625
30626/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30627///
30628/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_pd_mask&expand=738)
30629#[inline]
30630#[target_feature(enable = "avx512f,avx512vl")]
30631#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30632#[rustc_legacy_const_generics(3)]
30633#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30634pub fn _mm_mask_cmp_pd_mask<const IMM8: i32>(k1: __mmask8, a: __m128d, b: __m128d) -> __mmask8 {
30635 unsafe {
30636 static_assert_uimm_bits!(IMM8, 5);
30637 let a: f64x2 = a.as_f64x2();
30638 let b: f64x2 = b.as_f64x2();
        let r: i8 = vcmppd128(a, b, IMM8, k1 as i8);
30640 r.cast_unsigned()
30641 }
30642}
30643
30644/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.\
30645/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30646///
30647/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_round_pd_mask&expand=751)
30648#[inline]
30649#[target_feature(enable = "avx512f")]
30650#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30651#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30652#[rustc_legacy_const_generics(2, 3)]
30653pub fn _mm512_cmp_round_pd_mask<const IMM5: i32, const SAE: i32>(
30654 a: __m512d,
30655 b: __m512d,
30656) -> __mmask8 {
30657 unsafe {
30658 static_assert_uimm_bits!(IMM5, 5);
30659 static_assert_mantissas_sae!(SAE);
30660 let neg_one: i8 = -1;
30661 let a: f64x8 = a.as_f64x8();
30662 let b: f64x8 = b.as_f64x8();
        let r: i8 = vcmppd(a, b, IMM5, neg_one, SAE);
30664 r.cast_unsigned()
30665 }
30666}
30667
30668/// Compare packed double-precision (64-bit) floating-point elements in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).\
30669/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30670///
30671/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_round_pd_mask&expand=752)
30672#[inline]
30673#[target_feature(enable = "avx512f")]
30674#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30675#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30676#[rustc_legacy_const_generics(3, 4)]
30677pub fn _mm512_mask_cmp_round_pd_mask<const IMM5: i32, const SAE: i32>(
30678 k1: __mmask8,
30679 a: __m512d,
30680 b: __m512d,
30681) -> __mmask8 {
30682 unsafe {
30683 static_assert_uimm_bits!(IMM5, 5);
30684 static_assert_mantissas_sae!(SAE);
30685 let a: f64x8 = a.as_f64x8();
30686 let b: f64x8 = b.as_f64x8();
        let r: i8 = vcmppd(a, b, IMM5, k1 as i8, SAE);
30688 r.cast_unsigned()
30689 }
30690}
30691
30692/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k.
30693///
30694/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpord_pd_mask&expand=1159)
30695#[inline]
30696#[target_feature(enable = "avx512f")]
30697#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30698#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30699pub fn _mm512_cmpord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30700 _mm512_cmp_pd_mask::<_CMP_ORD_Q>(a, b)
30701}
30702
30703/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if neither is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30704///
30705/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpord_pd_mask&expand=1160)
30706#[inline]
30707#[target_feature(enable = "avx512f")]
30708#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30709#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30710pub fn _mm512_mask_cmpord_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30711 _mm512_mask_cmp_pd_mask::<_CMP_ORD_Q>(k1, a, b)
30712}
30713
30714/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k.
30715///
30716/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpunord_pd_mask&expand=1167)
30717#[inline]
30718#[target_feature(enable = "avx512f")]
30719#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30720#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30721pub fn _mm512_cmpunord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
30722 _mm512_cmp_pd_mask::<_CMP_UNORD_Q>(a, b)
30723}
30724
30725/// Compare packed double-precision (64-bit) floating-point elements in a and b to see if either is NaN, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30726///
30727/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpunord_pd_mask&expand=1168)
30728#[inline]
30729#[target_feature(enable = "avx512f")]
30730#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30731#[cfg_attr(test, assert_instr(vcmp))] //should be vcmppd
30732pub fn _mm512_mask_cmpunord_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
30733 _mm512_mask_cmp_pd_mask::<_CMP_UNORD_Q>(k1, a, b)
30734}
30735
30736/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.
30737///
30738/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_ss_mask&expand=763)
30739#[inline]
30740#[target_feature(enable = "avx512f")]
30741#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30742#[rustc_legacy_const_generics(2)]
30743#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30744pub fn _mm_cmp_ss_mask<const IMM8: i32>(a: __m128, b: __m128) -> __mmask8 {
30745 unsafe {
30746 static_assert_uimm_bits!(IMM8, 5);
30747 let neg_one: i8 = -1;
        let r: i8 = vcmpss(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION);
30749 r.cast_unsigned()
30750 }
30751}
30752
30753/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).
30754///
30755/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_ss_mask&expand=764)
30756#[inline]
30757#[target_feature(enable = "avx512f")]
30758#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30759#[rustc_legacy_const_generics(3)]
30760#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30761pub fn _mm_mask_cmp_ss_mask<const IMM8: i32>(k1: __mmask8, a: __m128, b: __m128) -> __mmask8 {
30762 unsafe {
30763 static_assert_uimm_bits!(IMM8, 5);
        let r: i8 = vcmpss(a, b, IMM8, k1 as i8, _MM_FROUND_CUR_DIRECTION);
30765 r.cast_unsigned()
30766 }
30767}
30768
30769/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.\
30770/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30771///
30772/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_round_ss_mask&expand=757)
30773#[inline]
30774#[target_feature(enable = "avx512f")]
30775#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30776#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30777#[rustc_legacy_const_generics(2, 3)]
30778pub fn _mm_cmp_round_ss_mask<const IMM5: i32, const SAE: i32>(a: __m128, b: __m128) -> __mmask8 {
30779 unsafe {
30780 static_assert_uimm_bits!(IMM5, 5);
30781 static_assert_mantissas_sae!(SAE);
30782 let neg_one: i8 = -1;
        let r: i8 = vcmpss(a, b, IMM5, neg_one, SAE);
30784 r.cast_unsigned()
30785 }
30786}
30787
/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).\
30789/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30790///
30791/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_round_ss_mask&expand=758)
30792#[inline]
30793#[target_feature(enable = "avx512f")]
30794#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30795#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30796#[rustc_legacy_const_generics(3, 4)]
30797pub fn _mm_mask_cmp_round_ss_mask<const IMM5: i32, const SAE: i32>(
30798 k1: __mmask8,
30799 a: __m128,
30800 b: __m128,
30801) -> __mmask8 {
30802 unsafe {
30803 static_assert_uimm_bits!(IMM5, 5);
30804 static_assert_mantissas_sae!(SAE);
        let r: i8 = vcmpss(a, b, IMM5, k1 as i8, SAE);
30806 r.cast_unsigned()
30807 }
30808}
30809
30810/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.
30811///
30812/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_sd_mask&expand=760)
30813#[inline]
30814#[target_feature(enable = "avx512f")]
30815#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30816#[rustc_legacy_const_generics(2)]
30817#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30818pub fn _mm_cmp_sd_mask<const IMM8: i32>(a: __m128d, b: __m128d) -> __mmask8 {
30819 unsafe {
30820 static_assert_uimm_bits!(IMM8, 5);
30821 let neg_one: i8 = -1;
        let r: i8 = vcmpsd(a, b, IMM8, neg_one, _MM_FROUND_CUR_DIRECTION);
30823 r.cast_unsigned()
30824 }
30825}
30826
30827/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).
30828///
30829/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_sd_mask&expand=761)
30830#[inline]
30831#[target_feature(enable = "avx512f")]
30832#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30833#[rustc_legacy_const_generics(3)]
30834#[cfg_attr(test, assert_instr(vcmp, IMM8 = 0))]
30835pub fn _mm_mask_cmp_sd_mask<const IMM8: i32>(k1: __mmask8, a: __m128d, b: __m128d) -> __mmask8 {
30836 unsafe {
30837 static_assert_uimm_bits!(IMM8, 5);
        let r: i8 = vcmpsd(a, b, IMM8, k1 as i8, _MM_FROUND_CUR_DIRECTION);
30839 r.cast_unsigned()
30840 }
30841}
30842
30843/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k.\
30844/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30845///
30846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_round_sd_mask&expand=755)
30847#[inline]
30848#[target_feature(enable = "avx512f")]
30849#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30850#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30851#[rustc_legacy_const_generics(2, 3)]
30852pub fn _mm_cmp_round_sd_mask<const IMM5: i32, const SAE: i32>(a: __m128d, b: __m128d) -> __mmask8 {
30853 unsafe {
30854 static_assert_uimm_bits!(IMM5, 5);
30855 static_assert_mantissas_sae!(SAE);
30856 let neg_one: i8 = -1;
        let r: i8 = vcmpsd(a, b, IMM5, neg_one, SAE);
30858 r.cast_unsigned()
30859 }
30860}
30861
30862/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and store the result in mask vector k using zeromask k1 (the element is zeroed out when mask bit 0 is not set).\
30863/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
30864///
30865/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_round_sd_mask&expand=756)
30866#[inline]
30867#[target_feature(enable = "avx512f")]
30868#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30869#[cfg_attr(test, assert_instr(vcmp, IMM5 = 0, SAE = 4))]
30870#[rustc_legacy_const_generics(3, 4)]
30871pub fn _mm_mask_cmp_round_sd_mask<const IMM5: i32, const SAE: i32>(
30872 k1: __mmask8,
30873 a: __m128d,
30874 b: __m128d,
30875) -> __mmask8 {
30876 unsafe {
30877 static_assert_uimm_bits!(IMM5, 5);
30878 static_assert_mantissas_sae!(SAE);
        let r: i8 = vcmpsd(a, b, IMM5, k1 as i8, SAE);
30880 r.cast_unsigned()
30881 }
30882}
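
// Illustrative note (not from Intel's docs): the `_ss`/`_sd` variants above compare only the
// lowest element of their operands, so at most bit 0 of the returned mask can be set:
//
//     let x = _mm_set_sd(1.0);
//     let y = _mm_set_sd(2.0);
//     let k = _mm_cmp_sd_mask::<_CMP_LT_OS>(x, y); // k == 0b1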
30883
30884/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k.
30885///
30886/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epu32_mask&expand=1056)
30887#[inline]
30888#[target_feature(enable = "avx512f")]
30889#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30890#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30891pub fn _mm512_cmplt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    unsafe { simd_bitmask::<u32x16, _>(simd_lt(a.as_u32x16(), b.as_u32x16())) }
30893}
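
// Illustrative note (not from Intel's docs): the `_epu32` comparisons treat each lane as an
// unsigned 32-bit integer, so a lane holding -1 (i.e. 0xFFFF_FFFF) is *greater* than 1:
//
//     let a = _mm512_set1_epi32(-1);
//     let b = _mm512_set1_epi32(1);
//     assert_eq!(_mm512_cmplt_epu32_mask(a, b), 0);      // 0xFFFF_FFFF < 1 is false
//     assert_eq!(_mm512_cmpgt_epu32_mask(a, b), 0xFFFF); // and it is greater in every lane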
30894
30895/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30896///
30897/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epu32_mask&expand=1057)
30898#[inline]
30899#[target_feature(enable = "avx512f")]
30900#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30901#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30902pub fn _mm512_mask_cmplt_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
30903 _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(k1, a, b)
30904}
30905
30906/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k.
30907///
30908/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epu32_mask&expand=1054)
30909#[inline]
30910#[target_feature(enable = "avx512f,avx512vl")]
30911#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30912#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30913pub fn _mm256_cmplt_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<u32x8, _>(simd_lt(a.as_u32x8(), b.as_u32x8())) }
30915}
30916
30917/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30918///
30919/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epu32_mask&expand=1055)
30920#[inline]
30921#[target_feature(enable = "avx512f,avx512vl")]
30922#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30923#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30924pub fn _mm256_mask_cmplt_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
30925 _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(k1, a, b)
30926}
30927
30928/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k.
30929///
30930/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epu32_mask&expand=1052)
30931#[inline]
30932#[target_feature(enable = "avx512f,avx512vl")]
30933#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30934#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30935pub fn _mm_cmplt_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<u32x4, _>(simd_lt(a.as_u32x4(), b.as_u32x4())) }
30937}
30938
30939/// Compare packed unsigned 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30940///
30941/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epu32_mask&expand=1053)
30942#[inline]
30943#[target_feature(enable = "avx512f,avx512vl")]
30944#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30945#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30946pub fn _mm_mask_cmplt_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
30947 _mm_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(k1, a, b)
30948}
30949
30950/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k.
30951///
30952/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epu32_mask&expand=933)
30953#[inline]
30954#[target_feature(enable = "avx512f")]
30955#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30956#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30957pub fn _mm512_cmpgt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
30958 unsafe { simd_bitmask::<u32x16, _>(simd_gt(x:a.as_u32x16(), y:b.as_u32x16())) }
30959}
30960
30961/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30962///
30963/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epu32_mask&expand=934)
30964#[inline]
30965#[target_feature(enable = "avx512f")]
30966#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30967#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30968pub fn _mm512_mask_cmpgt_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
30969 _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_NLE>(k1, a, b)
30970}
30971
30972/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k.
30973///
30974/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epu32_mask&expand=931)
30975#[inline]
30976#[target_feature(enable = "avx512f,avx512vl")]
30977#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30978#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30979pub fn _mm256_cmpgt_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
30980 unsafe { simd_bitmask::<u32x8, _>(simd_gt(x:a.as_u32x8(), y:b.as_u32x8())) }
30981}
30982
30983/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
30984///
30985/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epu32_mask&expand=932)
30986#[inline]
30987#[target_feature(enable = "avx512f,avx512vl")]
30988#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
30989#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
30990pub fn _mm256_mask_cmpgt_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
30991 _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_NLE>(k1, a, b)
30992}
30993
30994/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k.
30995///
30996/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epu32_mask&expand=929)
30997#[inline]
30998#[target_feature(enable = "avx512f,avx512vl")]
30999#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31000#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31001pub fn _mm_cmpgt_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
31002 unsafe { simd_bitmask::<u32x4, _>(simd_gt(x:a.as_u32x4(), y:b.as_u32x4())) }
31003}
31004
31005/// Compare packed unsigned 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31006///
31007/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epu32_mask&expand=930)
31008#[inline]
31009#[target_feature(enable = "avx512f,avx512vl")]
31010#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31011#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31012pub fn _mm_mask_cmpgt_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31013 _mm_mask_cmp_epu32_mask::<_MM_CMPINT_NLE>(k1, a, b)
31014}
31015
31016/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
31017///
31018/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epu32_mask&expand=995)
31019#[inline]
31020#[target_feature(enable = "avx512f")]
31021#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31022#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31023pub fn _mm512_cmple_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
31024 unsafe { simd_bitmask::<u32x16, _>(simd_le(x:a.as_u32x16(), y:b.as_u32x16())) }
31025}
31026
31027/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31028///
31029/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epu32_mask&expand=996)
31030#[inline]
31031#[target_feature(enable = "avx512f")]
31032#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31033#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31034pub fn _mm512_mask_cmple_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31035 _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_LE>(k1, a, b)
31036}
31037
31038/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
31039///
31040/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epu32_mask&expand=993)
31041#[inline]
31042#[target_feature(enable = "avx512f,avx512vl")]
31043#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31044#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31045pub fn _mm256_cmple_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
31046 unsafe { simd_bitmask::<u32x8, _>(simd_le(x:a.as_u32x8(), y:b.as_u32x8())) }
31047}
31048
31049/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31050///
31051/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epu32_mask&expand=994)
31052#[inline]
31053#[target_feature(enable = "avx512f,avx512vl")]
31054#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31055#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31056pub fn _mm256_mask_cmple_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31057 _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_LE>(k1, a, b)
31058}
31059
31060/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
31061///
31062/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epu32_mask&expand=991)
31063#[inline]
31064#[target_feature(enable = "avx512f,avx512vl")]
31065#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31066#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31067pub fn _mm_cmple_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
31068 unsafe { simd_bitmask::<u32x4, _>(simd_le(x:a.as_u32x4(), y:b.as_u32x4())) }
31069}
31070
31071/// Compare packed unsigned 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31072///
31073/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epu32_mask&expand=992)
31074#[inline]
31075#[target_feature(enable = "avx512f,avx512vl")]
31076#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31077#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31078pub fn _mm_mask_cmple_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31079 _mm_mask_cmp_epu32_mask::<_MM_CMPINT_LE>(k1, a, b)
31080}
31081
31082/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
31083///
31084/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epu32_mask&expand=873)
31085#[inline]
31086#[target_feature(enable = "avx512f")]
31087#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31088#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31089pub fn _mm512_cmpge_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
31090 unsafe { simd_bitmask::<u32x16, _>(simd_ge(x:a.as_u32x16(), y:b.as_u32x16())) }
31091}
31092
31093/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31094///
31095/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epu32_mask&expand=874)
31096#[inline]
31097#[target_feature(enable = "avx512f")]
31098#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31099#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31100pub fn _mm512_mask_cmpge_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
31101 _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_NLT>(k1, a, b)
31102}
31103
31104/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
31105///
31106/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epu32_mask&expand=871)
31107#[inline]
31108#[target_feature(enable = "avx512f,avx512vl")]
31109#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31110#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31111pub fn _mm256_cmpge_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
31112 unsafe { simd_bitmask::<u32x8, _>(simd_ge(x:a.as_u32x8(), y:b.as_u32x8())) }
31113}
31114
31115/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31116///
31117/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epu32_mask&expand=872)
31118#[inline]
31119#[target_feature(enable = "avx512f,avx512vl")]
31120#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31121#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31122pub fn _mm256_mask_cmpge_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
31123 _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_NLT>(k1, a, b)
31124}
31125
31126/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
31127///
31128/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epu32_mask&expand=869)
31129#[inline]
31130#[target_feature(enable = "avx512f,avx512vl")]
31131#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31132#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31133pub fn _mm_cmpge_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
31134 unsafe { simd_bitmask::<u32x4, _>(simd_ge(x:a.as_u32x4(), y:b.as_u32x4())) }
31135}
31136
31137/// Compare packed unsigned 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31138///
31139/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epu32_mask&expand=870)
31140#[inline]
31141#[target_feature(enable = "avx512f,avx512vl")]
31142#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31143#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
31144pub fn _mm_mask_cmpge_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
31145 _mm_mask_cmp_epu32_mask::<_MM_CMPINT_NLT>(k1, a, b)
31146}
31147
31148/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k.
31149///
31150/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epu32_mask&expand=807)
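///
/// Illustrative sketch (hypothetical values, assumes `avx512f`): mask bit
/// `i` corresponds to lane `i`, counting from the least-significant bit.
///
/// ```ignore
/// let a = _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
/// let b = _mm512_set1_epi32(2);
/// // Only lane 2 holds the value 2, so only bit 2 is set.
/// let k = _mm512_cmpeq_epu32_mask(a, b);
/// assert_eq!(k, 1 << 2);
/// ```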
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub fn _mm512_cmpeq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    unsafe { simd_bitmask::<u32x16, _>(simd_eq(a.as_u32x16(), b.as_u32x16())) }
}

/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epu32_mask&expand=808)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub fn _mm512_mask_cmpeq_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epu32_mask&expand=805)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub fn _mm256_cmpeq_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<u32x8, _>(simd_eq(a.as_u32x8(), b.as_u32x8())) }
}

/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epu32_mask&expand=806)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub fn _mm256_mask_cmpeq_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epu32_mask&expand=803)
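///
/// Illustrative sketch (hypothetical values, assumes `avx512f` and
/// `avx512vl`): a 128-bit vector has four 32-bit lanes, so only the low
/// 4 bits of the returned `__mmask8` are meaningful.
///
/// ```ignore
/// let a = _mm_set_epi32(0, 1, 2, 3);
/// let b = _mm_set_epi32(0, 9, 2, 9);
/// // Lanes 1 and 3 compare equal (lane 0 is the lowest element).
/// let k = _mm_cmpeq_epu32_mask(a, b);
/// assert_eq!(k, 0b1010);
/// ```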
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub fn _mm_cmpeq_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<u32x4, _>(simd_eq(a.as_u32x4(), b.as_u32x4())) }
}

/// Compare packed unsigned 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epu32_mask&expand=804)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub fn _mm_mask_cmpeq_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epu32_mask&expand=1112)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub fn _mm512_cmpneq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    unsafe { simd_bitmask::<u32x16, _>(simd_ne(a.as_u32x16(), b.as_u32x16())) }
}

/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epu32_mask&expand=1113)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub fn _mm512_mask_cmpneq_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_NE>(k1, a, b)
}

/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epu32_mask&expand=1110)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub fn _mm256_cmpneq_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<u32x8, _>(simd_ne(a.as_u32x8(), b.as_u32x8())) }
}

/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epu32_mask&expand=1111)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub fn _mm256_mask_cmpneq_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_NE>(k1, a, b)
}

/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epu32_mask&expand=1108)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub fn _mm_cmpneq_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<u32x4, _>(simd_ne(a.as_u32x4(), b.as_u32x4())) }
}

/// Compare packed unsigned 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epu32_mask&expand=1109)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpud
pub fn _mm_mask_cmpneq_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epu32_mask::<_MM_CMPINT_NE>(k1, a, b)
}

/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epu32_mask&expand=721)
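///
/// Illustrative sketch (hypothetical values, assumes `avx512f`): the
/// `IMM3` constant selects the predicate, e.g. `_MM_CMPINT_LE`.
///
/// ```ignore
/// let a = _mm512_set1_epi32(5);
/// let b = _mm512_set1_epi32(5);
/// // 5 <= 5 holds in every lane.
/// let k = _mm512_cmp_epu32_mask::<_MM_CMPINT_LE>(a, b);
/// assert_eq!(k, 0xFFFF);
/// ```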
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
pub fn _mm512_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m512i, b: __m512i) -> __mmask16 {
    unsafe {
        static_assert_uimm_bits!(IMM3, 3);
        let a: u32x16 = a.as_u32x16();
        let b: u32x16 = b.as_u32x16();
        let r: i32x16 = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i32x16::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i32x16::splat(-1),
        };
        simd_bitmask(r)
    }
}

/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epu32_mask&expand=722)
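///
/// Illustrative sketch (hypothetical values, assumes `avx512f`): the lane
/// comparison is true everywhere, and the zeromask `k1` then clears the
/// upper half of the result.
///
/// ```ignore
/// let a = _mm512_set1_epi32(5);
/// let b = _mm512_set1_epi32(7);
/// let k = _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(0x00FF, a, b);
/// assert_eq!(k, 0x00FF);
/// ```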
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
pub fn _mm512_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
    k1: __mmask16,
    a: __m512i,
    b: __m512i,
) -> __mmask16 {
    unsafe {
        static_assert_uimm_bits!(IMM3, 3);
        let a: u32x16 = a.as_u32x16();
        let b: u32x16 = b.as_u32x16();
        let k1: i32x16 = simd_select_bitmask(k1, i32x16::splat(-1), i32x16::ZERO);
        let r: i32x16 = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i32x16::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        simd_bitmask(r)
    }
}

/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epu32_mask&expand=719)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
pub fn _mm256_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM3, 3);
        let a: u32x8 = a.as_u32x8();
        let b: u32x8 = b.as_u32x8();
        let r: i32x8 = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i32x8::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i32x8::splat(-1),
        };
        simd_bitmask(r)
    }
}

/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epu32_mask&expand=720)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
pub fn _mm256_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
    k1: __mmask8,
    a: __m256i,
    b: __m256i,
) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM3, 3);
        let a: u32x8 = a.as_u32x8();
        let b: u32x8 = b.as_u32x8();
        let k1: i32x8 = simd_select_bitmask(k1, i32x8::splat(-1), i32x8::ZERO);
        let r: i32x8 = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i32x8::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        simd_bitmask(r)
    }
}

/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epu32_mask&expand=717)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(2)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
pub fn _mm_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM3, 3);
        let a: u32x4 = a.as_u32x4();
        let b: u32x4 = b.as_u32x4();
        let r: i32x4 = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i32x4::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i32x4::splat(-1),
        };
        simd_bitmask(r)
    }
}

/// Compare packed unsigned 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epu32_mask&expand=718)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[rustc_legacy_const_generics(3)]
#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
pub fn _mm_mask_cmp_epu32_mask<const IMM3: _MM_CMPINT_ENUM>(
    k1: __mmask8,
    a: __m128i,
    b: __m128i,
) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM3, 3);
        let a: u32x4 = a.as_u32x4();
        let b: u32x4 = b.as_u32x4();
        let k1: i32x4 = simd_select_bitmask(k1, i32x4::splat(-1), i32x4::ZERO);
        let r: i32x4 = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i32x4::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        simd_bitmask(r)
    }
}

/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epi32_mask&expand=1029)
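///
/// Illustrative sketch (hypothetical values, assumes `avx512f`): the signed
/// compare treats `-1` as less than `0`, whereas the unsigned counterpart
/// `_mm512_cmplt_epu32_mask` would treat it as `u32::MAX`.
///
/// ```ignore
/// let a = _mm512_set1_epi32(-1);
/// let b = _mm512_set1_epi32(0);
/// let k = _mm512_cmplt_epi32_mask(a, b);
/// assert_eq!(k, 0xFFFF);
/// ```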
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm512_cmplt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    unsafe { simd_bitmask::<i32x16, _>(simd_lt(a.as_i32x16(), b.as_i32x16())) }
}

/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epi32_mask&expand=1031)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm512_mask_cmplt_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epi32_mask&expand=1027)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm256_cmplt_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<i32x8, _>(simd_lt(a.as_i32x8(), b.as_i32x8())) }
}

/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epi32_mask&expand=1028)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm256_mask_cmplt_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi32_mask&expand=1025)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm_cmplt_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<i32x4, _>(simd_lt(a.as_i32x4(), b.as_i32x4())) }
}

/// Compare packed signed 32-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epi32_mask&expand=1026)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm_mask_cmplt_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(k1, a, b)
}

/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epi32_mask&expand=905)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm512_cmpgt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    unsafe { simd_bitmask::<i32x16, _>(simd_gt(a.as_i32x16(), b.as_i32x16())) }
}

/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epi32_mask&expand=906)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm512_mask_cmpgt_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi32_mask&expand=903)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm256_cmpgt_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<i32x8, _>(simd_gt(a.as_i32x8(), b.as_i32x8())) }
}

/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epi32_mask&expand=904)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm256_mask_cmpgt_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi32_mask&expand=901)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm_cmpgt_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<i32x4, _>(simd_gt(a.as_i32x4(), b.as_i32x4())) }
}

/// Compare packed signed 32-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epi32_mask&expand=902)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm_mask_cmpgt_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_NLE>(k1, a, b)
}

/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epi32_mask&expand=971)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm512_cmple_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    unsafe { simd_bitmask::<i32x16, _>(simd_le(a.as_i32x16(), b.as_i32x16())) }
}

/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epi32_mask&expand=972)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm512_mask_cmple_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_LE>(k1, a, b)
}

/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epi32_mask&expand=969)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm256_cmple_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<i32x8, _>(simd_le(a.as_i32x8(), b.as_i32x8())) }
}

/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epi32_mask&expand=970)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm256_mask_cmple_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_LE>(k1, a, b)
}

/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epi32_mask&expand=967)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm_cmple_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<i32x4, _>(simd_le(a.as_i32x4(), b.as_i32x4())) }
}

/// Compare packed signed 32-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epi32_mask&expand=968)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm_mask_cmple_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_LE>(k1, a, b)
}

/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epi32_mask&expand=849)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm512_cmpge_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    unsafe { simd_bitmask::<i32x16, _>(simd_ge(a.as_i32x16(), b.as_i32x16())) }
}

/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epi32_mask&expand=850)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm512_mask_cmpge_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epi32_mask&expand=847)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm256_cmpge_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<i32x8, _>(simd_ge(a.as_i32x8(), b.as_i32x8())) }
}

/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epi32_mask&expand=848)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm256_mask_cmpge_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epi32_mask&expand=845)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm_cmpge_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<i32x4, _>(simd_ge(a.as_i32x4(), b.as_i32x4())) }
}

/// Compare packed signed 32-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epi32_mask&expand=846)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm_mask_cmpge_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_NLT>(k1, a, b)
}

/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epi32_mask&expand=779)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm512_cmpeq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    unsafe { simd_bitmask::<i32x16, _>(simd_eq(a.as_i32x16(), b.as_i32x16())) }
}

/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epi32_mask&expand=780)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm512_mask_cmpeq_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi32_mask&expand=777)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm256_cmpeq_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<i32x8, _>(simd_eq(a.as_i32x8(), b.as_i32x8())) }
}

/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epi32_mask&expand=778)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm256_mask_cmpeq_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi32_mask&expand=775)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm_cmpeq_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<i32x4, _>(simd_eq(a.as_i32x4(), b.as_i32x4())) }
}

/// Compare packed 32-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epi32_mask&expand=776)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm_mask_cmpeq_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_EQ>(k1, a, b)
}

/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epi32_mask&expand=1088)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm512_cmpneq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
    unsafe { simd_bitmask::<i32x16, _>(simd_ne(a.as_i32x16(), b.as_i32x16())) }
}

/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epi32_mask&expand=1089)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm512_mask_cmpneq_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
    _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_NE>(k1, a, b)
}

/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epi32_mask&expand=1086)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm256_cmpneq_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<i32x8, _>(simd_ne(a.as_i32x8(), b.as_i32x8())) }
}

/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epi32_mask&expand=1087)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm256_mask_cmpneq_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
    _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_NE>(k1, a, b)
}

/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epi32_mask&expand=1084)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm_cmpneq_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<i32x4, _>(simd_ne(a.as_i32x4(), b.as_i32x4())) }
}

/// Compare packed 32-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epi32_mask&expand=1085)
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpd
pub fn _mm_mask_cmpneq_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
    _mm_mask_cmp_epi32_mask::<_MM_CMPINT_NE>(k1, a, b)
}

/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epi32_mask&expand=697)
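///
/// Illustrative sketch (hypothetical values, assumes `avx512f`):
/// `_MM_CMPINT_NLT` keeps lanes where `a >= b` under signed ordering.
///
/// ```ignore
/// let a = _mm512_set1_epi32(-1);
/// let b = _mm512_set1_epi32(0);
/// // -1 >= 0 is false in every lane.
/// let k = _mm512_cmp_epi32_mask::<_MM_CMPINT_NLT>(a, b);
/// assert_eq!(k, 0);
/// ```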
31856#[inline]
31857#[target_feature(enable = "avx512f")]
31858#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31859#[rustc_legacy_const_generics(2)]
31860#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31861pub fn _mm512_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m512i, b: __m512i) -> __mmask16 {
31862 unsafe {
31863 static_assert_uimm_bits!(IMM3, 3);
31864 let a: i32x16 = a.as_i32x16();
31865 let b: i32x16 = b.as_i32x16();
31866 let r: i32x16 = match IMM3 {
31867 0 => simd_eq(x:a, y:b),
31868 1 => simd_lt(x:a, y:b),
31869 2 => simd_le(x:a, y:b),
31870 3 => i32x16::ZERO,
31871 4 => simd_ne(x:a, y:b),
31872 5 => simd_ge(x:a, y:b),
31873 6 => simd_gt(x:a, y:b),
31874 _ => i32x16::splat(-1),
31875 };
31876 simd_bitmask(r)
31877 }
31878}
31879
31880/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31881///
31882/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epi32_mask&expand=698)
31883#[inline]
31884#[target_feature(enable = "avx512f")]
31885#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31886#[rustc_legacy_const_generics(3)]
31887#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31888pub fn _mm512_mask_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
31889 k1: __mmask16,
31890 a: __m512i,
31891 b: __m512i,
31892) -> __mmask16 {
31893 unsafe {
31894 static_assert_uimm_bits!(IMM3, 3);
31895 let a: i32x16 = a.as_i32x16();
31896 let b: i32x16 = b.as_i32x16();
31897 let k1: i32x16 = simd_select_bitmask(m:k1, yes:i32x16::splat(-1), no:i32x16::ZERO);
31898 let r: i32x16 = match IMM3 {
31899 0 => simd_and(x:k1, y:simd_eq(x:a, y:b)),
31900 1 => simd_and(x:k1, y:simd_lt(x:a, y:b)),
31901 2 => simd_and(x:k1, y:simd_le(x:a, y:b)),
31902 3 => i32x16::ZERO,
31903 4 => simd_and(x:k1, y:simd_ne(x:a, y:b)),
31904 5 => simd_and(x:k1, y:simd_ge(x:a, y:b)),
31905 6 => simd_and(x:k1, y:simd_gt(x:a, y:b)),
31906 _ => k1,
31907 };
31908 simd_bitmask(r)
31909 }
31910}
31911
31912/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31913///
31914/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=#text=_mm256_cmp_epi32_mask&expand=695)
31915#[inline]
31916#[target_feature(enable = "avx512f,avx512vl")]
31917#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31918#[rustc_legacy_const_generics(2)]
31919#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31920pub fn _mm256_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m256i, b: __m256i) -> __mmask8 {
31921 unsafe {
31922 static_assert_uimm_bits!(IMM3, 3);
31923 let a: i32x8 = a.as_i32x8();
31924 let b: i32x8 = b.as_i32x8();
31925 let r: i32x8 = match IMM3 {
31926 0 => simd_eq(x:a, y:b),
31927 1 => simd_lt(x:a, y:b),
31928 2 => simd_le(x:a, y:b),
31929 3 => i32x8::ZERO,
31930 4 => simd_ne(x:a, y:b),
31931 5 => simd_ge(x:a, y:b),
31932 6 => simd_gt(x:a, y:b),
31933 _ => i32x8::splat(-1),
31934 };
31935 simd_bitmask(r)
31936 }
31937}
31938
31939/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31940///
31941/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epi32_mask&expand=696)
31942#[inline]
31943#[target_feature(enable = "avx512f,avx512vl")]
31944#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31945#[rustc_legacy_const_generics(3)]
31946#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31947pub fn _mm256_mask_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
31948 k1: __mmask8,
31949 a: __m256i,
31950 b: __m256i,
31951) -> __mmask8 {
31952 unsafe {
31953 static_assert_uimm_bits!(IMM3, 3);
31954 let a: i32x8 = a.as_i32x8();
31955 let b: i32x8 = b.as_i32x8();
        let k1: i32x8 = simd_select_bitmask(k1, i32x8::splat(-1), i32x8::ZERO);
        let r: i32x8 = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i32x8::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
31965 _ => k1,
31966 };
31967 simd_bitmask(r)
31968 }
31969}
31970
31971/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
31972///
31973/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epi32_mask&expand=693)
31974#[inline]
31975#[target_feature(enable = "avx512f,avx512vl")]
31976#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
31977#[rustc_legacy_const_generics(2)]
31978#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
31979pub fn _mm_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
31980 unsafe {
31981 static_assert_uimm_bits!(IMM3, 3);
31982 let a: i32x4 = a.as_i32x4();
31983 let b: i32x4 = b.as_i32x4();
31984 let r: i32x4 = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i32x4::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i32x4::splat(-1),
31993 };
31994 simd_bitmask(r)
31995 }
31996}
31997
31998/// Compare packed signed 32-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
31999///
32000/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epi32_mask&expand=694)
32001#[inline]
32002#[target_feature(enable = "avx512f,avx512vl")]
32003#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32004#[rustc_legacy_const_generics(3)]
32005#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32006pub fn _mm_mask_cmp_epi32_mask<const IMM3: _MM_CMPINT_ENUM>(
32007 k1: __mmask8,
32008 a: __m128i,
32009 b: __m128i,
32010) -> __mmask8 {
32011 unsafe {
32012 static_assert_uimm_bits!(IMM3, 3);
32013 let a: i32x4 = a.as_i32x4();
32014 let b: i32x4 = b.as_i32x4();
        let k1: i32x4 = simd_select_bitmask(k1, i32x4::splat(-1), i32x4::ZERO);
        let r: i32x4 = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i32x4::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
32024 _ => k1,
32025 };
32026 simd_bitmask(r)
32027 }
32028}
32029
32030/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k.
32031///
32032/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epu64_mask&expand=1062)
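///
/// A small illustrative sketch (not compiled as a doctest; assumes AVX-512F support and
/// the unstable `stdarch_x86_avx512` feature). The comparison is unsigned, so `-1`
/// (all bits set) is treated as the largest 64-bit value:
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi64(1);
///     let b = _mm512_set1_epi64(-1); // u64::MAX when reinterpreted as unsigned
///     // Every lane satisfies 1 < u64::MAX, so all eight mask bits are set.
///     assert_eq!(_mm512_cmplt_epu64_mask(a, b), 0xff);
/// }
/// ```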
32033#[inline]
32034#[target_feature(enable = "avx512f")]
32035#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32036#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32037pub fn _mm512_cmplt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    unsafe { simd_bitmask::<__m512i, _>(simd_lt(a.as_u64x8(), b.as_u64x8())) }
32039}
32040
32041/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32042///
32043/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epu64_mask&expand=1063)
32044#[inline]
32045#[target_feature(enable = "avx512f")]
32046#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32047#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32048pub fn _mm512_mask_cmplt_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32049 _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(k1, a, b)
32050}
32051
32052/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k.
32053///
32054/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epu64_mask&expand=1060)
32055#[inline]
32056#[target_feature(enable = "avx512f,avx512vl")]
32057#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32058#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32059pub fn _mm256_cmplt_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<__m256i, _>(simd_lt(a.as_u64x4(), b.as_u64x4())) }
32061}
32062
32063/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32064///
32065/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epu64_mask&expand=1061)
32066#[inline]
32067#[target_feature(enable = "avx512f,avx512vl")]
32068#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32069#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32070pub fn _mm256_mask_cmplt_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32071 _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(k1, a, b)
32072}
32073
32074/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k.
32075///
32076/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epu64_mask&expand=1058)
32077#[inline]
32078#[target_feature(enable = "avx512f,avx512vl")]
32079#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32080#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32081pub fn _mm_cmplt_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<__m128i, _>(simd_lt(a.as_u64x2(), b.as_u64x2())) }
32083}
32084
32085/// Compare packed unsigned 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32086///
32087/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epu64_mask&expand=1059)
32088#[inline]
32089#[target_feature(enable = "avx512f,avx512vl")]
32090#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32091#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32092pub fn _mm_mask_cmplt_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32093 _mm_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(k1, a, b)
32094}
32095
32096/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k.
32097///
32098/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epu64_mask&expand=939)
32099#[inline]
32100#[target_feature(enable = "avx512f")]
32101#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32102#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32103pub fn _mm512_cmpgt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    unsafe { simd_bitmask::<__m512i, _>(simd_gt(a.as_u64x8(), b.as_u64x8())) }
32105}
32106
32107/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32108///
32109/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epu64_mask&expand=940)
32110#[inline]
32111#[target_feature(enable = "avx512f")]
32112#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32113#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32114pub fn _mm512_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32115 _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_NLE>(k1, a, b)
32116}
32117
32118/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k.
32119///
32120/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epu64_mask&expand=937)
32121#[inline]
32122#[target_feature(enable = "avx512f,avx512vl")]
32123#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32124#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32125pub fn _mm256_cmpgt_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<__m256i, _>(simd_gt(a.as_u64x4(), b.as_u64x4())) }
32127}
32128
32129/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32130///
32131/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epu64_mask&expand=938)
32132#[inline]
32133#[target_feature(enable = "avx512f,avx512vl")]
32134#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32135#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32136pub fn _mm256_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32137 _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_NLE>(k1, a, b)
32138}
32139
32140/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k.
32141///
32142/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epu64_mask&expand=935)
32143#[inline]
32144#[target_feature(enable = "avx512f,avx512vl")]
32145#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32146#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32147pub fn _mm_cmpgt_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<__m128i, _>(simd_gt(a.as_u64x2(), b.as_u64x2())) }
32149}
32150
32151/// Compare packed unsigned 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32152///
32153/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epu64_mask&expand=936)
32154#[inline]
32155#[target_feature(enable = "avx512f,avx512vl")]
32156#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32157#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32158pub fn _mm_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32159 _mm_mask_cmp_epu64_mask::<_MM_CMPINT_NLE>(k1, a, b)
32160}
32161
32162/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
32163///
32164/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epu64_mask&expand=1001)
32165#[inline]
32166#[target_feature(enable = "avx512f")]
32167#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32168#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32169pub fn _mm512_cmple_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    unsafe { simd_bitmask::<__m512i, _>(simd_le(a.as_u64x8(), b.as_u64x8())) }
32171}
32172
32173/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32174///
32175/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epu64_mask&expand=1002)
32176#[inline]
32177#[target_feature(enable = "avx512f")]
32178#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32179#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32180pub fn _mm512_mask_cmple_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32181 _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_LE>(k1, a, b)
32182}
32183
32184/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
32185///
32186/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epu64_mask&expand=999)
32187#[inline]
32188#[target_feature(enable = "avx512f,avx512vl")]
32189#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32190#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32191pub fn _mm256_cmple_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<__m256i, _>(simd_le(a.as_u64x4(), b.as_u64x4())) }
32193}
32194
32195/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32196///
32197/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epu64_mask&expand=1000)
32198#[inline]
32199#[target_feature(enable = "avx512f,avx512vl")]
32200#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32201#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32202pub fn _mm256_mask_cmple_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32203 _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_LE>(k1, a, b)
32204}
32205
32206/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
32207///
32208/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epu64_mask&expand=997)
32209#[inline]
32210#[target_feature(enable = "avx512f,avx512vl")]
32211#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32212#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32213pub fn _mm_cmple_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<__m128i, _>(simd_le(a.as_u64x2(), b.as_u64x2())) }
32215}
32216
32217/// Compare packed unsigned 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32218///
32219/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epu64_mask&expand=998)
32220#[inline]
32221#[target_feature(enable = "avx512f,avx512vl")]
32222#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32223#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32224pub fn _mm_mask_cmple_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32225 _mm_mask_cmp_epu64_mask::<_MM_CMPINT_LE>(k1, a, b)
32226}
32227
32228/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32229///
32230/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epu64_mask&expand=879)
32231#[inline]
32232#[target_feature(enable = "avx512f")]
32233#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32234#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32235pub fn _mm512_cmpge_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    unsafe { simd_bitmask::<__m512i, _>(simd_ge(a.as_u64x8(), b.as_u64x8())) }
32237}
32238
32239/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32240///
32241/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epu64_mask&expand=880)
32242#[inline]
32243#[target_feature(enable = "avx512f")]
32244#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32245#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32246pub fn _mm512_mask_cmpge_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32247 _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_NLT>(k1, a, b)
32248}
32249
32250/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32251///
32252/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epu64_mask&expand=877)
32253#[inline]
32254#[target_feature(enable = "avx512f,avx512vl")]
32255#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32256#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32257pub fn _mm256_cmpge_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<__m256i, _>(simd_ge(a.as_u64x4(), b.as_u64x4())) }
32259}
32260
32261/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32262///
32263/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epu64_mask&expand=878)
32264#[inline]
32265#[target_feature(enable = "avx512f,avx512vl")]
32266#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32267#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32268pub fn _mm256_mask_cmpge_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32269 _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_NLT>(k1, a, b)
32270}
32271
32272/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32273///
32274/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epu64_mask&expand=875)
32275#[inline]
32276#[target_feature(enable = "avx512f,avx512vl")]
32277#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32278#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32279pub fn _mm_cmpge_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<__m128i, _>(simd_ge(a.as_u64x2(), b.as_u64x2())) }
32281}
32282
32283/// Compare packed unsigned 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32284///
32285/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epu64_mask&expand=876)
32286#[inline]
32287#[target_feature(enable = "avx512f,avx512vl")]
32288#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32289#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32290pub fn _mm_mask_cmpge_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32291 _mm_mask_cmp_epu64_mask::<_MM_CMPINT_NLT>(k1, a, b)
32292}
32293
32294/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k.
32295///
32296/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epu64_mask&expand=813)
32297#[inline]
32298#[target_feature(enable = "avx512f")]
32299#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32300#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32301pub fn _mm512_cmpeq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    unsafe { simd_bitmask::<__m512i, _>(simd_eq(a.as_u64x8(), b.as_u64x8())) }
32303}
32304
32305/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32306///
32307/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epu64_mask&expand=814)
32308#[inline]
32309#[target_feature(enable = "avx512f")]
32310#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32311#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32312pub fn _mm512_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32313 _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_EQ>(k1, a, b)
32314}
32315
32316/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k.
32317///
32318/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epu64_mask&expand=811)
32319#[inline]
32320#[target_feature(enable = "avx512f,avx512vl")]
32321#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32322#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32323pub fn _mm256_cmpeq_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<__m256i, _>(simd_eq(a.as_u64x4(), b.as_u64x4())) }
32325}
32326
32327/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32328///
32329/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epu64_mask&expand=812)
32330#[inline]
32331#[target_feature(enable = "avx512f,avx512vl")]
32332#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32333#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32334pub fn _mm256_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32335 _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_EQ>(k1, a, b)
32336}
32337
32338/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k.
32339///
32340/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epu64_mask&expand=809)
32341#[inline]
32342#[target_feature(enable = "avx512f,avx512vl")]
32343#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32344#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32345pub fn _mm_cmpeq_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<__m128i, _>(simd_eq(a.as_u64x2(), b.as_u64x2())) }
32347}
32348
32349/// Compare packed unsigned 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32350///
32351/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epu64_mask&expand=810)
32352#[inline]
32353#[target_feature(enable = "avx512f,avx512vl")]
32354#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32355#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32356pub fn _mm_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32357 _mm_mask_cmp_epu64_mask::<_MM_CMPINT_EQ>(k1, a, b)
32358}
32359
32360/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k.
32361///
32362/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epu64_mask&expand=1118)
32363#[inline]
32364#[target_feature(enable = "avx512f")]
32365#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32366#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32367pub fn _mm512_cmpneq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    unsafe { simd_bitmask::<__m512i, _>(simd_ne(a.as_u64x8(), b.as_u64x8())) }
32369}
32370
32371/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32372///
32373/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epu64_mask&expand=1119)
32374#[inline]
32375#[target_feature(enable = "avx512f")]
32376#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32377#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32378pub fn _mm512_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32379 _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_NE>(k1, a, b)
32380}
32381
32382/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k.
32383///
32384/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epu64_mask&expand=1116)
32385#[inline]
32386#[target_feature(enable = "avx512f,avx512vl")]
32387#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32388#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32389pub fn _mm256_cmpneq_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<__m256i, _>(simd_ne(a.as_u64x4(), b.as_u64x4())) }
32391}
32392
32393/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32394///
32395/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epu64_mask&expand=1117)
32396#[inline]
32397#[target_feature(enable = "avx512f,avx512vl")]
32398#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32399#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32400pub fn _mm256_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32401 _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_NE>(k1, a, b)
32402}
32403
32404/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k.
32405///
32406/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epu64_mask&expand=1114)
32407#[inline]
32408#[target_feature(enable = "avx512f,avx512vl")]
32409#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32410#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32411pub fn _mm_cmpneq_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<__m128i, _>(simd_ne(a.as_u64x2(), b.as_u64x2())) }
32413}
32414
32415/// Compare packed unsigned 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32416///
32417/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epu64_mask&expand=1115)
32418#[inline]
32419#[target_feature(enable = "avx512f,avx512vl")]
32420#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32421#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpuq
32422pub fn _mm_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32423 _mm_mask_cmp_epu64_mask::<_MM_CMPINT_NE>(k1, a, b)
32424}
32425
32426/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
32427///
32428/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epu64_mask&expand=727)
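///
/// An illustrative sketch (not compiled as a doctest; assumes AVX-512F and the unstable
/// `stdarch_x86_avx512` feature) showing the comparison predicate selected through the
/// const generic:
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi64(5);
///     let b = _mm512_set1_epi64(5);
///     // _MM_CMPINT_LE selects "less than or equal", so all lanes match.
///     assert_eq!(_mm512_cmp_epu64_mask::<_MM_CMPINT_LE>(a, b), 0xff);
///     // _MM_CMPINT_FALSE always yields an empty mask.
///     assert_eq!(_mm512_cmp_epu64_mask::<_MM_CMPINT_FALSE>(a, b), 0);
/// }
/// ```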
32429#[inline]
32430#[target_feature(enable = "avx512f")]
32431#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32432#[rustc_legacy_const_generics(2)]
32433#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32434pub fn _mm512_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m512i, b: __m512i) -> __mmask8 {
32435 unsafe {
32436 static_assert_uimm_bits!(IMM3, 3);
32437 let a: u64x8 = a.as_u64x8();
32438 let b: u64x8 = b.as_u64x8();
32439 let r: i64x8 = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i64x8::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
32447 _ => i64x8::splat(-1),
32448 };
32449 simd_bitmask(r)
32450 }
32451}
32452
32453/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32454///
32455/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epu64_mask&expand=728)
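///
/// An illustrative sketch (not compiled as a doctest; assumes AVX-512F and the unstable
/// `stdarch_x86_avx512` feature). Bits cleared in `k1` stay cleared in the result:
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi64(7);
///     let b = _mm512_set1_epi64(7);
///     // All lanes compare equal, but the zeromask keeps only the low four bits.
///     assert_eq!(_mm512_mask_cmp_epu64_mask::<_MM_CMPINT_EQ>(0x0f, a, b), 0x0f);
/// }
/// ```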
32456#[inline]
32457#[target_feature(enable = "avx512f")]
32458#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32459#[rustc_legacy_const_generics(3)]
32460#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32461pub fn _mm512_mask_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
32462 k1: __mmask8,
32463 a: __m512i,
32464 b: __m512i,
32465) -> __mmask8 {
32466 unsafe {
32467 static_assert_uimm_bits!(IMM3, 3);
32468 let a: u64x8 = a.as_u64x8();
32469 let b: u64x8 = b.as_u64x8();
        let k1: i64x8 = simd_select_bitmask(k1, i64x8::splat(-1), i64x8::ZERO);
        let r: i64x8 = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i64x8::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
32479 _ => k1,
32480 };
32481 simd_bitmask(r)
32482 }
32483}
32484
32485/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
32486///
32487/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epu64_mask&expand=725)
32488#[inline]
32489#[target_feature(enable = "avx512f,avx512vl")]
32490#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32491#[rustc_legacy_const_generics(2)]
32492#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32493pub fn _mm256_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m256i, b: __m256i) -> __mmask8 {
32494 unsafe {
32495 static_assert_uimm_bits!(IMM3, 3);
32496 let a: u64x4 = a.as_u64x4();
32497 let b: u64x4 = b.as_u64x4();
32498 let r: i64x4 = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i64x4::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
32506 _ => i64x4::splat(-1),
32507 };
32508 simd_bitmask(r)
32509 }
32510}
32511
32512/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32513///
32514/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epu64_mask&expand=726)
32515#[inline]
32516#[target_feature(enable = "avx512f,avx512vl")]
32517#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32518#[rustc_legacy_const_generics(3)]
32519#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32520pub fn _mm256_mask_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
32521 k1: __mmask8,
32522 a: __m256i,
32523 b: __m256i,
32524) -> __mmask8 {
32525 unsafe {
32526 static_assert_uimm_bits!(IMM3, 3);
32527 let a: u64x4 = a.as_u64x4();
32528 let b: u64x4 = b.as_u64x4();
        let k1: i64x4 = simd_select_bitmask(k1, i64x4::splat(-1), i64x4::ZERO);
        let r: i64x4 = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i64x4::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
32538 _ => k1,
32539 };
32540 simd_bitmask(r)
32541 }
32542}
32543
32544/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
32545///
32546/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epu64_mask&expand=723)
32547#[inline]
32548#[target_feature(enable = "avx512f,avx512vl")]
32549#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32550#[rustc_legacy_const_generics(2)]
32551#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32552pub fn _mm_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
32553 unsafe {
32554 static_assert_uimm_bits!(IMM3, 3);
32555 let a: u64x2 = a.as_u64x2();
32556 let b: u64x2 = b.as_u64x2();
32557 let r: i64x2 = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i64x2::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
32565 _ => i64x2::splat(-1),
32566 };
32567 simd_bitmask(r)
32568 }
32569}
32570
32571/// Compare packed unsigned 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32572///
32573/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epu64_mask&expand=724)
32574#[inline]
32575#[target_feature(enable = "avx512f,avx512vl")]
32576#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32577#[rustc_legacy_const_generics(3)]
32578#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
32579pub fn _mm_mask_cmp_epu64_mask<const IMM3: _MM_CMPINT_ENUM>(
32580 k1: __mmask8,
32581 a: __m128i,
32582 b: __m128i,
32583) -> __mmask8 {
32584 unsafe {
32585 static_assert_uimm_bits!(IMM3, 3);
32586 let a: u64x2 = a.as_u64x2();
32587 let b: u64x2 = b.as_u64x2();
        let k1: i64x2 = simd_select_bitmask(k1, i64x2::splat(-1), i64x2::ZERO);
        let r: i64x2 = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i64x2::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
32597 _ => k1,
32598 };
32599 simd_bitmask(r)
32600 }
32601}
32602
32603/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k.
32604///
32605/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmplt_epi64_mask&expand=1037)
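///
/// A brief illustrative sketch (not compiled as a doctest; assumes AVX-512F and the
/// unstable `stdarch_x86_avx512` feature). The comparison is signed, so `-1` compares
/// below `1`:
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_set1_epi64(-1);
///     let b = _mm512_set1_epi64(1);
///     // -1 < 1 in every lane under signed comparison, so all eight bits are set.
///     assert_eq!(_mm512_cmplt_epi64_mask(a, b), 0xff);
/// }
/// ```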
32606#[inline]
32607#[target_feature(enable = "avx512f")]
32608#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32609#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32610pub fn _mm512_cmplt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    unsafe { simd_bitmask::<__m512i, _>(simd_lt(a.as_i64x8(), b.as_i64x8())) }
32612}
32613
32614/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32615///
32616/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmplt_epi64_mask&expand=1038)
32617#[inline]
32618#[target_feature(enable = "avx512f")]
32619#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32620#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32621pub fn _mm512_mask_cmplt_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32622 _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(k1, a, b)
32623}
32624
32625/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k.
32626///
32627/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmplt_epi64_mask&expand=1035)
32628#[inline]
32629#[target_feature(enable = "avx512f,avx512vl")]
32630#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32631#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32632pub fn _mm256_cmplt_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<__m256i, _>(simd_lt(a.as_i64x4(), b.as_i64x4())) }
32634}
32635
32636/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32637///
32638/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmplt_epi64_mask&expand=1036)
32639#[inline]
32640#[target_feature(enable = "avx512f,avx512vl")]
32641#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32642#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32643pub fn _mm256_mask_cmplt_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32644 _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(k1, a, b)
32645}
32646
32647/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k.
32648///
32649/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmplt_epi64_mask&expand=1033)
32650#[inline]
32651#[target_feature(enable = "avx512f,avx512vl")]
32652#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32653#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32654pub fn _mm_cmplt_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<__m128i, _>(simd_lt(a.as_i64x2(), b.as_i64x2())) }
32656}
32657
32658/// Compare packed signed 64-bit integers in a and b for less-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32659///
32660/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmplt_epi64_mask&expand=1034)
32661#[inline]
32662#[target_feature(enable = "avx512f,avx512vl")]
32663#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32664#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32665pub fn _mm_mask_cmplt_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32666 _mm_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(k1, a, b)
32667}
32668
32669/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k.
32670///
32671/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpgt_epi64_mask&expand=913)
32672#[inline]
32673#[target_feature(enable = "avx512f")]
32674#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32675#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32676pub fn _mm512_cmpgt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    unsafe { simd_bitmask::<__m512i, _>(simd_gt(a.as_i64x8(), b.as_i64x8())) }
32678}
32679
32680/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32681///
32682/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpgt_epi64_mask&expand=914)
32683#[inline]
32684#[target_feature(enable = "avx512f")]
32685#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32686#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32687pub fn _mm512_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32688 _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_NLE>(k1, a, b)
32689}
32690
32691/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k.
32692///
32693/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpgt_epi64_mask&expand=911)
32694#[inline]
32695#[target_feature(enable = "avx512f,avx512vl")]
32696#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32697#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32698pub fn _mm256_cmpgt_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<__m256i, _>(simd_gt(a.as_i64x4(), b.as_i64x4())) }
32700}
32701
32702/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32703///
32704/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpgt_epi64_mask&expand=912)
32705#[inline]
32706#[target_feature(enable = "avx512f,avx512vl")]
32707#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32708#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32709pub fn _mm256_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32710 _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_NLE>(k1, a, b)
32711}
32712
32713/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k.
32714///
32715/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpgt_epi64_mask&expand=909)
32716#[inline]
32717#[target_feature(enable = "avx512f,avx512vl")]
32718#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32719#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32720pub fn _mm_cmpgt_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<__m128i, _>(simd_gt(a.as_i64x2(), b.as_i64x2())) }
32722}
32723
32724/// Compare packed signed 64-bit integers in a and b for greater-than, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32725///
32726/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpgt_epi64_mask&expand=910)
32727#[inline]
32728#[target_feature(enable = "avx512f,avx512vl")]
32729#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32730#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32731pub fn _mm_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32732 _mm_mask_cmp_epi64_mask::<_MM_CMPINT_NLE>(k1, a, b)
32733}
32734
32735/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
32736///
32737/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmple_epi64_mask&expand=977)
32738#[inline]
32739#[target_feature(enable = "avx512f")]
32740#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32741#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32742pub fn _mm512_cmple_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    unsafe { simd_bitmask::<__m512i, _>(simd_le(a.as_i64x8(), b.as_i64x8())) }
32744}
32745
32746/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32747///
32748/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmple_epi64_mask&expand=978)
32749#[inline]
32750#[target_feature(enable = "avx512f")]
32751#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32752#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32753pub fn _mm512_mask_cmple_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32754 _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_LE>(k1, a, b)
32755}
32756
32757/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
32758///
32759/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmple_epi64_mask&expand=975)
32760#[inline]
32761#[target_feature(enable = "avx512f,avx512vl")]
32762#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32763#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32764pub fn _mm256_cmple_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<__m256i, _>(simd_le(a.as_i64x4(), b.as_i64x4())) }
32766}
32767
32768/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32769///
32770/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmple_epi64_mask&expand=976)
32771#[inline]
32772#[target_feature(enable = "avx512f,avx512vl")]
32773#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32774#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32775pub fn _mm256_mask_cmple_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32776 _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_LE>(k1, a, b)
32777}
32778
32779/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k.
32780///
32781/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmple_epi64_mask&expand=973)
32782#[inline]
32783#[target_feature(enable = "avx512f,avx512vl")]
32784#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32785#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32786pub fn _mm_cmple_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<__m128i, _>(simd_le(a.as_i64x2(), b.as_i64x2())) }
32788}
32789
32790/// Compare packed signed 64-bit integers in a and b for less-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32791///
32792/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmple_epi64_mask&expand=974)
32793#[inline]
32794#[target_feature(enable = "avx512f,avx512vl")]
32795#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32796#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32797pub fn _mm_mask_cmple_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32798 _mm_mask_cmp_epi64_mask::<_MM_CMPINT_LE>(k1, a, b)
32799}
32800
32801/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32802///
32803/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpge_epi64_mask&expand=855)
32804#[inline]
32805#[target_feature(enable = "avx512f")]
32806#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32807#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32808pub fn _mm512_cmpge_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    unsafe { simd_bitmask::<__m512i, _>(simd_ge(a.as_i64x8(), b.as_i64x8())) }
32810}
32811
32812/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32813///
32814/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpge_epi64_mask&expand=856)
32815#[inline]
32816#[target_feature(enable = "avx512f")]
32817#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32818#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32819pub fn _mm512_mask_cmpge_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32820 _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_NLT>(k1, a, b)
32821}
32822
32823/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32824///
32825/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpge_epi64_mask&expand=853)
32826#[inline]
32827#[target_feature(enable = "avx512f,avx512vl")]
32828#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32829#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32830pub fn _mm256_cmpge_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<__m256i, _>(simd_ge(a.as_i64x4(), b.as_i64x4())) }
32832}
32833
32834/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32835///
32836/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpge_epi64_mask&expand=854)
32837#[inline]
32838#[target_feature(enable = "avx512f,avx512vl")]
32839#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32840#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32841pub fn _mm256_mask_cmpge_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32842 _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_NLT>(k1, a, b)
32843}
32844
32845/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k.
32846///
32847/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpge_epi64_mask&expand=851)
32848#[inline]
32849#[target_feature(enable = "avx512f,avx512vl")]
32850#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32851#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32852pub fn _mm_cmpge_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<__m128i, _>(simd_ge(a.as_i64x2(), b.as_i64x2())) }
32854}
32855
32856/// Compare packed signed 64-bit integers in a and b for greater-than-or-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32857///
32858/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpge_epi64_mask&expand=852)
32859#[inline]
32860#[target_feature(enable = "avx512f,avx512vl")]
32861#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32862#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32863pub fn _mm_mask_cmpge_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32864 _mm_mask_cmp_epi64_mask::<_MM_CMPINT_NLT>(k1, a, b)
32865}
32866
32867/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k.
32868///
32869/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpeq_epi64_mask&expand=787)
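///
/// An illustrative sketch (not compiled as a doctest; assumes AVX-512F and the unstable
/// `stdarch_x86_avx512` feature):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
///     let b = _mm512_set1_epi64(3);
///     // Only lane 3 holds equal values, so only bit 3 of the mask is set.
///     assert_eq!(_mm512_cmpeq_epi64_mask(a, b), 0b0000_1000);
/// }
/// ```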
32870#[inline]
32871#[target_feature(enable = "avx512f")]
32872#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32873#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32874pub fn _mm512_cmpeq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    unsafe { simd_bitmask::<__m512i, _>(simd_eq(a.as_i64x8(), b.as_i64x8())) }
32876}
32877
32878/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32879///
32880/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpeq_epi64_mask&expand=788)
32881#[inline]
32882#[target_feature(enable = "avx512f")]
32883#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32884#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32885pub fn _mm512_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32886 _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_EQ>(k1, a, b)
32887}
32888
32889/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k.
32890///
32891/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpeq_epi64_mask&expand=785)
32892#[inline]
32893#[target_feature(enable = "avx512f,avx512vl")]
32894#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32895#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32896pub fn _mm256_cmpeq_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<__m256i, _>(simd_eq(a.as_i64x4(), b.as_i64x4())) }
32898}
32899
32900/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32901///
32902/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpeq_epi64_mask&expand=786)
32903#[inline]
32904#[target_feature(enable = "avx512f,avx512vl")]
32905#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32906#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32907pub fn _mm256_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32908 _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_EQ>(k1, a, b)
32909}
32910
32911/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k.
32912///
32913/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpeq_epi64_mask&expand=783)
32914#[inline]
32915#[target_feature(enable = "avx512f,avx512vl")]
32916#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32917#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32918pub fn _mm_cmpeq_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<__m128i, _>(simd_eq(a.as_i64x2(), b.as_i64x2())) }
32920}
32921
32922/// Compare packed 64-bit integers in a and b for equality, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32923///
32924/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpeq_epi64_mask&expand=784)
32925#[inline]
32926#[target_feature(enable = "avx512f,avx512vl")]
32927#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32928#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32929pub fn _mm_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32930 _mm_mask_cmp_epi64_mask::<_MM_CMPINT_EQ>(k1, a, b)
32931}
32932
32933/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k.
32934///
32935/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmpneq_epi64_mask&expand=1094)
32936#[inline]
32937#[target_feature(enable = "avx512f")]
32938#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32939#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32940pub fn _mm512_cmpneq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
    unsafe { simd_bitmask::<__m512i, _>(simd_ne(a.as_i64x8(), b.as_i64x8())) }
32942}
32943
32944/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32945///
32946/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmpneq_epi64_mask&expand=1095)
32947#[inline]
32948#[target_feature(enable = "avx512f")]
32949#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32950#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32951pub fn _mm512_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
32952 _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_NE>(k1, a, b)
32953}
32954
32955/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k.
32956///
32957/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmpneq_epi64_mask&expand=1092)
32958#[inline]
32959#[target_feature(enable = "avx512f,avx512vl")]
32960#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32961#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32962pub fn _mm256_cmpneq_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe { simd_bitmask::<__m256i, _>(simd_ne(a.as_i64x4(), b.as_i64x4())) }
32964}
32965
32966/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32967///
32968/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmpneq_epi64_mask&expand=1093)
32969#[inline]
32970#[target_feature(enable = "avx512f,avx512vl")]
32971#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32972#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32973pub fn _mm256_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
32974 _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_NE>(k1, a, b)
32975}
32976
32977/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k.
32978///
32979/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpneq_epi64_mask&expand=1090)
32980#[inline]
32981#[target_feature(enable = "avx512f,avx512vl")]
32982#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32983#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32984pub fn _mm_cmpneq_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe { simd_bitmask::<__m128i, _>(simd_ne(a.as_i64x2(), b.as_i64x2())) }
32986}
32987
32988/// Compare packed signed 64-bit integers in a and b for not-equal, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
32989///
32990/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmpneq_epi64_mask&expand=1091)
32991#[inline]
32992#[target_feature(enable = "avx512f,avx512vl")]
32993#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
32994#[cfg_attr(test, assert_instr(vpcmp))] //should be vpcmpq
32995pub fn _mm_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
32996 _mm_mask_cmp_epi64_mask::<_MM_CMPINT_NE>(k1, a, b)
32997}
32998
32999/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
33000///
33001/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_cmp_epi64_mask&expand=703)
33002#[inline]
33003#[target_feature(enable = "avx512f")]
33004#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33005#[rustc_legacy_const_generics(2)]
33006#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33007pub fn _mm512_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m512i, b: __m512i) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM3, 3);
        let a: i64x8 = a.as_i64x8();
        let b: i64x8 = b.as_i64x8();
        let r: i64x8 = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i64x8::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i64x8::splat(-1),
        };
        simd_bitmask(r)
    }
}
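
// Illustrative sketch (an assumption for exposition, not part of the crate's
// test suite): shows how the IMM3 predicate of `_mm512_cmp_epi64_mask`
// selects the comparison, with hand-picked inputs.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_cmp_epi64_mask_predicates() {
    let a = _mm512_set1_epi64(2);
    let b = _mm512_setr_epi64(1, 2, 3, 4, 1, 2, 3, 4);
    // _MM_CMPINT_LT (1): lanes where a < b, i.e. lanes 2, 3, 6 and 7.
    assert_eq!(_mm512_cmp_epi64_mask::<_MM_CMPINT_LT>(a, b), 0b1100_1100);
    // _MM_CMPINT_NE (4): every lane except lanes 1 and 5, where b == 2.
    assert_eq!(_mm512_cmp_epi64_mask::<_MM_CMPINT_NE>(a, b), 0b1101_1101);
    // _MM_CMPINT_FALSE (3) always yields an empty mask.
    assert_eq!(_mm512_cmp_epi64_mask::<_MM_CMPINT_FALSE>(a, b), 0);
}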
33025
33026/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33027///
33028/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cmp_epi64_mask&expand=704)
33029#[inline]
33030#[target_feature(enable = "avx512f")]
33031#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33032#[rustc_legacy_const_generics(3)]
33033#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33034pub fn _mm512_mask_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
33035 k1: __mmask8,
33036 a: __m512i,
33037 b: __m512i,
33038) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM3, 3);
        let a: i64x8 = a.as_i64x8();
        let b: i64x8 = b.as_i64x8();
        let k1: i64x8 = simd_select_bitmask(k1, i64x8::splat(-1), i64x8::ZERO);
        let r: i64x8 = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i64x8::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        simd_bitmask(r)
    }
}
33057
33058/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
33059///
33060/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cmp_epi64_mask&expand=701)
33061#[inline]
33062#[target_feature(enable = "avx512f,avx512vl")]
33063#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33064#[rustc_legacy_const_generics(2)]
33065#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33066pub fn _mm256_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m256i, b: __m256i) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM3, 3);
        let a: i64x4 = a.as_i64x4();
        let b: i64x4 = b.as_i64x4();
        let r: i64x4 = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i64x4::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i64x4::splat(-1),
        };
        simd_bitmask(r)
    }
}
33084
33085/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33086///
33087/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cmp_epi64_mask&expand=702)
33088#[inline]
33089#[target_feature(enable = "avx512f,avx512vl")]
33090#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33091#[rustc_legacy_const_generics(3)]
33092#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33093pub fn _mm256_mask_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
33094 k1: __mmask8,
33095 a: __m256i,
33096 b: __m256i,
33097) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM3, 3);
        let a: i64x4 = a.as_i64x4();
        let b: i64x4 = b.as_i64x4();
        let k1: i64x4 = simd_select_bitmask(k1, i64x4::splat(-1), i64x4::ZERO);
        let r: i64x4 = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i64x4::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        simd_bitmask(r)
    }
}
33116
33117/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k.
33118///
33119/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_epi64_mask&expand=699)
33120#[inline]
33121#[target_feature(enable = "avx512f,avx512vl")]
33122#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33123#[rustc_legacy_const_generics(2)]
33124#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33125pub fn _mm_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(a: __m128i, b: __m128i) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM3, 3);
        let a: i64x2 = a.as_i64x2();
        let b: i64x2 = b.as_i64x2();
        let r: i64x2 = match IMM3 {
            0 => simd_eq(a, b),
            1 => simd_lt(a, b),
            2 => simd_le(a, b),
            3 => i64x2::ZERO,
            4 => simd_ne(a, b),
            5 => simd_ge(a, b),
            6 => simd_gt(a, b),
            _ => i64x2::splat(-1),
        };
        simd_bitmask(r)
    }
}
33143
33144/// Compare packed signed 64-bit integers in a and b based on the comparison operand specified by imm8, and store the results in mask vector k using zeromask k1 (elements are zeroed out when the corresponding mask bit is not set).
33145///
33146/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cmp_epi64_mask&expand=700)
33147#[inline]
33148#[target_feature(enable = "avx512f,avx512vl")]
33149#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33150#[rustc_legacy_const_generics(3)]
33151#[cfg_attr(test, assert_instr(vpcmp, IMM3 = 0))]
33152pub fn _mm_mask_cmp_epi64_mask<const IMM3: _MM_CMPINT_ENUM>(
33153 k1: __mmask8,
33154 a: __m128i,
33155 b: __m128i,
33156) -> __mmask8 {
    unsafe {
        static_assert_uimm_bits!(IMM3, 3);
        let a: i64x2 = a.as_i64x2();
        let b: i64x2 = b.as_i64x2();
        let k1: i64x2 = simd_select_bitmask(k1, i64x2::splat(-1), i64x2::ZERO);
        let r: i64x2 = match IMM3 {
            0 => simd_and(k1, simd_eq(a, b)),
            1 => simd_and(k1, simd_lt(a, b)),
            2 => simd_and(k1, simd_le(a, b)),
            3 => i64x2::ZERO,
            4 => simd_and(k1, simd_ne(a, b)),
            5 => simd_and(k1, simd_ge(a, b)),
            6 => simd_and(k1, simd_gt(a, b)),
            _ => k1,
        };
        simd_bitmask(r)
    }
}
33175
33176/// Reduce the packed 32-bit integers in a by addition. Returns the sum of all elements in a.
33177///
33178/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_epi32&expand=4556)
33179#[inline]
33180#[target_feature(enable = "avx512f")]
33181#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33182pub fn _mm512_reduce_add_epi32(a: __m512i) -> i32 {
33183 unsafe { simd_reduce_add_unordered(a.as_i32x16()) }
33184}
33185
33186/// Reduce the packed 32-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
33187///
33188/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_epi32&expand=4555)
33189#[inline]
33190#[target_feature(enable = "avx512f")]
33191#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33192pub fn _mm512_mask_reduce_add_epi32(k: __mmask16, a: __m512i) -> i32 {
    unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i32x16(), i32x16::ZERO)) }
}
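
// Illustrative sketch (assumption, not part of the crate's test suite):
// lanes masked off by `k` contribute the additive identity 0, so only the
// active lanes are summed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_mask_reduce_add_epi32() {
    let a = _mm512_set1_epi32(3);
    // Only the low four lanes are active: 4 * 3 = 12.
    assert_eq!(_mm512_mask_reduce_add_epi32(0b0000_0000_0000_1111, a), 12);
    // An all-zero mask leaves nothing to sum, so the result is 0.
    assert_eq!(_mm512_mask_reduce_add_epi32(0, a), 0);
}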
33195
33196/// Reduce the packed 64-bit integers in a by addition. Returns the sum of all elements in a.
33197///
33198/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_epi64&expand=4558)
33199#[inline]
33200#[target_feature(enable = "avx512f")]
33201#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33202pub fn _mm512_reduce_add_epi64(a: __m512i) -> i64 {
33203 unsafe { simd_reduce_add_unordered(a.as_i64x8()) }
33204}
33205
33206/// Reduce the packed 64-bit integers in a by addition using mask k. Returns the sum of all active elements in a.
33207///
33208/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_epi64&expand=4557)
33209#[inline]
33210#[target_feature(enable = "avx512f")]
33211#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33212pub fn _mm512_mask_reduce_add_epi64(k: __mmask8, a: __m512i) -> i64 {
    unsafe { simd_reduce_add_unordered(simd_select_bitmask(k, a.as_i64x8(), i64x8::ZERO)) }
33214}
33215
33216/// Reduce the packed single-precision (32-bit) floating-point elements in a by addition. Returns the sum of all elements in a.
33217///
33218/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_ps&expand=4562)
33219#[inline]
33220#[target_feature(enable = "avx512f")]
33221#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33222pub fn _mm512_reduce_add_ps(a: __m512) -> f32 {
    unsafe {
        // we have to use `simd_shuffle` here because `_mm512_extractf32x8_ps` is in AVX512DQ
        let a: __m256 = _mm256_add_ps(
            simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
            simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
        );
        let a: __m128 = _mm_add_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a));
        let a: __m128 = _mm_add_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1]));
        simd_extract::<_, f32>(a, 0) + simd_extract::<_, f32>(a, 1)
    }
}
33234
33235/// Reduce the packed single-precision (32-bit) floating-point elements in a by addition using mask k. Returns the sum of all active elements in a.
33236///
33237/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_ps&expand=4561)
33238#[inline]
33239#[target_feature(enable = "avx512f")]
33240#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33241pub fn _mm512_mask_reduce_add_ps(k: __mmask16, a: __m512) -> f32 {
    unsafe { _mm512_reduce_add_ps(simd_select_bitmask(k, a, _mm512_setzero_ps())) }
33243}
33244
33245/// Reduce the packed double-precision (64-bit) floating-point elements in a by addition. Returns the sum of all elements in a.
33246///
33247/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_add_pd&expand=4560)
33248#[inline]
33249#[target_feature(enable = "avx512f")]
33250#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33251pub fn _mm512_reduce_add_pd(a: __m512d) -> f64 {
    unsafe {
        let a: __m256d = _mm256_add_pd(
            _mm512_extractf64x4_pd::<0>(a),
            _mm512_extractf64x4_pd::<1>(a),
        );
        let a: __m128d = _mm_add_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a));
        simd_extract::<_, f64>(a, 0) + simd_extract::<_, f64>(a, 1)
    }
}
33261
33262/// Reduce the packed double-precision (64-bit) floating-point elements in a by addition using mask k. Returns the sum of all active elements in a.
33263///
33264/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_add_pd&expand=4559)
33265#[inline]
33266#[target_feature(enable = "avx512f")]
33267#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33268pub fn _mm512_mask_reduce_add_pd(k: __mmask8, a: __m512d) -> f64 {
    unsafe { _mm512_reduce_add_pd(simd_select_bitmask(k, a, _mm512_setzero_pd())) }
33270}
33271
33272/// Reduce the packed 32-bit integers in a by multiplication. Returns the product of all elements in a.
33273///
33274/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_epi32&expand=4600)
33275#[inline]
33276#[target_feature(enable = "avx512f")]
33277#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33278pub fn _mm512_reduce_mul_epi32(a: __m512i) -> i32 {
33279 unsafe { simd_reduce_mul_unordered(a.as_i32x16()) }
33280}
33281
33282/// Reduce the packed 32-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
33283///
33284/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_epi32&expand=4599)
33285#[inline]
33286#[target_feature(enable = "avx512f")]
33287#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33288pub fn _mm512_mask_reduce_mul_epi32(k: __mmask16, a: __m512i) -> i32 {
    unsafe {
        simd_reduce_mul_unordered(simd_select_bitmask(
            k,
            a.as_i32x16(),
            _mm512_set1_epi32(1).as_i32x16(),
        ))
    }
}
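
// Illustrative sketch (assumption, not part of the crate's test suite): for a
// masked product the inactive lanes are replaced with the multiplicative
// identity 1 rather than 0, so masking lanes off never zeroes the result.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_mask_reduce_mul_epi32() {
    let a = _mm512_set1_epi32(2);
    // Three active lanes: 2 * 2 * 2 = 8; the other thirteen lanes act as 1.
    assert_eq!(_mm512_mask_reduce_mul_epi32(0b0000_0000_0000_0111, a), 8);
}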
33297
33298/// Reduce the packed 64-bit integers in a by multiplication. Returns the product of all elements in a.
33299///
33300/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_epi64&expand=4602)
33301#[inline]
33302#[target_feature(enable = "avx512f")]
33303#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33304pub fn _mm512_reduce_mul_epi64(a: __m512i) -> i64 {
33305 unsafe { simd_reduce_mul_unordered(a.as_i64x8()) }
33306}
33307
33308/// Reduce the packed 64-bit integers in a by multiplication using mask k. Returns the product of all active elements in a.
33309///
33310/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_epi64&expand=4601)
33311#[inline]
33312#[target_feature(enable = "avx512f")]
33313#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33314pub fn _mm512_mask_reduce_mul_epi64(k: __mmask8, a: __m512i) -> i64 {
    unsafe {
        simd_reduce_mul_unordered(simd_select_bitmask(
            k,
            a.as_i64x8(),
            _mm512_set1_epi64(1).as_i64x8(),
        ))
    }
}
33323
33324/// Reduce the packed single-precision (32-bit) floating-point elements in a by multiplication. Returns the product of all elements in a.
33325///
33326/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_ps&expand=4606)
33327#[inline]
33328#[target_feature(enable = "avx512f")]
33329#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33330pub fn _mm512_reduce_mul_ps(a: __m512) -> f32 {
    unsafe {
        // we have to use `simd_shuffle` here because `_mm512_extractf32x8_ps` is in AVX512DQ
        let a: __m256 = _mm256_mul_ps(
            simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
            simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
        );
        let a: __m128 = _mm_mul_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a));
        let a: __m128 = _mm_mul_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1]));
        simd_extract::<_, f32>(a, 0) * simd_extract::<_, f32>(a, 1)
    }
}
33342
33343/// Reduce the packed single-precision (32-bit) floating-point elements in a by multiplication using mask k. Returns the product of all active elements in a.
33344///
33345/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_ps&expand=4605)
33346#[inline]
33347#[target_feature(enable = "avx512f")]
33348#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33349pub fn _mm512_mask_reduce_mul_ps(k: __mmask16, a: __m512) -> f32 {
    unsafe { _mm512_reduce_mul_ps(simd_select_bitmask(k, a, _mm512_set1_ps(1.))) }
33351}
33352
33353/// Reduce the packed double-precision (64-bit) floating-point elements in a by multiplication. Returns the product of all elements in a.
33354///
33355/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_mul_pd&expand=4604)
33356#[inline]
33357#[target_feature(enable = "avx512f")]
33358#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33359pub fn _mm512_reduce_mul_pd(a: __m512d) -> f64 {
    unsafe {
        let a: __m256d = _mm256_mul_pd(
            _mm512_extractf64x4_pd::<0>(a),
            _mm512_extractf64x4_pd::<1>(a),
        );
        let a: __m128d = _mm_mul_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a));
        simd_extract::<_, f64>(a, 0) * simd_extract::<_, f64>(a, 1)
    }
}
33369
33370/// Reduce the packed double-precision (64-bit) floating-point elements in a by multiplication using mask k. Returns the product of all active elements in a.
33371///
33372/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_mul_pd&expand=4603)
33373#[inline]
33374#[target_feature(enable = "avx512f")]
33375#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33376pub fn _mm512_mask_reduce_mul_pd(k: __mmask8, a: __m512d) -> f64 {
    unsafe { _mm512_reduce_mul_pd(simd_select_bitmask(k, a, _mm512_set1_pd(1.))) }
33378}
33379
33380/// Reduce the packed signed 32-bit integers in a by maximum. Returns the maximum of all elements in a.
33381///
33382/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epi32&expand=4576)
33383#[inline]
33384#[target_feature(enable = "avx512f")]
33385#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33386pub fn _mm512_reduce_max_epi32(a: __m512i) -> i32 {
33387 unsafe { simd_reduce_max(a.as_i32x16()) }
33388}
33389
33390/// Reduce the packed signed 32-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
33391///
33392/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epi32&expand=4575)
33393#[inline]
33394#[target_feature(enable = "avx512f")]
33395#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33396pub fn _mm512_mask_reduce_max_epi32(k: __mmask16, a: __m512i) -> i32 {
    unsafe {
        simd_reduce_max(simd_select_bitmask(
            k,
            a.as_i32x16(),
            i32x16::splat(i32::MIN),
        ))
    }
}
33405
33406/// Reduce the packed signed 64-bit integers in a by maximum. Returns the maximum of all elements in a.
33407///
33408/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epi64&expand=4578)
33409#[inline]
33410#[target_feature(enable = "avx512f")]
33411#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33412pub fn _mm512_reduce_max_epi64(a: __m512i) -> i64 {
33413 unsafe { simd_reduce_max(a.as_i64x8()) }
33414}
33415
33416/// Reduce the packed signed 64-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
33417///
33418/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epi64&expand=4577)
33419#[inline]
33420#[target_feature(enable = "avx512f")]
33421#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33422pub fn _mm512_mask_reduce_max_epi64(k: __mmask8, a: __m512i) -> i64 {
    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_i64x8(), i64x8::splat(i64::MIN))) }
}
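
// Illustrative sketch (assumption, not part of the crate's test suite): the
// masked maximum seeds inactive lanes with `i64::MIN`, so an all-zero mask
// returns `i64::MIN` rather than 0.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_mask_reduce_max_epi64() {
    let a = _mm512_setr_epi64(-5, -4, -3, -2, -1, 1, 2, 3);
    // Only the negative lanes are active, so the maximum among them is -1.
    assert_eq!(_mm512_mask_reduce_max_epi64(0b0001_1111, a), -1);
    // No active lanes: the identity element itself is returned.
    assert_eq!(_mm512_mask_reduce_max_epi64(0, a), i64::MIN);
}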
33425
33426/// Reduce the packed unsigned 32-bit integers in a by maximum. Returns the maximum of all elements in a.
33427///
33428/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epu32&expand=4580)
33429#[inline]
33430#[target_feature(enable = "avx512f")]
33431#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33432pub fn _mm512_reduce_max_epu32(a: __m512i) -> u32 {
33433 unsafe { simd_reduce_max(a.as_u32x16()) }
33434}
33435
33436/// Reduce the packed unsigned 32-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
33437///
33438/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epu32&expand=4579)
33439#[inline]
33440#[target_feature(enable = "avx512f")]
33441#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33442pub fn _mm512_mask_reduce_max_epu32(k: __mmask16, a: __m512i) -> u32 {
    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u32x16(), u32x16::ZERO)) }
33444}
33445
33446/// Reduce the packed unsigned 64-bit integers in a by maximum. Returns the maximum of all elements in a.
33447///
33448/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_epu64&expand=4582)
33449#[inline]
33450#[target_feature(enable = "avx512f")]
33451#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33452pub fn _mm512_reduce_max_epu64(a: __m512i) -> u64 {
33453 unsafe { simd_reduce_max(a.as_u64x8()) }
33454}
33455
33456/// Reduce the packed unsigned 64-bit integers in a by maximum using mask k. Returns the maximum of all active elements in a.
33457///
33458/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_epu64&expand=4581)
33459#[inline]
33460#[target_feature(enable = "avx512f")]
33461#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33462pub fn _mm512_mask_reduce_max_epu64(k: __mmask8, a: __m512i) -> u64 {
    unsafe { simd_reduce_max(simd_select_bitmask(k, a.as_u64x8(), u64x8::ZERO)) }
33464}
33465
33466/// Reduce the packed single-precision (32-bit) floating-point elements in a by maximum. Returns the maximum of all elements in a.
33467///
33468/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_ps&expand=4586)
33469#[inline]
33470#[target_feature(enable = "avx512f")]
33471#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33472pub fn _mm512_reduce_max_ps(a: __m512) -> f32 {
    unsafe {
        let a: __m256 = _mm256_max_ps(
            simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
            simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
        );
        let a: __m128 = _mm_max_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a));
        let a: __m128 = _mm_max_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1]));
        _mm_cvtss_f32(_mm_max_ss(a, _mm_movehdup_ps(a)))
    }
}
33483
33484/// Reduce the packed single-precision (32-bit) floating-point elements in a by maximum using mask k. Returns the maximum of all active elements in a.
33485///
33486/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_ps&expand=4585)
33487#[inline]
33488#[target_feature(enable = "avx512f")]
33489#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33490pub fn _mm512_mask_reduce_max_ps(k: __mmask16, a: __m512) -> f32 {
    _mm512_reduce_max_ps(_mm512_mask_mov_ps(_mm512_set1_ps(f32::MIN), k, a))
33492}
33493
33494/// Reduce the packed double-precision (64-bit) floating-point elements in a by maximum. Returns the maximum of all elements in a.
33495///
33496/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_max_pd&expand=4584)
33497#[inline]
33498#[target_feature(enable = "avx512f")]
33499#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33500pub fn _mm512_reduce_max_pd(a: __m512d) -> f64 {
    unsafe {
        let a: __m256d = _mm256_max_pd(
            _mm512_extractf64x4_pd::<0>(a),
            _mm512_extractf64x4_pd::<1>(a),
        );
        let a: __m128d = _mm_max_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a));
        _mm_cvtsd_f64(_mm_max_sd(a, simd_shuffle!(a, a, [1, 0])))
    }
}
33510
33511/// Reduce the packed double-precision (64-bit) floating-point elements in a by maximum using mask k. Returns the maximum of all active elements in a.
33512///
33513/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_max_pd&expand=4583)
33514#[inline]
33515#[target_feature(enable = "avx512f")]
33516#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33517pub fn _mm512_mask_reduce_max_pd(k: __mmask8, a: __m512d) -> f64 {
    _mm512_reduce_max_pd(_mm512_mask_mov_pd(_mm512_set1_pd(f64::MIN), k, a))
33519}
33520
33521/// Reduce the packed signed 32-bit integers in a by minimum. Returns the minimum of all elements in a.
33522///
33523/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epi32&expand=4588)
33524#[inline]
33525#[target_feature(enable = "avx512f")]
33526#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33527pub fn _mm512_reduce_min_epi32(a: __m512i) -> i32 {
33528 unsafe { simd_reduce_min(a.as_i32x16()) }
33529}
33530
/// Reduce the packed signed 32-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
33532///
33533/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epi32&expand=4587)
33534#[inline]
33535#[target_feature(enable = "avx512f")]
33536#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33537pub fn _mm512_mask_reduce_min_epi32(k: __mmask16, a: __m512i) -> i32 {
    unsafe {
        simd_reduce_min(simd_select_bitmask(
            k,
            a.as_i32x16(),
            i32x16::splat(i32::MAX),
        ))
    }
}
33546
33547/// Reduce the packed signed 64-bit integers in a by minimum. Returns the minimum of all elements in a.
33548///
33549/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epi64&expand=4590)
33550#[inline]
33551#[target_feature(enable = "avx512f")]
33552#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33553pub fn _mm512_reduce_min_epi64(a: __m512i) -> i64 {
33554 unsafe { simd_reduce_min(a.as_i64x8()) }
33555}
33556
/// Reduce the packed signed 64-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
33558///
33559/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epi64&expand=4589)
33560#[inline]
33561#[target_feature(enable = "avx512f")]
33562#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33563pub fn _mm512_mask_reduce_min_epi64(k: __mmask8, a: __m512i) -> i64 {
    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_i64x8(), i64x8::splat(i64::MAX))) }
33565}
33566
33567/// Reduce the packed unsigned 32-bit integers in a by minimum. Returns the minimum of all elements in a.
33568///
33569/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epu32&expand=4592)
33570#[inline]
33571#[target_feature(enable = "avx512f")]
33572#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33573pub fn _mm512_reduce_min_epu32(a: __m512i) -> u32 {
33574 unsafe { simd_reduce_min(a.as_u32x16()) }
33575}
33576
/// Reduce the packed unsigned 32-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
33578///
33579/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epu32&expand=4591)
33580#[inline]
33581#[target_feature(enable = "avx512f")]
33582#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33583pub fn _mm512_mask_reduce_min_epu32(k: __mmask16, a: __m512i) -> u32 {
    unsafe {
        simd_reduce_min(simd_select_bitmask(
            k,
            a.as_u32x16(),
            u32x16::splat(u32::MAX),
        ))
    }
}
33592
33593/// Reduce the packed unsigned 64-bit integers in a by minimum. Returns the minimum of all elements in a.
33594///
33595/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_epu64&expand=4594)
33596#[inline]
33597#[target_feature(enable = "avx512f")]
33598#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33599pub fn _mm512_reduce_min_epu64(a: __m512i) -> u64 {
33600 unsafe { simd_reduce_min(a.as_u64x8()) }
33601}
33602
/// Reduce the packed unsigned 64-bit integers in a by minimum using mask k. Returns the minimum of all active elements in a.
33604///
33605/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_epu64&expand=4589)
33606#[inline]
33607#[target_feature(enable = "avx512f")]
33608#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33609pub fn _mm512_mask_reduce_min_epu64(k: __mmask8, a: __m512i) -> u64 {
    unsafe { simd_reduce_min(simd_select_bitmask(k, a.as_u64x8(), u64x8::splat(u64::MAX))) }
33611}
33612
33613/// Reduce the packed single-precision (32-bit) floating-point elements in a by minimum. Returns the minimum of all elements in a.
33614///
33615/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_ps&expand=4598)
33616#[inline]
33617#[target_feature(enable = "avx512f")]
33618#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33619pub fn _mm512_reduce_min_ps(a: __m512) -> f32 {
    unsafe {
        let a: __m256 = _mm256_min_ps(
            simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
            simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
        );
        let a: __m128 = _mm_min_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a));
        let a: __m128 = _mm_min_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1]));
        _mm_cvtss_f32(_mm_min_ss(a, _mm_movehdup_ps(a)))
    }
}
33630
/// Reduce the packed single-precision (32-bit) floating-point elements in a by minimum using mask k. Returns the minimum of all active elements in a.
33632///
33633/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_ps&expand=4597)
33634#[inline]
33635#[target_feature(enable = "avx512f")]
33636#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33637pub fn _mm512_mask_reduce_min_ps(k: __mmask16, a: __m512) -> f32 {
    _mm512_reduce_min_ps(_mm512_mask_mov_ps(_mm512_set1_ps(f32::MAX), k, a))
33639}
33640
33641/// Reduce the packed double-precision (64-bit) floating-point elements in a by minimum. Returns the minimum of all elements in a.
33642///
33643/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_min_pd&expand=4596)
33644#[inline]
33645#[target_feature(enable = "avx512f")]
33646#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33647pub fn _mm512_reduce_min_pd(a: __m512d) -> f64 {
    unsafe {
        let a: __m256d = _mm256_min_pd(
            _mm512_extractf64x4_pd::<0>(a),
            _mm512_extractf64x4_pd::<1>(a),
        );
        let a: __m128d = _mm_min_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a));
        _mm_cvtsd_f64(_mm_min_sd(a, simd_shuffle!(a, a, [1, 0])))
    }
}
33657
/// Reduce the packed double-precision (64-bit) floating-point elements in a by minimum using mask k. Returns the minimum of all active elements in a.
33659///
33660/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_min_pd&expand=4595)
33661#[inline]
33662#[target_feature(enable = "avx512f")]
33663#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33664pub fn _mm512_mask_reduce_min_pd(k: __mmask8, a: __m512d) -> f64 {
    _mm512_reduce_min_pd(_mm512_mask_mov_pd(_mm512_set1_pd(f64::MAX), k, a))
33666}
33667
33668/// Reduce the packed 32-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
33669///
33670/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_and_epi32&expand=4564)
33671#[inline]
33672#[target_feature(enable = "avx512f")]
33673#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33674pub fn _mm512_reduce_and_epi32(a: __m512i) -> i32 {
33675 unsafe { simd_reduce_and(a.as_i32x16()) }
33676}
33677
33678/// Reduce the packed 32-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
33679///
33680/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_and_epi32&expand=4563)
33681#[inline]
33682#[target_feature(enable = "avx512f")]
33683#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33684pub fn _mm512_mask_reduce_and_epi32(k: __mmask16, a: __m512i) -> i32 {
    unsafe { simd_reduce_and(simd_select_bitmask(k, a.as_i32x16(), i32x16::splat(-1))) }
33686}
33687
33688/// Reduce the packed 64-bit integers in a by bitwise AND. Returns the bitwise AND of all elements in a.
33689///
33690/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_and_epi64&expand=4566)
33691#[inline]
33692#[target_feature(enable = "avx512f")]
33693#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33694pub fn _mm512_reduce_and_epi64(a: __m512i) -> i64 {
33695 unsafe { simd_reduce_and(a.as_i64x8()) }
33696}
33697
/// Reduce the packed 64-bit integers in a by bitwise AND using mask k. Returns the bitwise AND of all active elements in a.
33699///
33700/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_and_epi64&expand=4557)
33701#[inline]
33702#[target_feature(enable = "avx512f")]
33703#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33704pub fn _mm512_mask_reduce_and_epi64(k: __mmask8, a: __m512i) -> i64 {
    unsafe { simd_reduce_and(simd_select_bitmask(k, a.as_i64x8(), i64x8::splat(-1))) }
33706}
33707
33708/// Reduce the packed 32-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
33709///
33710/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_or_epi32&expand=4608)
33711#[inline]
33712#[target_feature(enable = "avx512f")]
33713#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33714pub fn _mm512_reduce_or_epi32(a: __m512i) -> i32 {
33715 unsafe { simd_reduce_or(a.as_i32x16()) }
33716}
33717
33718/// Reduce the packed 32-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
33719///
33720/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_or_epi32&expand=4607)
33721#[inline]
33722#[target_feature(enable = "avx512f")]
33723#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33724pub fn _mm512_mask_reduce_or_epi32(k: __mmask16, a: __m512i) -> i32 {
    unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i32x16(), i32x16::ZERO)) }
33726}
33727
33728/// Reduce the packed 64-bit integers in a by bitwise OR. Returns the bitwise OR of all elements in a.
33729///
33730/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_reduce_or_epi64&expand=4610)
33731#[inline]
33732#[target_feature(enable = "avx512f")]
33733#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33734pub fn _mm512_reduce_or_epi64(a: __m512i) -> i64 {
33735 unsafe { simd_reduce_or(a.as_i64x8()) }
33736}
33737
33738/// Reduce the packed 64-bit integers in a by bitwise OR using mask k. Returns the bitwise OR of all active elements in a.
33739///
33740/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_reduce_or_epi64&expand=4609)
33741#[inline]
33742#[target_feature(enable = "avx512f")]
33743#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33744pub fn _mm512_mask_reduce_or_epi64(k: __mmask8, a: __m512i) -> i64 {
    unsafe { simd_reduce_or(simd_select_bitmask(k, a.as_i64x8(), i64x8::ZERO)) }
}
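
// Illustrative sketch (assumption, not part of the crate's test suite): the
// masked bitwise reductions use the identity of their operator, all-ones for
// AND and all-zeros for OR, so masked-off lanes never affect the result.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_mask_reduce_and_or_epi64() {
    let a = _mm512_setr_epi64(0b1100, 0b1010, 0, 0, 0, 0, 0, 0);
    // AND of the two active lanes: 0b1100 & 0b1010 = 0b1000.
    assert_eq!(_mm512_mask_reduce_and_epi64(0b0000_0011, a), 0b1000);
    // OR of the two active lanes: 0b1100 | 0b1010 = 0b1110.
    assert_eq!(_mm512_mask_reduce_or_epi64(0b0000_0011, a), 0b1110);
}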
33747
33748/// Returns vector of type `__m512d` with indeterminate elements.
33749/// Despite being "undefined", this is some valid value and not equivalent to [`mem::MaybeUninit`].
33750/// In practice, this is equivalent to [`mem::zeroed`].
33751///
33752/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined_pd)
33753#[inline]
33754#[target_feature(enable = "avx512f")]
33755#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33756// This intrinsic has no corresponding instruction.
33757pub fn _mm512_undefined_pd() -> __m512d {
33758 unsafe { const { mem::zeroed() } }
33759}
33760
33761/// Returns vector of type `__m512` with indeterminate elements.
33762/// Despite being "undefined", this is some valid value and not equivalent to [`mem::MaybeUninit`].
33763/// In practice, this is equivalent to [`mem::zeroed`].
33764///
33765/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined_ps)
33766#[inline]
33767#[target_feature(enable = "avx512f")]
33768#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33769// This intrinsic has no corresponding instruction.
33770pub fn _mm512_undefined_ps() -> __m512 {
33771 unsafe { const { mem::zeroed() } }
33772}
33773
33774/// Return vector of type __m512i with indeterminate elements.
33775/// Despite being "undefined", this is some valid value and not equivalent to [`mem::MaybeUninit`].
33776/// In practice, this is equivalent to [`mem::zeroed`].
33777///
33778/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined_epi32&expand=5995)
33779#[inline]
33780#[target_feature(enable = "avx512f")]
33781#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33782// This intrinsic has no corresponding instruction.
33783pub fn _mm512_undefined_epi32() -> __m512i {
33784 unsafe { const { mem::zeroed() } }
33785}
33786
33787/// Return vector of type __m512 with indeterminate elements.
33788/// Despite being "undefined", this is some valid value and not equivalent to [`mem::MaybeUninit`].
33789/// In practice, this is equivalent to [`mem::zeroed`].
33790///
33791/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_undefined&expand=5994)
33792#[inline]
33793#[target_feature(enable = "avx512f")]
33794#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33795// This intrinsic has no corresponding instruction.
33796pub fn _mm512_undefined() -> __m512 {
33797 unsafe { const { mem::zeroed() } }
33798}
33799
33800/// Load 512-bits (composed of 16 packed 32-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
33801///
33802/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_epi32&expand=3377)
33803#[inline]
33804#[target_feature(enable = "avx512f")]
33805#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33806#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
33807pub unsafe fn _mm512_loadu_epi32(mem_addr: *const i32) -> __m512i {
    ptr::read_unaligned(mem_addr as *const __m512i)
33809}
33810
33811/// Load 256-bits (composed of 8 packed 32-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
33812///
33813/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_epi32&expand=3374)
33814#[inline]
33815#[target_feature(enable = "avx512f,avx512vl")]
33816#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33817#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
33818pub unsafe fn _mm256_loadu_epi32(mem_addr: *const i32) -> __m256i {
    ptr::read_unaligned(mem_addr as *const __m256i)
33820}
33821
33822/// Load 128-bits (composed of 4 packed 32-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
33823///
33824/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_epi32&expand=3371)
33825#[inline]
33826#[target_feature(enable = "avx512f,avx512vl")]
33827#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33828#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
33829pub unsafe fn _mm_loadu_epi32(mem_addr: *const i32) -> __m128i {
    ptr::read_unaligned(mem_addr as *const __m128i)
}
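
// Illustrative sketch (assumption, not part of the crate's test suite): the
// `loadu` family accepts any address, so reading from an arbitrary offset
// into a slice is fine as long as enough elements are readable.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f,avx512vl")]
fn example_loadu_epi32() {
    let data: [i32; 20] = [
        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
    ];
    unsafe {
        // 512-bit load of elements 1..17; the pointer is deliberately
        // misaligned with respect to a 64-byte boundary.
        let v = _mm512_loadu_epi32(data.as_ptr().add(1));
        assert_eq!(_mm512_reduce_add_epi32(v), 136); // 1 + 2 + ... + 16
        // The narrower variants follow the same contract.
        let _ = _mm256_loadu_epi32(data.as_ptr().add(3));
        let _ = _mm_loadu_epi32(data.as_ptr().add(5));
    }
}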
33832
33833/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33834///
33835/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_storeu_epi16&expand=1460)
33836#[inline]
33837#[target_feature(enable = "avx512f")]
33838#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33839#[cfg_attr(test, assert_instr(vpmovdw))]
33840pub unsafe fn _mm512_mask_cvtepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
    vpmovdwmem(mem_addr, a.as_i32x16(), k);
33842}
33843
33844/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33845///
33846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_storeu_epi16&expand=1462)
33847#[inline]
33848#[target_feature(enable = "avx512f,avx512vl")]
33849#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33850#[cfg_attr(test, assert_instr(vpmovdw))]
33851pub unsafe fn _mm256_mask_cvtepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovdwmem256(mem_addr, a.as_i32x8(), k);
33853}
33854
33855/// Convert packed 32-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33856///
33857/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_storeu_epi16&expand=1461)
33858#[inline]
33859#[target_feature(enable = "avx512f,avx512vl")]
33860#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33861#[cfg_attr(test, assert_instr(vpmovdw))]
33862pub unsafe fn _mm_mask_cvtepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovdwmem128(mem_addr, a.as_i32x4(), k);
}
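
// Illustrative sketch (assumption, not part of the crate's test suite): the
// masked narrowing stores truncate each active 32-bit lane to 16 bits and
// leave the destination untouched where the mask bit is clear.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_mask_cvtepi32_storeu_epi16() {
    let a = _mm512_set1_epi32(0x0001_0002);
    let mut out = [-1i16; 16];
    unsafe {
        // Only the low eight lanes are written; truncation keeps 0x0002.
        _mm512_mask_cvtepi32_storeu_epi16(out.as_mut_ptr() as *mut i8, 0x00FF, a);
    }
    assert_eq!(out[0], 0x0002);
    assert_eq!(out[15], -1);
}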
33865
33866/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33867///
33868/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_storeu_epi16&expand=1833)
33869#[inline]
33870#[target_feature(enable = "avx512f")]
33871#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33872#[cfg_attr(test, assert_instr(vpmovsdw))]
33873pub unsafe fn _mm512_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
    vpmovsdwmem(mem_addr, a.as_i32x16(), k);
33875}
33876
33877/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33878///
33879/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_storeu_epi16&expand=1832)
33880#[inline]
33881#[target_feature(enable = "avx512f,avx512vl")]
33882#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33883#[cfg_attr(test, assert_instr(vpmovsdw))]
33884pub unsafe fn _mm256_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovsdwmem256(mem_addr, a.as_i32x8(), k);
33886}
33887
33888/// Convert packed signed 32-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33889///
33890/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_storeu_epi16&expand=1831)
33891#[inline]
33892#[target_feature(enable = "avx512f,avx512vl")]
33893#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33894#[cfg_attr(test, assert_instr(vpmovsdw))]
33895pub unsafe fn _mm_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovsdwmem128(mem_addr, a.as_i32x4(), k);
33897}
33898
33899/// Convert packed unsigned 32-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33900///
33901/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_storeu_epi16&expand=2068)
33902#[inline]
33903#[target_feature(enable = "avx512f")]
33904#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33905#[cfg_attr(test, assert_instr(vpmovusdw))]
33906pub unsafe fn _mm512_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
    vpmovusdwmem(mem_addr, a.as_i32x16(), k);
33908}
33909
33910/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33911///
33912/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_storeu_epi16&expand=2067)
33913#[inline]
33914#[target_feature(enable = "avx512f,avx512vl")]
33915#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33916#[cfg_attr(test, assert_instr(vpmovusdw))]
33917pub unsafe fn _mm256_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovusdwmem256(mem_addr, a.as_i32x8(), k);
33919}
33920
33921/// Convert packed unsigned 32-bit integers in a to packed unsigned 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33922///
33923/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_storeu_epi16&expand=2066)
33924#[inline]
33925#[target_feature(enable = "avx512f,avx512vl")]
33926#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33927#[cfg_attr(test, assert_instr(vpmovusdw))]
33928pub unsafe fn _mm_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovusdwmem128(mem_addr, a.as_i32x4(), k);
33930}
33931
33932/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33933///
33934/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi32_storeu_epi8&expand=1463)
33935#[inline]
33936#[target_feature(enable = "avx512f")]
33937#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33938#[cfg_attr(test, assert_instr(vpmovdb))]
33939pub unsafe fn _mm512_mask_cvtepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
    vpmovdbmem(mem_addr, a.as_i32x16(), k);
33941}
33942
33943/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33944///
33945/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi32_storeu_epi8&expand=1462)
33946#[inline]
33947#[target_feature(enable = "avx512f,avx512vl")]
33948#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33949#[cfg_attr(test, assert_instr(vpmovdb))]
33950pub unsafe fn _mm256_mask_cvtepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovdbmem256(mem_addr, a.as_i32x8(), k);
33952}
33953
33954/// Convert packed 32-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33955///
33956/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi32_storeu_epi8&expand=1461)
33957#[inline]
33958#[target_feature(enable = "avx512f,avx512vl")]
33959#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33960#[cfg_attr(test, assert_instr(vpmovdb))]
33961pub unsafe fn _mm_mask_cvtepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovdbmem128(mem_addr, a.as_i32x4(), k);
33963}
33964
33965/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33966///
33967/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi32_storeu_epi8&expand=1836)
33968#[inline]
33969#[target_feature(enable = "avx512f")]
33970#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33971#[cfg_attr(test, assert_instr(vpmovsdb))]
33972pub unsafe fn _mm512_mask_cvtsepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
    vpmovsdbmem(mem_addr, a.as_i32x16(), k);
33974}
33975
33976/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33977///
33978/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi32_storeu_epi8&expand=1835)
33979#[inline]
33980#[target_feature(enable = "avx512f,avx512vl")]
33981#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33982#[cfg_attr(test, assert_instr(vpmovsdb))]
33983pub unsafe fn _mm256_mask_cvtsepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovsdbmem256(mem_addr, a.as_i32x8(), k);
33985}
33986
33987/// Convert packed signed 32-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33988///
33989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi32_storeu_epi8&expand=1834)
33990#[inline]
33991#[target_feature(enable = "avx512f,avx512vl")]
33992#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
33993#[cfg_attr(test, assert_instr(vpmovsdb))]
33994pub unsafe fn _mm_mask_cvtsepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovsdbmem128(mem_addr, a.as_i32x4(), k);
33996}
33997
33998/// Convert packed unsigned 32-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
33999///
34000/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi32_storeu_epi8&expand=2071)
34001#[inline]
34002#[target_feature(enable = "avx512f")]
34003#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34004#[cfg_attr(test, assert_instr(vpmovusdb))]
34005pub unsafe fn _mm512_mask_cvtusepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
    vpmovusdbmem(mem_addr, a.as_i32x16(), k);
34007}
34008
34009/// Convert packed unsigned 32-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34010///
34011/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi32_storeu_epi8&expand=2070)
34012#[inline]
34013#[target_feature(enable = "avx512f,avx512vl")]
34014#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34015#[cfg_attr(test, assert_instr(vpmovusdb))]
34016pub unsafe fn _mm256_mask_cvtusepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovusdbmem256(mem_addr, a.as_i32x8(), k);
34018}
34019
34020/// Convert packed unsigned 32-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34021///
34022/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi32_storeu_epi8&expand=2069)
34023#[inline]
34024#[target_feature(enable = "avx512f,avx512vl")]
34025#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34026#[cfg_attr(test, assert_instr(vpmovusdb))]
34027pub unsafe fn _mm_mask_cvtusepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovusdbmem128(mem_addr, a.as_i32x4(), k);
34029}
34030
34031/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34032///
34033/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_storeu_epi16&expand=1513)
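///
/// # Example
///
/// An illustrative sketch (not compiled as a doctest here) showing that this store
/// truncates rather than saturates; it assumes AVX-512F is available:
///
/// ```ignore
/// let mut out = [0i16; 8];
/// unsafe {
///     // 0x1_0001 keeps only its low 16 bits (1) when truncated to a 16-bit lane.
///     let a = _mm512_set1_epi64(0x1_0001);
///     // All eight lanes are active, so 16 bytes are written.
///     _mm512_mask_cvtepi64_storeu_epi16(out.as_mut_ptr().cast(), 0xFF, a);
/// }
/// assert_eq!(out, [1i16; 8]);
/// ```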
34034#[inline]
34035#[target_feature(enable = "avx512f")]
34036#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34037#[cfg_attr(test, assert_instr(vpmovqw))]
34038pub unsafe fn _mm512_mask_cvtepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
    vpmovqwmem(mem_addr, a.as_i64x8(), k);
34040}
34041
34042/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34043///
34044/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_storeu_epi16&expand=1512)
34045#[inline]
34046#[target_feature(enable = "avx512f,avx512vl")]
34047#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34048#[cfg_attr(test, assert_instr(vpmovqw))]
34049pub unsafe fn _mm256_mask_cvtepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovqwmem256(mem_addr, a.as_i64x4(), k);
34051}
34052
34053/// Convert packed 64-bit integers in a to packed 16-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34054///
34055/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_storeu_epi16&expand=1511)
34056#[inline]
34057#[target_feature(enable = "avx512f,avx512vl")]
34058#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34059#[cfg_attr(test, assert_instr(vpmovqw))]
34060pub unsafe fn _mm_mask_cvtepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovqwmem128(mem_addr, a.as_i64x2(), k);
34062}
34063
34064/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34065///
34066/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_storeu_epi16&expand=1866)
34067#[inline]
34068#[target_feature(enable = "avx512f")]
34069#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34070#[cfg_attr(test, assert_instr(vpmovsqw))]
34071pub unsafe fn _mm512_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
    vpmovsqwmem(mem_addr, a.as_i64x8(), k);
34073}
34074
34075/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34076///
34077/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_storeu_epi16&expand=1865)
34078#[inline]
34079#[target_feature(enable = "avx512f,avx512vl")]
34080#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34081#[cfg_attr(test, assert_instr(vpmovsqw))]
34082pub unsafe fn _mm256_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovsqwmem256(mem_addr, a.as_i64x4(), k);
34084}
34085
34086/// Convert packed signed 64-bit integers in a to packed 16-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34087///
34088/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_storeu_epi16&expand=1864)
34089#[inline]
34090#[target_feature(enable = "avx512f,avx512vl")]
34091#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34092#[cfg_attr(test, assert_instr(vpmovsqw))]
34093pub unsafe fn _mm_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovsqwmem128(mem_addr, a.as_i64x2(), k);
34095}
34096
34097/// Convert packed unsigned 64-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34098///
34099/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_storeu_epi16&expand=2101)
34100#[inline]
34101#[target_feature(enable = "avx512f")]
34102#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34103#[cfg_attr(test, assert_instr(vpmovusqw))]
34104pub unsafe fn _mm512_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
    vpmovusqwmem(mem_addr, a.as_i64x8(), k);
34106}
34107
34108/// Convert packed unsigned 64-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34109///
34110/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_storeu_epi16&expand=2100)
34111#[inline]
34112#[target_feature(enable = "avx512f,avx512vl")]
34113#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34114#[cfg_attr(test, assert_instr(vpmovusqw))]
34115pub unsafe fn _mm256_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovusqwmem256(mem_addr, a.as_i64x4(), k);
34117}
34118
34119/// Convert packed unsigned 64-bit integers in a to packed 16-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34120///
34121/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_storeu_epi16&expand=2099)
34122#[inline]
34123#[target_feature(enable = "avx512f,avx512vl")]
34124#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34125#[cfg_attr(test, assert_instr(vpmovusqw))]
34126pub unsafe fn _mm_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovusqwmem128(mem_addr, a.as_i64x2(), k);
34128}
34129
34130/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34131///
34132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_storeu_epi8&expand=1519)
34133#[inline]
34134#[target_feature(enable = "avx512f")]
34135#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34136#[cfg_attr(test, assert_instr(vpmovqb))]
34137pub unsafe fn _mm512_mask_cvtepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
    vpmovqbmem(mem_addr, a.as_i64x8(), k);
34139}
34140
34141/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34142///
34143/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_storeu_epi8&expand=1518)
34144#[inline]
34145#[target_feature(enable = "avx512f,avx512vl")]
34146#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34147#[cfg_attr(test, assert_instr(vpmovqb))]
34148pub unsafe fn _mm256_mask_cvtepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovqbmem256(mem_addr, a.as_i64x4(), k);
34150}
34151
34152/// Convert packed 64-bit integers in a to packed 8-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34153///
34154/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_storeu_epi8&expand=1517)
34155#[inline]
34156#[target_feature(enable = "avx512f,avx512vl")]
34157#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34158#[cfg_attr(test, assert_instr(vpmovqb))]
34159pub unsafe fn _mm_mask_cvtepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovqbmem128(mem_addr, a.as_i64x2(), k);
34161}
34162
34163/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34164///
34165/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_storeu_epi8&expand=1872)
34166#[inline]
34167#[target_feature(enable = "avx512f")]
34168#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34169#[cfg_attr(test, assert_instr(vpmovsqb))]
34170pub unsafe fn _mm512_mask_cvtsepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
    vpmovsqbmem(mem_addr, a.as_i64x8(), k);
34172}
34173
34174/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34175///
34176/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_storeu_epi8&expand=1871)
34177#[inline]
34178#[target_feature(enable = "avx512f,avx512vl")]
34179#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34180#[cfg_attr(test, assert_instr(vpmovsqb))]
34181pub unsafe fn _mm256_mask_cvtsepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovsqbmem256(mem_addr, a.as_i64x4(), k);
34183}
34184
34185/// Convert packed signed 64-bit integers in a to packed 8-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34186///
34187/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_storeu_epi8&expand=1870)
34188#[inline]
34189#[target_feature(enable = "avx512f,avx512vl")]
34190#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34191#[cfg_attr(test, assert_instr(vpmovsqb))]
34192pub unsafe fn _mm_mask_cvtsepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovsqbmem128(mem_addr, a.as_i64x2(), k);
34194}
34195
34196/// Convert packed unsigned 64-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34197///
34198/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_storeu_epi8&expand=2107)
34199#[inline]
34200#[target_feature(enable = "avx512f")]
34201#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34202#[cfg_attr(test, assert_instr(vpmovusqb))]
34203pub unsafe fn _mm512_mask_cvtusepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
    vpmovusqbmem(mem_addr, a.as_i64x8(), k);
34205}
34206
34207/// Convert packed unsigned 64-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34208///
34209/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_storeu_epi8&expand=2106)
34210#[inline]
34211#[target_feature(enable = "avx512f,avx512vl")]
34212#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34213#[cfg_attr(test, assert_instr(vpmovusqb))]
34214pub unsafe fn _mm256_mask_cvtusepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovusqbmem256(mem_addr, a.as_i64x4(), k);
34216}
34217
34218/// Convert packed unsigned 64-bit integers in a to packed 8-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34219///
34220/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_storeu_epi8&expand=2105)
34221#[inline]
34222#[target_feature(enable = "avx512f,avx512vl")]
34223#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34224#[cfg_attr(test, assert_instr(vpmovusqb))]
34225pub unsafe fn _mm_mask_cvtusepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovusqbmem128(mem_addr, a.as_i64x2(), k);
34227}
34228
/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34230///
34231/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtepi64_storeu_epi32&expand=1516)
34232#[inline]
34233#[target_feature(enable = "avx512f")]
34234#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34235#[cfg_attr(test, assert_instr(vpmovqd))]
34236pub unsafe fn _mm512_mask_cvtepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
    vpmovqdmem(mem_addr, a.as_i64x8(), k);
34238}
34239
/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34241///
34242/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtepi64_storeu_epi32&expand=1515)
34243#[inline]
34244#[target_feature(enable = "avx512f,avx512vl")]
34245#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34246#[cfg_attr(test, assert_instr(vpmovqd))]
34247pub unsafe fn _mm256_mask_cvtepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovqdmem256(mem_addr, a.as_i64x4(), k);
34249}
34250
/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34252///
34253/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtepi64_storeu_epi32&expand=1514)
34254#[inline]
34255#[target_feature(enable = "avx512f,avx512vl")]
34256#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34257#[cfg_attr(test, assert_instr(vpmovqd))]
34258pub unsafe fn _mm_mask_cvtepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovqdmem128(mem_addr, a.as_i64x2(), k);
34260}
34261
34262/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34263///
34264/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtsepi64_storeu_epi32&expand=1869)
34265#[inline]
34266#[target_feature(enable = "avx512f")]
34267#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34268#[cfg_attr(test, assert_instr(vpmovsqd))]
34269pub unsafe fn _mm512_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
    vpmovsqdmem(mem_addr, a.as_i64x8(), k);
34271}
34272
34273/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34274///
34275/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtsepi64_storeu_epi32&expand=1868)
34276#[inline]
34277#[target_feature(enable = "avx512f,avx512vl")]
34278#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34279#[cfg_attr(test, assert_instr(vpmovsqd))]
34280pub unsafe fn _mm256_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovsqdmem256(mem_addr, a.as_i64x4(), k);
34282}
34283
34284/// Convert packed signed 64-bit integers in a to packed 32-bit integers with signed saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34285///
34286/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtsepi64_storeu_epi32&expand=1867)
34287#[inline]
34288#[target_feature(enable = "avx512f,avx512vl")]
34289#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34290#[cfg_attr(test, assert_instr(vpmovsqd))]
34291pub unsafe fn _mm_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovsqdmem128(mem_addr, a.as_i64x2(), k);
34293}
34294
34295/// Convert packed unsigned 64-bit integers in a to packed 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34296///
34297/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_cvtusepi64_storeu_epi32&expand=2104)
34298#[inline]
34299#[target_feature(enable = "avx512f")]
34300#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34301#[cfg_attr(test, assert_instr(vpmovusqd))]
34302pub unsafe fn _mm512_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
    vpmovusqdmem(mem_addr, a.as_i64x8(), k);
34304}
34305
34306/// Convert packed unsigned 64-bit integers in a to packed 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34307///
34308/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_cvtusepi64_storeu_epi32&expand=2103)
34309#[inline]
34310#[target_feature(enable = "avx512f,avx512vl")]
34311#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34312#[cfg_attr(test, assert_instr(vpmovusqd))]
34313pub unsafe fn _mm256_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
    vpmovusqdmem256(mem_addr, a.as_i64x4(), k);
34315}
34316
34317/// Convert packed unsigned 64-bit integers in a to packed 32-bit integers with unsigned saturation, and store the active results (those with their respective bit set in writemask k) to unaligned memory at base_addr.
34318///
34319/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_cvtusepi64_storeu_epi32&expand=2102)
34320#[inline]
34321#[target_feature(enable = "avx512f,avx512vl")]
34322#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34323#[cfg_attr(test, assert_instr(vpmovusqd))]
34324pub unsafe fn _mm_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
    vpmovusqdmem128(mem_addr, a.as_i64x2(), k);
34326}
34327
34328/// Store 512-bits (composed of 16 packed 32-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
34329///
34330/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_epi32&expand=5628)
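///
/// # Example
///
/// An illustrative sketch (not compiled as a doctest here); it assumes AVX-512F is
/// available. The destination only has to be valid for 64 bytes of writes; no
/// particular alignment is required:
///
/// ```ignore
/// let mut out = [0i32; 16];
/// unsafe {
///     let a = _mm512_set1_epi32(7);
///     _mm512_storeu_epi32(out.as_mut_ptr(), a);
/// }
/// assert_eq!(out, [7i32; 16]);
/// ```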
34331#[inline]
34332#[target_feature(enable = "avx512f")]
34333#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34334#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
34335pub unsafe fn _mm512_storeu_epi32(mem_addr: *mut i32, a: __m512i) {
    ptr::write_unaligned(mem_addr as *mut __m512i, a);
34337}
34338
34339/// Store 256-bits (composed of 8 packed 32-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
34340///
34341/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_epi32&expand=5626)
34342#[inline]
34343#[target_feature(enable = "avx512f,avx512vl")]
34344#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34345#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
34346pub unsafe fn _mm256_storeu_epi32(mem_addr: *mut i32, a: __m256i) {
    ptr::write_unaligned(mem_addr as *mut __m256i, a);
34348}
34349
34350/// Store 128-bits (composed of 4 packed 32-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
34351///
34352/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_epi32&expand=5624)
34353#[inline]
34354#[target_feature(enable = "avx512f,avx512vl")]
34355#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34356#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
34357pub unsafe fn _mm_storeu_epi32(mem_addr: *mut i32, a: __m128i) {
    ptr::write_unaligned(mem_addr as *mut __m128i, a);
34359}
34360
34361/// Load 512-bits (composed of 8 packed 64-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
34362///
34363/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_epi64&expand=3386)
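///
/// # Example
///
/// An illustrative sketch (not compiled as a doctest here); it assumes AVX-512F is
/// available and round-trips the loaded vector through an unaligned store:
///
/// ```ignore
/// let src: [i64; 8] = [0, 1, 2, 3, 4, 5, 6, 7];
/// let mut out = [0i64; 8];
/// unsafe {
///     // A plain slice pointer is fine: loadu has no alignment requirement.
///     let v = _mm512_loadu_epi64(src.as_ptr());
///     _mm512_storeu_epi64(out.as_mut_ptr(), v);
/// }
/// assert_eq!(out, src);
/// ```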
34364#[inline]
34365#[target_feature(enable = "avx512f")]
34366#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34367#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
34368pub unsafe fn _mm512_loadu_epi64(mem_addr: *const i64) -> __m512i {
    ptr::read_unaligned(mem_addr as *const __m512i)
34370}
34371
34372/// Load 256-bits (composed of 4 packed 64-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
34373///
34374/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_epi64&expand=3383)
34375#[inline]
34376#[target_feature(enable = "avx512f,avx512vl")]
34377#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34378#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
34379pub unsafe fn _mm256_loadu_epi64(mem_addr: *const i64) -> __m256i {
    ptr::read_unaligned(mem_addr as *const __m256i)
34381}
34382
34383/// Load 128-bits (composed of 2 packed 64-bit integers) from memory into dst. mem_addr does not need to be aligned on any particular boundary.
34384///
34385/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_loadu_epi64&expand=3380)
34386#[inline]
34387#[target_feature(enable = "avx512f,avx512vl")]
34388#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34389#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
34390pub unsafe fn _mm_loadu_epi64(mem_addr: *const i64) -> __m128i {
    ptr::read_unaligned(mem_addr as *const __m128i)
34392}
34393
34394/// Store 512-bits (composed of 8 packed 64-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
34395///
34396/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_epi64&expand=5634)
34397#[inline]
34398#[target_feature(enable = "avx512f")]
34399#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34400#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
34401pub unsafe fn _mm512_storeu_epi64(mem_addr: *mut i64, a: __m512i) {
    ptr::write_unaligned(mem_addr as *mut __m512i, a);
34403}
34404
34405/// Store 256-bits (composed of 4 packed 64-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
34406///
34407/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_epi64&expand=5632)
34408#[inline]
34409#[target_feature(enable = "avx512f,avx512vl")]
34410#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34411#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
34412pub unsafe fn _mm256_storeu_epi64(mem_addr: *mut i64, a: __m256i) {
    ptr::write_unaligned(mem_addr as *mut __m256i, a);
34414}
34415
34416/// Store 128-bits (composed of 2 packed 64-bit integers) from a into memory. mem_addr does not need to be aligned on any particular boundary.
34417///
34418/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_storeu_epi64&expand=5630)
34419#[inline]
34420#[target_feature(enable = "avx512f,avx512vl")]
34421#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34422#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu64
34423pub unsafe fn _mm_storeu_epi64(mem_addr: *mut i64, a: __m128i) {
    ptr::write_unaligned(mem_addr as *mut __m128i, a);
34425}
34426
34427/// Load 512-bits of integer data from memory into dst. mem_addr does not need to be aligned on any particular boundary.
34428///
34429/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_si512&expand=3420)
34430#[inline]
34431#[target_feature(enable = "avx512f")]
34432#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34433#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
34434pub unsafe fn _mm512_loadu_si512(mem_addr: *const i32) -> __m512i {
    ptr::read_unaligned(mem_addr as *const __m512i)
34436}
34437
34438/// Store 512-bits of integer data from a into memory. mem_addr does not need to be aligned on any particular boundary.
34439///
34440/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_si512&expand=5657)
34441#[inline]
34442#[target_feature(enable = "avx512f")]
34443#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34444#[cfg_attr(test, assert_instr(vmovups))] //should be vmovdqu32
34445pub unsafe fn _mm512_storeu_si512(mem_addr: *mut __m512i, a: __m512i) {
    ptr::write_unaligned(mem_addr, a);
34447}
34448
34449/// Loads 512-bits (composed of 8 packed double-precision (64-bit)
34450/// floating-point elements) from memory into result.
34451/// `mem_addr` does not need to be aligned on any particular boundary.
34452///
34453/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_pd)
34454#[inline]
34455#[target_feature(enable = "avx512f")]
34456#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34457#[cfg_attr(test, assert_instr(vmovups))]
34458pub unsafe fn _mm512_loadu_pd(mem_addr: *const f64) -> __m512d {
    ptr::read_unaligned(mem_addr as *const __m512d)
34460}
34461
34462/// Stores 512-bits (composed of 8 packed double-precision (64-bit)
34463/// floating-point elements) from `a` into memory.
34464/// `mem_addr` does not need to be aligned on any particular boundary.
34465///
34466/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_pd)
34467#[inline]
34468#[target_feature(enable = "avx512f")]
34469#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34470#[cfg_attr(test, assert_instr(vmovups))]
34471pub unsafe fn _mm512_storeu_pd(mem_addr: *mut f64, a: __m512d) {
    ptr::write_unaligned(mem_addr as *mut __m512d, a);
34473}
34474
34475/// Loads 512-bits (composed of 16 packed single-precision (32-bit)
34476/// floating-point elements) from memory into result.
34477/// `mem_addr` does not need to be aligned on any particular boundary.
34478///
34479/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_loadu_ps)
34480#[inline]
34481#[target_feature(enable = "avx512f")]
34482#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34483#[cfg_attr(test, assert_instr(vmovups))]
34484pub unsafe fn _mm512_loadu_ps(mem_addr: *const f32) -> __m512 {
    ptr::read_unaligned(mem_addr as *const __m512)
34486}
34487
34488/// Stores 512-bits (composed of 16 packed single-precision (32-bit)
34489/// floating-point elements) from `a` into memory.
34490/// `mem_addr` does not need to be aligned on any particular boundary.
34491///
34492/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_storeu_ps)
34493#[inline]
34494#[target_feature(enable = "avx512f")]
34495#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34496#[cfg_attr(test, assert_instr(vmovups))]
34497pub unsafe fn _mm512_storeu_ps(mem_addr: *mut f32, a: __m512) {
    ptr::write_unaligned(mem_addr as *mut __m512, a);
34499}
34500
34501/// Load 512-bits of integer data from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34502///
34503/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_si512&expand=3345)
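///
/// # Example
///
/// An illustrative sketch (not compiled as a doctest here); it assumes AVX-512F is
/// available and uses a hypothetical `Aligned` wrapper only to satisfy the 64-byte
/// alignment requirement:
///
/// ```ignore
/// #[repr(align(64))]
/// struct Aligned([i32; 16]);
///
/// let data = Aligned([1; 16]);
/// let mut out = [0i32; 16];
/// unsafe {
///     // `data.0` starts at offset 0 of a 64-byte-aligned struct, so the load is valid.
///     let v = _mm512_load_si512(data.0.as_ptr());
///     _mm512_storeu_si512(out.as_mut_ptr().cast(), v);
/// }
/// assert_eq!(out, [1; 16]);
/// ```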
34504#[inline]
34505#[target_feature(enable = "avx512f")]
34506#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34507#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa32
34508pub unsafe fn _mm512_load_si512(mem_addr: *const i32) -> __m512i {
    ptr::read(mem_addr as *const __m512i)
34510}
34511
34512/// Store 512-bits of integer data from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34513///
34514/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_si512&expand=5598)
34515#[inline]
34516#[target_feature(enable = "avx512f")]
34517#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34518#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa32
34519pub unsafe fn _mm512_store_si512(mem_addr: *mut __m512i, a: __m512i) {
    ptr::write(mem_addr, a);
34521}
34522
34523/// Load 512-bits (composed of 16 packed 32-bit integers) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34524///
34525/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_epi32&expand=3304)
34526#[inline]
34527#[target_feature(enable = "avx512f")]
34528#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34529#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa32
34530pub unsafe fn _mm512_load_epi32(mem_addr: *const i32) -> __m512i {
    ptr::read(mem_addr as *const __m512i)
34532}
34533
34534/// Load 256-bits (composed of 8 packed 32-bit integers) from memory into dst. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
34535///
34536/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_load_epi32&expand=3301)
34537#[inline]
34538#[target_feature(enable = "avx512f,avx512vl")]
34539#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34540#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa32
34541pub unsafe fn _mm256_load_epi32(mem_addr: *const i32) -> __m256i {
    ptr::read(mem_addr as *const __m256i)
34543}
34544
34545/// Load 128-bits (composed of 4 packed 32-bit integers) from memory into dst. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
34546///
34547/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_epi32&expand=3298)
34548#[inline]
34549#[target_feature(enable = "avx512f,avx512vl")]
34550#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34551#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa32
34552pub unsafe fn _mm_load_epi32(mem_addr: *const i32) -> __m128i {
    ptr::read(mem_addr as *const __m128i)
34554}
34555
34556/// Store 512-bits (composed of 16 packed 32-bit integers) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34557///
34558/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_epi32&expand=5569)
34559#[inline]
34560#[target_feature(enable = "avx512f")]
34561#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34562#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa32
34563pub unsafe fn _mm512_store_epi32(mem_addr: *mut i32, a: __m512i) {
    ptr::write(mem_addr as *mut __m512i, a);
34565}
34566
34567/// Store 256-bits (composed of 8 packed 32-bit integers) from a into memory. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
34568///
34569/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_store_epi32&expand=5567)
34570#[inline]
34571#[target_feature(enable = "avx512f,avx512vl")]
34572#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34573#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa32
34574pub unsafe fn _mm256_store_epi32(mem_addr: *mut i32, a: __m256i) {
    ptr::write(mem_addr as *mut __m256i, a);
34576}
34577
34578/// Store 128-bits (composed of 4 packed 32-bit integers) from a into memory. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
34579///
34580/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_epi32&expand=5565)
34581#[inline]
34582#[target_feature(enable = "avx512f,avx512vl")]
34583#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34584#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa32
34585pub unsafe fn _mm_store_epi32(mem_addr: *mut i32, a: __m128i) {
    ptr::write(mem_addr as *mut __m128i, a);
34587}
34588
34589/// Load 512-bits (composed of 8 packed 64-bit integers) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34590///
34591/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_epi64&expand=3313)
34592#[inline]
34593#[target_feature(enable = "avx512f")]
34594#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34595#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa64
34596pub unsafe fn _mm512_load_epi64(mem_addr: *const i64) -> __m512i {
    ptr::read(mem_addr as *const __m512i)
34598}
34599
34600/// Load 256-bits (composed of 4 packed 64-bit integers) from memory into dst. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
34601///
34602/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_load_epi64&expand=3310)
34603#[inline]
34604#[target_feature(enable = "avx512f,avx512vl")]
34605#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34606#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa64
34607pub unsafe fn _mm256_load_epi64(mem_addr: *const i64) -> __m256i {
    ptr::read(mem_addr as *const __m256i)
34609}
34610
34611/// Load 128-bits (composed of 2 packed 64-bit integers) from memory into dst. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
34612///
34613/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_load_epi64&expand=3307)
34614#[inline]
34615#[target_feature(enable = "avx512f,avx512vl")]
34616#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34617#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa64
34618pub unsafe fn _mm_load_epi64(mem_addr: *const i64) -> __m128i {
    ptr::read(mem_addr as *const __m128i)
34620}
34621
34622/// Store 512-bits (composed of 8 packed 64-bit integers) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34623///
34624/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_epi64&expand=5575)
34625#[inline]
34626#[target_feature(enable = "avx512f")]
34627#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34628#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa64
34629pub unsafe fn _mm512_store_epi64(mem_addr: *mut i64, a: __m512i) {
    ptr::write(mem_addr as *mut __m512i, a);
34631}
34632
34633/// Store 256-bits (composed of 4 packed 64-bit integers) from a into memory. mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
34634///
34635/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_store_epi64&expand=5573)
34636#[inline]
34637#[target_feature(enable = "avx512f,avx512vl")]
34638#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34639#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa64
34640pub unsafe fn _mm256_store_epi64(mem_addr: *mut i64, a: __m256i) {
    ptr::write(mem_addr as *mut __m256i, a);
34642}
34643
34644/// Store 128-bits (composed of 2 packed 64-bit integers) from a into memory. mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
34645///
34646/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_store_epi64&expand=5571)
34647#[inline]
34648#[target_feature(enable = "avx512f,avx512vl")]
34649#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34650#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovdqa64
34651pub unsafe fn _mm_store_epi64(mem_addr: *mut i64, a: __m128i) {
    ptr::write(mem_addr as *mut __m128i, a);
34653}
34654
34655/// Load 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34656///
34657/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_ps&expand=3336)
34658#[inline]
34659#[target_feature(enable = "avx512f")]
34660#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34661#[cfg_attr(test, assert_instr(vmovaps))]
34662pub unsafe fn _mm512_load_ps(mem_addr: *const f32) -> __m512 {
    ptr::read(mem_addr as *const __m512)
34664}
34665
/// Store 512-bits (composed of 16 packed single-precision (32-bit) floating-point elements) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34667///
34668/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_ps&expand=5592)
34669#[inline]
34670#[target_feature(enable = "avx512f")]
34671#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34672#[cfg_attr(test, assert_instr(vmovaps))]
34673pub unsafe fn _mm512_store_ps(mem_addr: *mut f32, a: __m512) {
    ptr::write(mem_addr as *mut __m512, a);
34675}
34676
34677/// Load 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from memory into dst. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34678///
34679/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_load_pd&expand=3326)
34680#[inline]
34681#[target_feature(enable = "avx512f")]
34682#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34683#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovapd
34684pub unsafe fn _mm512_load_pd(mem_addr: *const f64) -> __m512d {
    ptr::read(mem_addr as *const __m512d)
34686}
34687
34688/// Store 512-bits (composed of 8 packed double-precision (64-bit) floating-point elements) from a into memory. mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
34689///
34690/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_store_pd&expand=5585)
34691#[inline]
34692#[target_feature(enable = "avx512f")]
34693#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34694#[cfg_attr(test, assert_instr(vmovaps))] //should be vmovapd
34695pub unsafe fn _mm512_store_pd(mem_addr: *mut f64, a: __m512d) {
    ptr::write(mem_addr as *mut __m512d, a);
34697}
34698
34699/// Load packed 32-bit integers from memory into dst using writemask k
34700/// (elements are copied from src when the corresponding mask bit is not set).
34701/// mem_addr does not need to be aligned on any particular boundary.
34702///
34703/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_epi32)
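///
/// # Example
///
/// An illustrative sketch (not compiled as a doctest here) of merge-masking; it
/// assumes AVX-512F is available:
///
/// ```ignore
/// let src = [5i32; 16];
/// let mut out = [0i32; 16];
/// unsafe {
///     let fallback = _mm512_set1_epi32(-1);
///     // Lanes 0..8 are loaded from memory; lanes 8..16 are copied from `fallback`.
///     let v = _mm512_mask_loadu_epi32(fallback, 0x00FF, src.as_ptr());
///     _mm512_storeu_epi32(out.as_mut_ptr(), v);
/// }
/// assert_eq!(&out[..8], &[5; 8]);
/// assert_eq!(&out[8..], &[-1; 8]);
/// ```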
34704#[inline]
34705#[target_feature(enable = "avx512f")]
34706#[cfg_attr(test, assert_instr(vmovdqu32))]
34707#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34708pub unsafe fn _mm512_mask_loadu_epi32(src: __m512i, k: __mmask16, mem_addr: *const i32) -> __m512i {
    transmute(loaddqu32_512(mem_addr, src.as_i32x16(), k))
34710}
34711
34712/// Load packed 32-bit integers from memory into dst using zeromask k
34713/// (elements are zeroed out when the corresponding mask bit is not set).
34714/// mem_addr does not need to be aligned on any particular boundary.
34715///
34716/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_epi32)
34717#[inline]
34718#[target_feature(enable = "avx512f")]
34719#[cfg_attr(test, assert_instr(vmovdqu32))]
34720#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34721pub unsafe fn _mm512_maskz_loadu_epi32(k: __mmask16, mem_addr: *const i32) -> __m512i {
    _mm512_mask_loadu_epi32(_mm512_setzero_si512(), k, mem_addr)
34723}
34724
34725/// Load packed 64-bit integers from memory into dst using writemask k
34726/// (elements are copied from src when the corresponding mask bit is not set).
34727/// mem_addr does not need to be aligned on any particular boundary.
34728///
34729/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_epi64)
34730#[inline]
34731#[target_feature(enable = "avx512f")]
34732#[cfg_attr(test, assert_instr(vmovdqu64))]
34733#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34734pub unsafe fn _mm512_mask_loadu_epi64(src: __m512i, k: __mmask8, mem_addr: *const i64) -> __m512i {
    transmute(loaddqu64_512(mem_addr, src.as_i64x8(), k))
34736}
34737
34738/// Load packed 64-bit integers from memory into dst using zeromask k
34739/// (elements are zeroed out when the corresponding mask bit is not set).
34740/// mem_addr does not need to be aligned on any particular boundary.
34741///
34742/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_epi64)
34743#[inline]
34744#[target_feature(enable = "avx512f")]
34745#[cfg_attr(test, assert_instr(vmovdqu64))]
34746#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34747pub unsafe fn _mm512_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m512i {
    _mm512_mask_loadu_epi64(_mm512_setzero_si512(), k, mem_addr)
34749}
34750
34751/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
34752/// (elements are copied from src when the corresponding mask bit is not set).
34753/// mem_addr does not need to be aligned on any particular boundary.
34754///
34755/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_ps)
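///
/// # Example
///
/// An illustrative sketch (not compiled as a doctest here); it assumes AVX-512F is
/// available:
///
/// ```ignore
/// let src = [2.0f32; 16];
/// let mut out = [0.0f32; 16];
/// unsafe {
///     let fallback = _mm512_set1_ps(1.0);
///     // Even lanes come from memory, odd lanes keep the value from `fallback`.
///     let v = _mm512_mask_loadu_ps(fallback, 0b0101_0101_0101_0101, src.as_ptr());
///     _mm512_storeu_ps(out.as_mut_ptr(), v);
/// }
/// assert_eq!(out[0], 2.0);
/// assert_eq!(out[1], 1.0);
/// ```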
34756#[inline]
34757#[target_feature(enable = "avx512f")]
34758#[cfg_attr(test, assert_instr(vmovups))]
34759#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34760pub unsafe fn _mm512_mask_loadu_ps(src: __m512, k: __mmask16, mem_addr: *const f32) -> __m512 {
    transmute(loadups_512(mem_addr, src.as_f32x16(), k))
34762}
34763
34764/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
34765/// (elements are zeroed out when the corresponding mask bit is not set).
34766/// mem_addr does not need to be aligned on any particular boundary.
34767///
34768/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_ps)
34769#[inline]
34770#[target_feature(enable = "avx512f")]
34771#[cfg_attr(test, assert_instr(vmovups))]
34772#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34773pub unsafe fn _mm512_maskz_loadu_ps(k: __mmask16, mem_addr: *const f32) -> __m512 {
    _mm512_mask_loadu_ps(_mm512_setzero_ps(), k, mem_addr)
34775}
34776
34777/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
34778/// (elements are copied from src when the corresponding mask bit is not set).
34779/// mem_addr does not need to be aligned on any particular boundary.
34780///
34781/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_loadu_pd)
34782#[inline]
34783#[target_feature(enable = "avx512f")]
34784#[cfg_attr(test, assert_instr(vmovupd))]
34785#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34786pub unsafe fn _mm512_mask_loadu_pd(src: __m512d, k: __mmask8, mem_addr: *const f64) -> __m512d {
    transmute(loadupd_512(mem_addr, src.as_f64x8(), k))
34788}
34789
34790/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
34791/// (elements are zeroed out when the corresponding mask bit is not set).
34792/// mem_addr does not need to be aligned on any particular boundary.
34793///
34794/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_loadu_pd)
34795#[inline]
34796#[target_feature(enable = "avx512f")]
34797#[cfg_attr(test, assert_instr(vmovupd))]
34798#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34799pub unsafe fn _mm512_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m512d {
    _mm512_mask_loadu_pd(_mm512_setzero_pd(), k, mem_addr)
34801}
34802
34803/// Load packed 32-bit integers from memory into dst using writemask k
34804/// (elements are copied from src when the corresponding mask bit is not set).
34805/// mem_addr does not need to be aligned on any particular boundary.
34806///
34807/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_epi32)
34808#[inline]
34809#[target_feature(enable = "avx512f,avx512vl")]
34810#[cfg_attr(test, assert_instr(vmovdqu32))]
34811#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34812pub unsafe fn _mm256_mask_loadu_epi32(src: __m256i, k: __mmask8, mem_addr: *const i32) -> __m256i {
    transmute(loaddqu32_256(mem_addr, src.as_i32x8(), k))
34814}
34815
34816/// Load packed 32-bit integers from memory into dst using zeromask k
34817/// (elements are zeroed out when the corresponding mask bit is not set).
34818/// mem_addr does not need to be aligned on any particular boundary.
34819///
34820/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_epi32)
34821#[inline]
34822#[target_feature(enable = "avx512f,avx512vl")]
34823#[cfg_attr(test, assert_instr(vmovdqu32))]
34824#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34825pub unsafe fn _mm256_maskz_loadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m256i {
    _mm256_mask_loadu_epi32(_mm256_setzero_si256(), k, mem_addr)
34827}
34828
34829/// Load packed 64-bit integers from memory into dst using writemask k
34830/// (elements are copied from src when the corresponding mask bit is not set).
34831/// mem_addr does not need to be aligned on any particular boundary.
34832///
34833/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_epi64)
34834#[inline]
34835#[target_feature(enable = "avx512f,avx512vl")]
34836#[cfg_attr(test, assert_instr(vmovdqu64))]
34837#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34838pub unsafe fn _mm256_mask_loadu_epi64(src: __m256i, k: __mmask8, mem_addr: *const i64) -> __m256i {
    transmute(loaddqu64_256(mem_addr, src.as_i64x4(), k))
34840}
34841
34842/// Load packed 64-bit integers from memory into dst using zeromask k
34843/// (elements are zeroed out when the corresponding mask bit is not set).
34844/// mem_addr does not need to be aligned on any particular boundary.
34845///
34846/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_epi64)
34847#[inline]
34848#[target_feature(enable = "avx512f,avx512vl")]
34849#[cfg_attr(test, assert_instr(vmovdqu64))]
34850#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34851pub unsafe fn _mm256_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m256i {
    _mm256_mask_loadu_epi64(_mm256_setzero_si256(), k, mem_addr)
34853}
34854
34855/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
34856/// (elements are copied from src when the corresponding mask bit is not set).
34857/// mem_addr does not need to be aligned on any particular boundary.
34858///
34859/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_ps)
34860#[inline]
34861#[target_feature(enable = "avx512f,avx512vl")]
34862#[cfg_attr(test, assert_instr(vmovups))]
34863#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34864pub unsafe fn _mm256_mask_loadu_ps(src: __m256, k: __mmask8, mem_addr: *const f32) -> __m256 {
    transmute(loadups_256(mem_addr, src.as_f32x8(), k))
34866}
34867
34868/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
34869/// (elements are zeroed out when the corresponding mask bit is not set).
34870/// mem_addr does not need to be aligned on any particular boundary.
34871///
34872/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_ps)
34873#[inline]
34874#[target_feature(enable = "avx512f,avx512vl")]
34875#[cfg_attr(test, assert_instr(vmovups))]
34876#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34877pub unsafe fn _mm256_maskz_loadu_ps(k: __mmask8, mem_addr: *const f32) -> __m256 {
    _mm256_mask_loadu_ps(_mm256_setzero_ps(), k, mem_addr)
34879}
34880
34881/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
34882/// (elements are copied from src when the corresponding mask bit is not set).
34883/// mem_addr does not need to be aligned on any particular boundary.
34884///
34885/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_loadu_pd)
34886#[inline]
34887#[target_feature(enable = "avx512f,avx512vl")]
34888#[cfg_attr(test, assert_instr(vmovupd))]
34889#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34890pub unsafe fn _mm256_mask_loadu_pd(src: __m256d, k: __mmask8, mem_addr: *const f64) -> __m256d {
    transmute(loadupd_256(mem_addr, src.as_f64x4(), k))
34892}
34893
34894/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
34895/// (elements are zeroed out when the corresponding mask bit is not set).
34896/// mem_addr does not need to be aligned on any particular boundary.
34897///
34898/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_loadu_pd)
34899#[inline]
34900#[target_feature(enable = "avx512f,avx512vl")]
34901#[cfg_attr(test, assert_instr(vmovupd))]
34902#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34903pub unsafe fn _mm256_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m256d {
    _mm256_mask_loadu_pd(_mm256_setzero_pd(), k, mem_addr)
34905}
34906
34907/// Load packed 32-bit integers from memory into dst using writemask k
34908/// (elements are copied from src when the corresponding mask bit is not set).
34909/// mem_addr does not need to be aligned on any particular boundary.
34910///
34911/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_epi32)
34912#[inline]
34913#[target_feature(enable = "avx512f,avx512vl")]
34914#[cfg_attr(test, assert_instr(vmovdqu32))]
34915#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34916pub unsafe fn _mm_mask_loadu_epi32(src: __m128i, k: __mmask8, mem_addr: *const i32) -> __m128i {
    transmute(loaddqu32_128(mem_addr, src.as_i32x4(), k))
34918}
34919
34920/// Load packed 32-bit integers from memory into dst using zeromask k
34921/// (elements are zeroed out when the corresponding mask bit is not set).
34922/// mem_addr does not need to be aligned on any particular boundary.
34923///
34924/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_epi32)
34925#[inline]
34926#[target_feature(enable = "avx512f,avx512vl")]
34927#[cfg_attr(test, assert_instr(vmovdqu32))]
34928#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34929pub unsafe fn _mm_maskz_loadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m128i {
    _mm_mask_loadu_epi32(_mm_setzero_si128(), k, mem_addr)
34931}
34932
34933/// Load packed 64-bit integers from memory into dst using writemask k
34934/// (elements are copied from src when the corresponding mask bit is not set).
34935/// mem_addr does not need to be aligned on any particular boundary.
34936///
34937/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_epi64)
34938#[inline]
34939#[target_feature(enable = "avx512f,avx512vl")]
34940#[cfg_attr(test, assert_instr(vmovdqu64))]
34941#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34942pub unsafe fn _mm_mask_loadu_epi64(src: __m128i, k: __mmask8, mem_addr: *const i64) -> __m128i {
    transmute(loaddqu64_128(mem_addr, src.as_i64x2(), k))
34944}
34945
34946/// Load packed 64-bit integers from memory into dst using zeromask k
34947/// (elements are zeroed out when the corresponding mask bit is not set).
34948/// mem_addr does not need to be aligned on any particular boundary.
34949///
34950/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_epi64)
34951#[inline]
34952#[target_feature(enable = "avx512f,avx512vl")]
34953#[cfg_attr(test, assert_instr(vmovdqu64))]
34954#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34955pub unsafe fn _mm_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m128i {
    _mm_mask_loadu_epi64(_mm_setzero_si128(), k, mem_addr)
34957}
34958
34959/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
34960/// (elements are copied from src when the corresponding mask bit is not set).
34961/// mem_addr does not need to be aligned on any particular boundary.
34962///
34963/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_ps)
34964#[inline]
34965#[target_feature(enable = "avx512f,avx512vl")]
34966#[cfg_attr(test, assert_instr(vmovups))]
34967#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34968pub unsafe fn _mm_mask_loadu_ps(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
    transmute(loadups_128(mem_addr, src.as_f32x4(), k))
34970}
34971
34972/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
34973/// (elements are zeroed out when the corresponding mask bit is not set).
34974/// mem_addr does not need to be aligned on any particular boundary.
34975///
34976/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_ps)
34977#[inline]
34978#[target_feature(enable = "avx512f,avx512vl")]
34979#[cfg_attr(test, assert_instr(vmovups))]
34980#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34981pub unsafe fn _mm_maskz_loadu_ps(k: __mmask8, mem_addr: *const f32) -> __m128 {
    _mm_mask_loadu_ps(_mm_setzero_ps(), k, mem_addr)
34983}
34984
34985/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
34986/// (elements are copied from src when the corresponding mask bit is not set).
34987/// mem_addr does not need to be aligned on any particular boundary.
34988///
34989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_loadu_pd)
34990#[inline]
34991#[target_feature(enable = "avx512f,avx512vl")]
34992#[cfg_attr(test, assert_instr(vmovupd))]
34993#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
34994pub unsafe fn _mm_mask_loadu_pd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
    transmute(loadupd_128(mem_addr, src.as_f64x2(), k))
34996}
34997
34998/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
34999/// (elements are zeroed out when the corresponding mask bit is not set).
35000/// mem_addr does not need to be aligned on any particular boundary.
35001///
35002/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_loadu_pd)
35003#[inline]
35004#[target_feature(enable = "avx512f,avx512vl")]
35005#[cfg_attr(test, assert_instr(vmovupd))]
35006#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35007pub unsafe fn _mm_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m128d {
    _mm_mask_loadu_pd(_mm_setzero_pd(), k, mem_addr)
35009}
35010
35011/// Load packed 32-bit integers from memory into dst using writemask k
35012/// (elements are copied from src when the corresponding mask bit is not set).
35013/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35014///
35015/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_epi32)
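///
/// # Example
///
/// A minimal usage sketch (not part of the original documentation). The wrapper type is
/// only there to guarantee the 64-byte alignment the instruction requires; the block
/// assumes the unstable `stdarch_x86_avx512` feature and runtime `avx512f` support, so
/// it is marked `ignore`:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[repr(align(64))]
/// struct Aligned([i32; 16]);
///
/// unsafe {
///     let data = Aligned([1i32; 16]);
///     let src = _mm512_setzero_si512();
///     // Load only the lower eight lanes; the upper eight stay zero.
///     let r = _mm512_mask_load_epi32(src, 0x00FF, data.0.as_ptr());
///     let out: [i32; 16] = core::mem::transmute(r);
///     assert_eq!(&out[..8], &[1; 8]);
///     assert_eq!(&out[8..], &[0; 8]);
/// }
/// ```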
35016#[inline]
35017#[target_feature(enable = "avx512f")]
35018#[cfg_attr(test, assert_instr(vmovdqa32))]
35019#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35020pub unsafe fn _mm512_mask_load_epi32(src: __m512i, k: __mmask16, mem_addr: *const i32) -> __m512i {
    transmute(loaddqa32_512(mem_addr, src.as_i32x16(), k))
35022}
35023
35024/// Load packed 32-bit integers from memory into dst using zeromask k
35025/// (elements are zeroed out when the corresponding mask bit is not set).
35026/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35027///
35028/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_epi32)
35029#[inline]
35030#[target_feature(enable = "avx512f")]
35031#[cfg_attr(test, assert_instr(vmovdqa32))]
35032#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35033pub unsafe fn _mm512_maskz_load_epi32(k: __mmask16, mem_addr: *const i32) -> __m512i {
    _mm512_mask_load_epi32(_mm512_setzero_si512(), k, mem_addr)
35035}
35036
35037/// Load packed 64-bit integers from memory into dst using writemask k
35038/// (elements are copied from src when the corresponding mask bit is not set).
35039/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35040///
35041/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_epi64)
35042#[inline]
35043#[target_feature(enable = "avx512f")]
35044#[cfg_attr(test, assert_instr(vmovdqa64))]
35045#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35046pub unsafe fn _mm512_mask_load_epi64(src: __m512i, k: __mmask8, mem_addr: *const i64) -> __m512i {
    transmute(loaddqa64_512(mem_addr, src.as_i64x8(), k))
35048}
35049
35050/// Load packed 64-bit integers from memory into dst using zeromask k
35051/// (elements are zeroed out when the corresponding mask bit is not set).
35052/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35053///
35054/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_epi64)
35055#[inline]
35056#[target_feature(enable = "avx512f")]
35057#[cfg_attr(test, assert_instr(vmovdqa64))]
35058#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35059pub unsafe fn _mm512_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m512i {
    _mm512_mask_load_epi64(_mm512_setzero_si512(), k, mem_addr)
35061}
35062
35063/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
35064/// (elements are copied from src when the corresponding mask bit is not set).
35065/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35066///
35067/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_ps)
35068#[inline]
35069#[target_feature(enable = "avx512f")]
35070#[cfg_attr(test, assert_instr(vmovaps))]
35071#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35072pub unsafe fn _mm512_mask_load_ps(src: __m512, k: __mmask16, mem_addr: *const f32) -> __m512 {
    transmute(loadaps_512(mem_addr, src.as_f32x16(), k))
35074}
35075
35076/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
35077/// (elements are zeroed out when the corresponding mask bit is not set).
35078/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35079///
35080/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_ps)
35081#[inline]
35082#[target_feature(enable = "avx512f")]
35083#[cfg_attr(test, assert_instr(vmovaps))]
35084#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35085pub unsafe fn _mm512_maskz_load_ps(k: __mmask16, mem_addr: *const f32) -> __m512 {
    _mm512_mask_load_ps(_mm512_setzero_ps(), k, mem_addr)
35087}
35088
35089/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
35090/// (elements are copied from src when the corresponding mask bit is not set).
35091/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35092///
35093/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_pd)
35094#[inline]
35095#[target_feature(enable = "avx512f")]
35096#[cfg_attr(test, assert_instr(vmovapd))]
35097#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35098pub unsafe fn _mm512_mask_load_pd(src: __m512d, k: __mmask8, mem_addr: *const f64) -> __m512d {
    transmute(loadapd_512(mem_addr, src.as_f64x8(), k))
35100}
35101
35102/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
35103/// (elements are zeroed out when the corresponding mask bit is not set).
35104/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35105///
35106/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_pd)
35107#[inline]
35108#[target_feature(enable = "avx512f")]
35109#[cfg_attr(test, assert_instr(vmovapd))]
35110#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35111pub unsafe fn _mm512_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m512d {
    _mm512_mask_load_pd(_mm512_setzero_pd(), k, mem_addr)
35113}
35114
35115/// Load packed 32-bit integers from memory into dst using writemask k
35116/// (elements are copied from src when the corresponding mask bit is not set).
35117/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35118///
35119/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_epi32)
35120#[inline]
35121#[target_feature(enable = "avx512f,avx512vl")]
35122#[cfg_attr(test, assert_instr(vmovdqa32))]
35123#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35124pub unsafe fn _mm256_mask_load_epi32(src: __m256i, k: __mmask8, mem_addr: *const i32) -> __m256i {
    transmute(loaddqa32_256(mem_addr, src.as_i32x8(), k))
35126}
35127
35128/// Load packed 32-bit integers from memory into dst using zeromask k
35129/// (elements are zeroed out when the corresponding mask bit is not set).
35130/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35131///
35132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_epi32)
35133#[inline]
35134#[target_feature(enable = "avx512f,avx512vl")]
35135#[cfg_attr(test, assert_instr(vmovdqa32))]
35136#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35137pub unsafe fn _mm256_maskz_load_epi32(k: __mmask8, mem_addr: *const i32) -> __m256i {
    _mm256_mask_load_epi32(_mm256_setzero_si256(), k, mem_addr)
35139}
35140
35141/// Load packed 64-bit integers from memory into dst using writemask k
35142/// (elements are copied from src when the corresponding mask bit is not set).
35143/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35144///
35145/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_epi64)
35146#[inline]
35147#[target_feature(enable = "avx512f,avx512vl")]
35148#[cfg_attr(test, assert_instr(vmovdqa64))]
35149#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35150pub unsafe fn _mm256_mask_load_epi64(src: __m256i, k: __mmask8, mem_addr: *const i64) -> __m256i {
    transmute(loaddqa64_256(mem_addr, src.as_i64x4(), k))
35152}
35153
35154/// Load packed 64-bit integers from memory into dst using zeromask k
35155/// (elements are zeroed out when the corresponding mask bit is not set).
35156/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35157///
35158/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_epi64)
35159#[inline]
35160#[target_feature(enable = "avx512f,avx512vl")]
35161#[cfg_attr(test, assert_instr(vmovdqa64))]
35162#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35163pub unsafe fn _mm256_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m256i {
    _mm256_mask_load_epi64(_mm256_setzero_si256(), k, mem_addr)
35165}
35166
35167/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
35168/// (elements are copied from src when the corresponding mask bit is not set).
35169/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35170///
35171/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_ps)
35172#[inline]
35173#[target_feature(enable = "avx512f,avx512vl")]
35174#[cfg_attr(test, assert_instr(vmovaps))]
35175#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35176pub unsafe fn _mm256_mask_load_ps(src: __m256, k: __mmask8, mem_addr: *const f32) -> __m256 {
    transmute(loadaps_256(mem_addr, src.as_f32x8(), k))
35178}
35179
35180/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
35181/// (elements are zeroed out when the corresponding mask bit is not set).
35182/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35183///
35184/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_ps)
35185#[inline]
35186#[target_feature(enable = "avx512f,avx512vl")]
35187#[cfg_attr(test, assert_instr(vmovaps))]
35188#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35189pub unsafe fn _mm256_maskz_load_ps(k: __mmask8, mem_addr: *const f32) -> __m256 {
    _mm256_mask_load_ps(_mm256_setzero_ps(), k, mem_addr)
35191}
35192
35193/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
35194/// (elements are copied from src when the corresponding mask bit is not set).
35195/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35196///
35197/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_pd)
35198#[inline]
35199#[target_feature(enable = "avx512f,avx512vl")]
35200#[cfg_attr(test, assert_instr(vmovapd))]
35201#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35202pub unsafe fn _mm256_mask_load_pd(src: __m256d, k: __mmask8, mem_addr: *const f64) -> __m256d {
    transmute(loadapd_256(mem_addr, src.as_f64x4(), k))
35204}
35205
35206/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
35207/// (elements are zeroed out when the corresponding mask bit is not set).
35208/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35209///
35210/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_pd)
35211#[inline]
35212#[target_feature(enable = "avx512f,avx512vl")]
35213#[cfg_attr(test, assert_instr(vmovapd))]
35214#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35215pub unsafe fn _mm256_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m256d {
    _mm256_mask_load_pd(_mm256_setzero_pd(), k, mem_addr)
35217}
35218
35219/// Load packed 32-bit integers from memory into dst using writemask k
35220/// (elements are copied from src when the corresponding mask bit is not set).
35221/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35222///
35223/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_epi32)
35224#[inline]
35225#[target_feature(enable = "avx512f,avx512vl")]
35226#[cfg_attr(test, assert_instr(vmovdqa32))]
35227#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35228pub unsafe fn _mm_mask_load_epi32(src: __m128i, k: __mmask8, mem_addr: *const i32) -> __m128i {
    transmute(loaddqa32_128(mem_addr, src.as_i32x4(), k))
35230}
35231
35232/// Load packed 32-bit integers from memory into dst using zeromask k
35233/// (elements are zeroed out when the corresponding mask bit is not set).
35234/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35235///
35236/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_epi32)
35237#[inline]
35238#[target_feature(enable = "avx512f,avx512vl")]
35239#[cfg_attr(test, assert_instr(vmovdqa32))]
35240#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35241pub unsafe fn _mm_maskz_load_epi32(k: __mmask8, mem_addr: *const i32) -> __m128i {
    _mm_mask_load_epi32(_mm_setzero_si128(), k, mem_addr)
35243}
35244
35245/// Load packed 64-bit integers from memory into dst using writemask k
35246/// (elements are copied from src when the corresponding mask bit is not set).
35247/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35248///
35249/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_epi64)
35250#[inline]
35251#[target_feature(enable = "avx512f,avx512vl")]
35252#[cfg_attr(test, assert_instr(vmovdqa64))]
35253#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35254pub unsafe fn _mm_mask_load_epi64(src: __m128i, k: __mmask8, mem_addr: *const i64) -> __m128i {
    transmute(loaddqa64_128(mem_addr, src.as_i64x2(), k))
35256}
35257
35258/// Load packed 64-bit integers from memory into dst using zeromask k
35259/// (elements are zeroed out when the corresponding mask bit is not set).
35260/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35261///
35262/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_epi64)
35263#[inline]
35264#[target_feature(enable = "avx512f,avx512vl")]
35265#[cfg_attr(test, assert_instr(vmovdqa64))]
35266#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35267pub unsafe fn _mm_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m128i {
    _mm_mask_load_epi64(_mm_setzero_si128(), k, mem_addr)
35269}
35270
35271/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
35272/// (elements are copied from src when the corresponding mask bit is not set).
35273/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35274///
35275/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_ps)
35276#[inline]
35277#[target_feature(enable = "avx512f,avx512vl")]
35278#[cfg_attr(test, assert_instr(vmovaps))]
35279#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35280pub unsafe fn _mm_mask_load_ps(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
    transmute(loadaps_128(mem_addr, src.as_f32x4(), k))
35282}
35283
35284/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
35285/// (elements are zeroed out when the corresponding mask bit is not set).
35286/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35287///
35288/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_ps)
35289#[inline]
35290#[target_feature(enable = "avx512f,avx512vl")]
35291#[cfg_attr(test, assert_instr(vmovaps))]
35292#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35293pub unsafe fn _mm_maskz_load_ps(k: __mmask8, mem_addr: *const f32) -> __m128 {
    _mm_mask_load_ps(_mm_setzero_ps(), k, mem_addr)
35295}
35296
35297/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
35298/// (elements are copied from src when the corresponding mask bit is not set).
35299/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35300///
35301/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_pd)
35302#[inline]
35303#[target_feature(enable = "avx512f,avx512vl")]
35304#[cfg_attr(test, assert_instr(vmovapd))]
35305#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35306pub unsafe fn _mm_mask_load_pd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
    transmute(loadapd_128(mem_addr, src.as_f64x2(), k))
35308}
35309
35310/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
35311/// (elements are zeroed out when the corresponding mask bit is not set).
35312/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35313///
35314/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_pd)
35315#[inline]
35316#[target_feature(enable = "avx512f,avx512vl")]
35317#[cfg_attr(test, assert_instr(vmovapd))]
35318#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35319pub unsafe fn _mm_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m128d {
    _mm_mask_load_pd(_mm_setzero_pd(), k, mem_addr)
35321}
35322
35323/// Load a single-precision (32-bit) floating-point element from memory into the lower element of dst
35324/// using writemask k (the element is copied from src when mask bit 0 is not set), and set the upper
35325/// 3 packed elements of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection
35326/// exception may be generated.
35327///
35328/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_load_ss)
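///
/// # Example
///
/// A minimal usage sketch (not part of the original documentation). The wrapper type
/// only guarantees the required 16-byte alignment; the block assumes the unstable
/// `stdarch_x86_avx512` feature and runtime `avx512f` support, so it is marked `ignore`:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[repr(align(16))]
/// struct Aligned(f32);
///
/// unsafe {
///     let value = Aligned(7.0);
///     let src = _mm_set1_ps(1.0);
///     // Mask bit 0 is set, so the lower lane is loaded from memory;
///     // the upper three lanes of the result are zeroed.
///     let r = _mm_mask_load_ss(src, 1, &value.0);
///     let out: [f32; 4] = core::mem::transmute(r);
///     assert_eq!(out, [7.0, 0.0, 0.0, 0.0]);
/// }
/// ```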
35329#[inline]
35330#[cfg_attr(test, assert_instr(vmovss))]
35331#[target_feature(enable = "avx512f")]
35332#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35333pub unsafe fn _mm_mask_load_ss(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
35334 let mut dst: __m128 = src;
35335 asm!(
35336 vpl!("vmovss {dst}{{{k}}}"),
35337 p = in(reg) mem_addr,
35338 k = in(kreg) k,
35339 dst = inout(xmm_reg) dst,
35340 options(pure, readonly, nostack, preserves_flags),
35341 );
35342 dst
35343}
35344
35345/// Load a single-precision (32-bit) floating-point element from memory into the lower element of dst
35346/// using zeromask k (the element is zeroed out when mask bit 0 is not set), and set the upper 3 packed
35347/// elements of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection
35348/// exception may be generated.
35349///
35350/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_load_ss)
35351#[inline]
35352#[cfg_attr(test, assert_instr(vmovss))]
35353#[target_feature(enable = "avx512f")]
35354#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35355pub unsafe fn _mm_maskz_load_ss(k: __mmask8, mem_addr: *const f32) -> __m128 {
35356 let mut dst: __m128;
35357 asm!(
35358 vpl!("vmovss {dst}{{{k}}} {{z}}"),
35359 p = in(reg) mem_addr,
35360 k = in(kreg) k,
35361 dst = out(xmm_reg) dst,
35362 options(pure, readonly, nostack, preserves_flags),
35363 );
35364 dst
35365}
35366
35367/// Load a double-precision (64-bit) floating-point element from memory into the lower element of dst
35368/// using writemask k (the element is copied from src when mask bit 0 is not set), and set the upper
35369/// element of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection
35370/// exception may be generated.
35371///
35372/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_load_sd)
35373#[inline]
35374#[cfg_attr(test, assert_instr(vmovsd))]
35375#[target_feature(enable = "avx512f")]
35376#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35377pub unsafe fn _mm_mask_load_sd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
35378 let mut dst: __m128d = src;
35379 asm!(
35380 vpl!("vmovsd {dst}{{{k}}}"),
35381 p = in(reg) mem_addr,
35382 k = in(kreg) k,
35383 dst = inout(xmm_reg) dst,
35384 options(pure, readonly, nostack, preserves_flags),
35385 );
35386 dst
35387}
35388
35389/// Load a double-precision (64-bit) floating-point element from memory into the lower element of dst
35390/// using zeromask k (the element is zeroed out when mask bit 0 is not set), and set the upper element
35391/// of dst to zero. mem_addr must be aligned on a 16-byte boundary or a general-protection exception
35392/// may be generated.
35393///
35394/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_load_sd)
35395#[inline]
35396#[cfg_attr(test, assert_instr(vmovsd))]
35397#[target_feature(enable = "avx512f")]
35398#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35399pub unsafe fn _mm_maskz_load_sd(k: __mmask8, mem_addr: *const f64) -> __m128d {
35400 let mut dst: __m128d;
35401 asm!(
35402 vpl!("vmovsd {dst}{{{k}}} {{z}}"),
35403 p = in(reg) mem_addr,
35404 k = in(kreg) k,
35405 dst = out(xmm_reg) dst,
35406 options(pure, readonly, nostack, preserves_flags),
35407 );
35408 dst
35409}
35410
35411/// Store packed 32-bit integers from a into memory using writemask k.
35412/// mem_addr does not need to be aligned on any particular boundary.
35413///
35414/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_epi32)
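///
/// # Example
///
/// A minimal usage sketch (not part of the original documentation); it assumes the
/// unstable `stdarch_x86_avx512` feature and runtime `avx512f` support, so the block is
/// marked `ignore`:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let mut out = [0i32; 16];
///     let a = _mm512_set1_epi32(5);
///     // Only the lanes selected by the mask are written; the rest of the
///     // buffer is left untouched.
///     _mm512_mask_storeu_epi32(out.as_mut_ptr(), 0x0003, a);
///     assert_eq!(&out[..2], &[5, 5]);
///     assert_eq!(&out[2..], &[0; 14]);
/// }
/// ```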
35415#[inline]
35416#[target_feature(enable = "avx512f")]
35417#[cfg_attr(test, assert_instr(vmovdqu32))]
35418#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35419pub unsafe fn _mm512_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask16, a: __m512i) {
35420 storedqu32_512(mem_addr, a.as_i32x16(), mask)
35421}
35422
35423/// Store packed 64-bit integers from a into memory using writemask k.
35424/// mem_addr does not need to be aligned on any particular boundary.
35425///
35426/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_epi64)
35427#[inline]
35428#[target_feature(enable = "avx512f")]
35429#[cfg_attr(test, assert_instr(vmovdqu64))]
35430#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35431pub unsafe fn _mm512_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m512i) {
35432 storedqu64_512(mem_addr, a.as_i64x8(), mask)
35433}
35434
35435/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
35436/// mem_addr does not need to be aligned on any particular boundary.
35437///
35438/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_ps)
35439#[inline]
35440#[target_feature(enable = "avx512f")]
35441#[cfg_attr(test, assert_instr(vmovups))]
35442#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35443pub unsafe fn _mm512_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask16, a: __m512) {
35444 storeups_512(mem_addr, a.as_f32x16(), mask)
35445}
35446
35447/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
35448/// mem_addr does not need to be aligned on any particular boundary.
35449///
35450/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_storeu_pd)
35451#[inline]
35452#[target_feature(enable = "avx512f")]
35453#[cfg_attr(test, assert_instr(vmovupd))]
35454#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35455pub unsafe fn _mm512_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m512d) {
35456 storeupd_512(mem_addr, a.as_f64x8(), mask)
35457}
35458
35459/// Store packed 32-bit integers from a into memory using writemask k.
35460/// mem_addr does not need to be aligned on any particular boundary.
35461///
35462/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_epi32)
35463#[inline]
35464#[target_feature(enable = "avx512f,avx512vl")]
35465#[cfg_attr(test, assert_instr(vmovdqu32))]
35466#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35467pub unsafe fn _mm256_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m256i) {
35468 storedqu32_256(mem_addr, a.as_i32x8(), mask)
35469}
35470
35471/// Store packed 64-bit integers from a into memory using writemask k.
35472/// mem_addr does not need to be aligned on any particular boundary.
35473///
35474/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_epi64)
35475#[inline]
35476#[target_feature(enable = "avx512f,avx512vl")]
35477#[cfg_attr(test, assert_instr(vmovdqu64))]
35478#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35479pub unsafe fn _mm256_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m256i) {
35480 storedqu64_256(mem_addr, a.as_i64x4(), mask)
35481}
35482
35483/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
35484/// mem_addr does not need to be aligned on any particular boundary.
35485///
35486/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_ps)
35487#[inline]
35488#[target_feature(enable = "avx512f,avx512vl")]
35489#[cfg_attr(test, assert_instr(vmovups))]
35490#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35491pub unsafe fn _mm256_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask8, a: __m256) {
35492 storeups_256(mem_addr, a.as_f32x8(), mask)
35493}
35494
35495/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
35496/// mem_addr does not need to be aligned on any particular boundary.
35497///
35498/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_storeu_pd)
35499#[inline]
35500#[target_feature(enable = "avx512f,avx512vl")]
35501#[cfg_attr(test, assert_instr(vmovupd))]
35502#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35503pub unsafe fn _mm256_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m256d) {
35504 storeupd_256(mem_addr, a.as_f64x4(), mask)
35505}
35506
35507/// Store packed 32-bit integers from a into memory using writemask k.
35508/// mem_addr does not need to be aligned on any particular boundary.
35509///
35510/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_epi32)
35511#[inline]
35512#[target_feature(enable = "avx512f,avx512vl")]
35513#[cfg_attr(test, assert_instr(vmovdqu32))]
35514#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35515pub unsafe fn _mm_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m128i) {
35516 storedqu32_128(mem_addr, a.as_i32x4(), mask)
35517}
35518
35519/// Store packed 64-bit integers from a into memory using writemask k.
35520/// mem_addr does not need to be aligned on any particular boundary.
35521///
35522/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_epi64)
35523#[inline]
35524#[target_feature(enable = "avx512f,avx512vl")]
35525#[cfg_attr(test, assert_instr(vmovdqu64))]
35526#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35527pub unsafe fn _mm_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m128i) {
35528 storedqu64_128(mem_addr, a.as_i64x2(), mask)
35529}
35530
35531/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
35532/// mem_addr does not need to be aligned on any particular boundary.
35533///
35534/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_ps)
35535#[inline]
35536#[target_feature(enable = "avx512f,avx512vl")]
35537#[cfg_attr(test, assert_instr(vmovups))]
35538#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35539pub unsafe fn _mm_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask8, a: __m128) {
35540 storeups_128(mem_addr, a.as_f32x4(), mask)
35541}
35542
35543/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
35544/// mem_addr does not need to be aligned on any particular boundary.
35545///
35546/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_storeu_pd)
35547#[inline]
35548#[target_feature(enable = "avx512f,avx512vl")]
35549#[cfg_attr(test, assert_instr(vmovupd))]
35550#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35551pub unsafe fn _mm_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m128d) {
35552 storeupd_128(mem_addr, a.as_f64x2(), mask)
35553}
35554
35555/// Store packed 32-bit integers from a into memory using writemask k.
35556/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35557///
35558/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_epi32)
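///
/// # Example
///
/// A minimal usage sketch (not part of the original documentation). The wrapper type
/// only guarantees the 64-byte alignment the instruction requires; the block assumes
/// the unstable `stdarch_x86_avx512` feature and runtime `avx512f` support, so it is
/// marked `ignore`:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[repr(align(64))]
/// struct Aligned([i32; 16]);
///
/// unsafe {
///     let mut out = Aligned([0i32; 16]);
///     let a = _mm512_set1_epi32(9);
///     // Write only the even lanes; odd lanes keep their previous contents.
///     _mm512_mask_store_epi32(out.0.as_mut_ptr(), 0x5555, a);
///     assert_eq!(out.0[0], 9);
///     assert_eq!(out.0[1], 0);
/// }
/// ```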
35559#[inline]
35560#[target_feature(enable = "avx512f")]
35561#[cfg_attr(test, assert_instr(vmovdqa32))]
35562#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35563pub unsafe fn _mm512_mask_store_epi32(mem_addr: *mut i32, mask: __mmask16, a: __m512i) {
35564 storedqa32_512(mem_addr, a.as_i32x16(), mask)
35565}
35566
35567/// Store packed 64-bit integers from a into memory using writemask k.
35568/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35569///
35570/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_epi64)
35571#[inline]
35572#[target_feature(enable = "avx512f")]
35573#[cfg_attr(test, assert_instr(vmovdqa64))]
35574#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35575pub unsafe fn _mm512_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m512i) {
35576 storedqa64_512(mem_addr, a.as_i64x8(), mask)
35577}
35578
35579/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
35580/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35581///
35582/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_ps)
35583#[inline]
35584#[target_feature(enable = "avx512f")]
35585#[cfg_attr(test, assert_instr(vmovaps))]
35586#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35587pub unsafe fn _mm512_mask_store_ps(mem_addr: *mut f32, mask: __mmask16, a: __m512) {
35588 storeaps_512(mem_addr, a.as_f32x16(), mask)
35589}
35590
35591/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
35592/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be generated.
35593///
35594/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_pd)
35595#[inline]
35596#[target_feature(enable = "avx512f")]
35597#[cfg_attr(test, assert_instr(vmovapd))]
35598#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35599pub unsafe fn _mm512_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m512d) {
35600 storeapd_512(mem_addr, a.as_f64x8(), mask)
35601}
35602
35603/// Store packed 32-bit integers from a into memory using writemask k.
35604/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35605///
35606/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_epi32)
35607#[inline]
35608#[target_feature(enable = "avx512f,avx512vl")]
35609#[cfg_attr(test, assert_instr(vmovdqa32))]
35610#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35611pub unsafe fn _mm256_mask_store_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m256i) {
35612 storedqa32_256(mem_addr, a.as_i32x8(), mask)
35613}
35614
35615/// Store packed 64-bit integers from a into memory using writemask k.
35616/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35617///
35618/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_epi64)
35619#[inline]
35620#[target_feature(enable = "avx512f,avx512vl")]
35621#[cfg_attr(test, assert_instr(vmovdqa64))]
35622#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35623pub unsafe fn _mm256_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m256i) {
35624 storedqa64_256(mem_addr, a.as_i64x4(), mask)
35625}
35626
35627/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
35628/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35629///
35630/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_ps)
35631#[inline]
35632#[target_feature(enable = "avx512f,avx512vl")]
35633#[cfg_attr(test, assert_instr(vmovaps))]
35634#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35635pub unsafe fn _mm256_mask_store_ps(mem_addr: *mut f32, mask: __mmask8, a: __m256) {
35636 storeaps_256(mem_addr, a.as_f32x8(), mask)
35637}
35638
35639/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
35640/// mem_addr must be aligned on a 32-byte boundary or a general-protection exception may be generated.
35641///
35642/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_pd)
35643#[inline]
35644#[target_feature(enable = "avx512f,avx512vl")]
35645#[cfg_attr(test, assert_instr(vmovapd))]
35646#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35647pub unsafe fn _mm256_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m256d) {
35648 storeapd_256(mem_addr, a.as_f64x4(), mask)
35649}
35650
35651/// Store packed 32-bit integers from a into memory using writemask k.
35652/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35653///
35654/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_epi32)
35655#[inline]
35656#[target_feature(enable = "avx512f,avx512vl")]
35657#[cfg_attr(test, assert_instr(vmovdqa32))]
35658#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35659pub unsafe fn _mm_mask_store_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m128i) {
35660 storedqa32_128(mem_addr, a.as_i32x4(), mask)
35661}
35662
35663/// Store packed 64-bit integers from a into memory using writemask k.
35664/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35665///
35666/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_epi64)
35667#[inline]
35668#[target_feature(enable = "avx512f,avx512vl")]
35669#[cfg_attr(test, assert_instr(vmovdqa64))]
35670#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35671pub unsafe fn _mm_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m128i) {
35672 storedqa64_128(mem_addr, a.as_i64x2(), mask)
35673}
35674
35675/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
35676/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35677///
35678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_ps)
35679#[inline]
35680#[target_feature(enable = "avx512f,avx512vl")]
35681#[cfg_attr(test, assert_instr(vmovaps))]
35682#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35683pub unsafe fn _mm_mask_store_ps(mem_addr: *mut f32, mask: __mmask8, a: __m128) {
35684 storeaps_128(mem_addr, a.as_f32x4(), mask)
35685}
35686
35687/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
35688/// mem_addr must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35689///
35690/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_pd)
35691#[inline]
35692#[target_feature(enable = "avx512f,avx512vl")]
35693#[cfg_attr(test, assert_instr(vmovapd))]
35694#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35695pub unsafe fn _mm_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m128d) {
35696 storeapd_128(mem_addr, a.as_f64x2(), mask)
35697}
35698
35699/// Store a single-precision (32-bit) floating-point element from a into memory using writemask k. mem_addr
35700/// must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35701///
35702/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_store_ss)
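///
/// # Example
///
/// A minimal usage sketch (not part of the original documentation). The wrapper type
/// only guarantees the required 16-byte alignment; the block assumes the unstable
/// `stdarch_x86_avx512` feature and runtime `avx512f` support, so it is marked `ignore`:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// #[repr(align(16))]
/// struct Aligned(f32);
///
/// unsafe {
///     let mut out = Aligned(0.0);
///     let a = _mm_set1_ps(3.5);
///     // Mask bit 0 is set, so the lower lane of `a` is written to memory;
///     // with a mask of 0 the store would be suppressed entirely.
///     _mm_mask_store_ss(&mut out.0, 1, a);
///     assert_eq!(out.0, 3.5);
/// }
/// ```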
35703#[inline]
35704#[cfg_attr(test, assert_instr(vmovss))]
35705#[target_feature(enable = "avx512f")]
35706#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35707pub unsafe fn _mm_mask_store_ss(mem_addr: *mut f32, k: __mmask8, a: __m128) {
35708 asm!(
35709 vps!("vmovss", "{{{k}}}, {a}"),
35710 p = in(reg) mem_addr,
35711 k = in(kreg) k,
35712 a = in(xmm_reg) a,
35713 options(nostack, preserves_flags),
35714 );
35715}
35716
35717/// Store a double-precision (64-bit) floating-point element from a into memory using writemask k. mem_addr
35718/// must be aligned on a 16-byte boundary or a general-protection exception may be generated.
35719///
35720/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_store_sd)
35721#[inline]
35722#[cfg_attr(test, assert_instr(vmovsd))]
35723#[target_feature(enable = "avx512f")]
35724#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35725pub unsafe fn _mm_mask_store_sd(mem_addr: *mut f64, k: __mmask8, a: __m128d) {
35726 asm!(
35727 vps!("vmovsd", "{{{k}}}, {a}"),
35728 p = in(reg) mem_addr,
35729 k = in(kreg) k,
35730 a = in(xmm_reg) a,
35731 options(nostack, preserves_flags),
35732 );
35733}
35734
35735/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35736///
35737/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_epi32)
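///
/// # Example
///
/// A minimal usage sketch (not part of the original documentation) showing how the
/// expand load reads *contiguous* memory and scatters it into the lanes selected by
/// the mask; it assumes the unstable `stdarch_x86_avx512` feature and runtime
/// `avx512f` support, so the block is marked `ignore`:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let data = [100i32, 200, 300];
///     let src = _mm512_set1_epi32(-1);
///     // Three mask bits are set (lanes 1, 4 and 7), so exactly three consecutive
///     // values are read from `data` and placed in those lanes; every other lane
///     // is copied from `src`.
///     let r = _mm512_mask_expandloadu_epi32(src, 0b1001_0010, data.as_ptr());
///     let out: [i32; 16] = core::mem::transmute(r);
///     assert_eq!(out[1], 100);
///     assert_eq!(out[4], 200);
///     assert_eq!(out[7], 300);
///     assert_eq!(out[0], -1);
/// }
/// ```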
35738#[inline]
35739#[target_feature(enable = "avx512f")]
35740#[cfg_attr(test, assert_instr(vpexpandd))]
35741#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35742pub unsafe fn _mm512_mask_expandloadu_epi32(
35743 src: __m512i,
35744 k: __mmask16,
35745 mem_addr: *const i32,
35746) -> __m512i {
    transmute(expandloadd_512(mem_addr, src.as_i32x16(), k))
35748}
35749
35750/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35751///
35752/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_epi32)
35753#[inline]
35754#[target_feature(enable = "avx512f")]
35755#[cfg_attr(test, assert_instr(vpexpandd))]
35756#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35757pub unsafe fn _mm512_maskz_expandloadu_epi32(k: __mmask16, mem_addr: *const i32) -> __m512i {
    _mm512_mask_expandloadu_epi32(_mm512_setzero_si512(), k, mem_addr)
35759}
35760
35761/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35762///
35763/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_epi32)
35764#[inline]
35765#[target_feature(enable = "avx512f,avx512vl")]
35766#[cfg_attr(test, assert_instr(vpexpandd))]
35767#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35768pub unsafe fn _mm256_mask_expandloadu_epi32(
35769 src: __m256i,
35770 k: __mmask8,
35771 mem_addr: *const i32,
35772) -> __m256i {
    transmute(expandloadd_256(mem_addr, src.as_i32x8(), k))
35774}
35775
35776/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35777///
35778/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_epi32)
35779#[inline]
35780#[target_feature(enable = "avx512f,avx512vl")]
35781#[cfg_attr(test, assert_instr(vpexpandd))]
35782#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35783pub unsafe fn _mm256_maskz_expandloadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m256i {
    _mm256_mask_expandloadu_epi32(_mm256_setzero_si256(), k, mem_addr)
35785}
35786
35787/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35788///
35789/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_epi32)
35790#[inline]
35791#[target_feature(enable = "avx512f,avx512vl")]
35792#[cfg_attr(test, assert_instr(vpexpandd))]
35793#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35794pub unsafe fn _mm_mask_expandloadu_epi32(
35795 src: __m128i,
35796 k: __mmask8,
35797 mem_addr: *const i32,
35798) -> __m128i {
    transmute(expandloadd_128(mem_addr, src.as_i32x4(), k))
35800}
35801
35802/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35803///
35804/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_epi32)
35805#[inline]
35806#[target_feature(enable = "avx512f,avx512vl")]
35807#[cfg_attr(test, assert_instr(vpexpandd))]
35808#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35809pub unsafe fn _mm_maskz_expandloadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m128i {
    _mm_mask_expandloadu_epi32(_mm_setzero_si128(), k, mem_addr)
35811}
35812
35813/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35814///
35815/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_epi64)
35816#[inline]
35817#[target_feature(enable = "avx512f")]
35818#[cfg_attr(test, assert_instr(vpexpandq))]
35819#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35820pub unsafe fn _mm512_mask_expandloadu_epi64(
35821 src: __m512i,
35822 k: __mmask8,
35823 mem_addr: *const i64,
35824) -> __m512i {
    transmute(expandloadq_512(mem_addr, src.as_i64x8(), k))
35826}
35827
35828/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35829///
35830/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_epi64)
35831#[inline]
35832#[target_feature(enable = "avx512f")]
35833#[cfg_attr(test, assert_instr(vpexpandq))]
35834#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35835pub unsafe fn _mm512_maskz_expandloadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m512i {
    _mm512_mask_expandloadu_epi64(_mm512_setzero_si512(), k, mem_addr)
35837}
35838
35839/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35840///
35841/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_epi64)
35842#[inline]
35843#[target_feature(enable = "avx512f,avx512vl")]
35844#[cfg_attr(test, assert_instr(vpexpandq))]
35845#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35846pub unsafe fn _mm256_mask_expandloadu_epi64(
35847 src: __m256i,
35848 k: __mmask8,
35849 mem_addr: *const i64,
35850) -> __m256i {
    transmute(expandloadq_256(mem_addr, src.as_i64x4(), k))
35852}
35853
35854/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35855///
35856/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_epi64)
35857#[inline]
35858#[target_feature(enable = "avx512f,avx512vl")]
35859#[cfg_attr(test, assert_instr(vpexpandq))]
35860#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35861pub unsafe fn _mm256_maskz_expandloadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m256i {
    _mm256_mask_expandloadu_epi64(_mm256_setzero_si256(), k, mem_addr)
35863}
35864
35865/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35866///
35867/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_epi64)
35868#[inline]
35869#[target_feature(enable = "avx512f,avx512vl")]
35870#[cfg_attr(test, assert_instr(vpexpandq))]
35871#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35872pub unsafe fn _mm_mask_expandloadu_epi64(
35873 src: __m128i,
35874 k: __mmask8,
35875 mem_addr: *const i64,
35876) -> __m128i {
    transmute(expandloadq_128(mem_addr, src.as_i64x2(), k))
35878}
35879
35880/// Load contiguous active 64-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35881///
35882/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_epi64)
35883#[inline]
35884#[target_feature(enable = "avx512f,avx512vl")]
35885#[cfg_attr(test, assert_instr(vpexpandq))]
35886#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35887pub unsafe fn _mm_maskz_expandloadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m128i {
    _mm_mask_expandloadu_epi64(_mm_setzero_si128(), k, mem_addr)
35889}
35890
35891/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35892///
35893/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_ps)
35894#[inline]
35895#[target_feature(enable = "avx512f")]
35896#[cfg_attr(test, assert_instr(vexpandps))]
35897#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35898pub unsafe fn _mm512_mask_expandloadu_ps(
35899 src: __m512,
35900 k: __mmask16,
35901 mem_addr: *const f32,
35902) -> __m512 {
    transmute(expandloadps_512(mem_addr, src.as_f32x16(), k))
35904}
35905
35906/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35907///
35908/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_ps)
35909#[inline]
35910#[target_feature(enable = "avx512f")]
35911#[cfg_attr(test, assert_instr(vexpandps))]
35912#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35913pub unsafe fn _mm512_maskz_expandloadu_ps(k: __mmask16, mem_addr: *const f32) -> __m512 {
    _mm512_mask_expandloadu_ps(_mm512_setzero_ps(), k, mem_addr)
35915}
35916
35917/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35918///
35919/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_ps)
35920#[inline]
35921#[target_feature(enable = "avx512f,avx512vl")]
35922#[cfg_attr(test, assert_instr(vexpandps))]
35923#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35924pub unsafe fn _mm256_mask_expandloadu_ps(src: __m256, k: __mmask8, mem_addr: *const f32) -> __m256 {
    transmute(expandloadps_256(mem_addr, src.as_f32x8(), k))
35926}
35927
35928/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35929///
35930/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_ps)
35931#[inline]
35932#[target_feature(enable = "avx512f,avx512vl")]
35933#[cfg_attr(test, assert_instr(vexpandps))]
35934#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35935pub unsafe fn _mm256_maskz_expandloadu_ps(k: __mmask8, mem_addr: *const f32) -> __m256 {
    _mm256_mask_expandloadu_ps(_mm256_setzero_ps(), k, mem_addr)
35937}
35938
35939/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35940///
35941/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_ps)
35942#[inline]
35943#[target_feature(enable = "avx512f,avx512vl")]
35944#[cfg_attr(test, assert_instr(vexpandps))]
35945#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35946pub unsafe fn _mm_mask_expandloadu_ps(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
    transmute(expandloadps_128(mem_addr, src.as_f32x4(), k))
35948}
35949
35950/// Load contiguous active single-precision (32-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35951///
35952/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_ps)
35953#[inline]
35954#[target_feature(enable = "avx512f,avx512vl")]
35955#[cfg_attr(test, assert_instr(vexpandps))]
35956#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35957pub unsafe fn _mm_maskz_expandloadu_ps(k: __mmask8, mem_addr: *const f32) -> __m128 {
    _mm_mask_expandloadu_ps(_mm_setzero_ps(), k, mem_addr)
35959}
35960
35961/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35962///
35963/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_expandloadu_pd)
35964#[inline]
35965#[target_feature(enable = "avx512f")]
35966#[cfg_attr(test, assert_instr(vexpandpd))]
35967#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35968pub unsafe fn _mm512_mask_expandloadu_pd(
35969 src: __m512d,
35970 k: __mmask8,
35971 mem_addr: *const f64,
35972) -> __m512d {
    transmute(expandloadpd_512(mem_addr, src.as_f64x8(), k))
35974}
35975
35976/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
35977///
35978/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_expandloadu_pd)
35979#[inline]
35980#[target_feature(enable = "avx512f")]
35981#[cfg_attr(test, assert_instr(vexpandpd))]
35982#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35983pub unsafe fn _mm512_maskz_expandloadu_pd(k: __mmask8, mem_addr: *const f64) -> __m512d {
    _mm512_mask_expandloadu_pd(_mm512_setzero_pd(), k, mem_addr)
35985}
35986
35987/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
35988///
35989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_expandloadu_pd)
35990#[inline]
35991#[target_feature(enable = "avx512f,avx512vl")]
35992#[cfg_attr(test, assert_instr(vexpandpd))]
35993#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
35994pub unsafe fn _mm256_mask_expandloadu_pd(
35995 src: __m256d,
35996 k: __mmask8,
35997 mem_addr: *const f64,
35998) -> __m256d {
    transmute(expandloadpd_256(mem_addr, src.as_f64x4(), k))
36000}
36001
36002/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
36003///
36004/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_expandloadu_pd)
36005#[inline]
36006#[target_feature(enable = "avx512f,avx512vl")]
36007#[cfg_attr(test, assert_instr(vexpandpd))]
36008#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36009pub unsafe fn _mm256_maskz_expandloadu_pd(k: __mmask8, mem_addr: *const f64) -> __m256d {
    _mm256_mask_expandloadu_pd(_mm256_setzero_pd(), k, mem_addr)
36011}
36012
36013/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
36014///
36015/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_expandloadu_pd)
36016#[inline]
36017#[target_feature(enable = "avx512f,avx512vl")]
36018#[cfg_attr(test, assert_instr(vexpandpd))]
36019#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36020pub unsafe fn _mm_mask_expandloadu_pd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
    transmute(expandloadpd_128(mem_addr, src.as_f64x2(), k))
36022}
36023
36024/// Load contiguous active double-precision (64-bit) floating-point elements from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
36025///
36026/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_expandloadu_pd)
36027#[inline]
36028#[target_feature(enable = "avx512f,avx512vl")]
36029#[cfg_attr(test, assert_instr(vexpandpd))]
36030#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36031pub unsafe fn _mm_maskz_expandloadu_pd(k: __mmask8, mem_addr: *const f64) -> __m128d {
    _mm_mask_expandloadu_pd(_mm_setzero_pd(), k, mem_addr)
36033}
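
// Illustrative usage sketch (editorial addition, not part of the upstream stdarch
// source): the expand-load intrinsics read as many contiguous elements as there
// are set mask bits and scatter them, in order, into the mask-on lanes. The helper
// name is invented and the example assumes the host CPU supports AVX-512F at runtime.
#[cfg(test)]
#[allow(dead_code)]
fn expandloadu_usage_sketch() {
    // SAFETY: illustrative only; requires AVX-512F support at runtime.
    unsafe {
        let data: [f32; 2] = [10.0, 20.0];
        // Bits 1 and 3 are set, so exactly two elements are read from `data`
        // and placed into lanes 1 and 3; every other lane is zeroed (maskz).
        let v = _mm512_maskz_expandloadu_ps(0b0000_0000_0000_1010, data.as_ptr());
        let mut out = [0.0f32; 16];
        _mm512_storeu_ps(out.as_mut_ptr(), v);
        assert_eq!((out[0], out[1], out[3]), (0.0, 10.0, 20.0));
    }
}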
36034
36035/// Set packed double-precision (64-bit) floating-point elements in dst with the supplied values in reverse order.
36036///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_setr_pd&expand=5002)
36038#[inline]
36039#[target_feature(enable = "avx512f")]
36040#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36041pub fn _mm512_setr_pd(
36042 e0: f64,
36043 e1: f64,
36044 e2: f64,
36045 e3: f64,
36046 e4: f64,
36047 e5: f64,
36048 e6: f64,
36049 e7: f64,
36050) -> __m512d {
36051 unsafe {
        let r: f64x8 = f64x8::new(e0, e1, e2, e3, e4, e5, e6, e7);
        transmute(r)
36054 }
36055}
36056
36057/// Set packed double-precision (64-bit) floating-point elements in dst with the supplied values.
36058///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_set_pd&expand=4924)
36060#[inline]
36061#[target_feature(enable = "avx512f")]
36062#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36063pub fn _mm512_set_pd(
36064 e0: f64,
36065 e1: f64,
36066 e2: f64,
36067 e3: f64,
36068 e4: f64,
36069 e5: f64,
36070 e6: f64,
36071 e7: f64,
36072) -> __m512d {
    _mm512_setr_pd(e7, e6, e5, e4, e3, e2, e1, e0)
36074}
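
// Illustrative ordering sketch (editorial addition, not upstream source): shows that
// `_mm512_set_pd` takes its arguments highest lane first while `_mm512_setr_pd` takes
// them in lane (memory) order. The helper name is invented; AVX-512F support at
// runtime is assumed.
#[cfg(test)]
#[allow(dead_code)]
fn set_pd_ordering_sketch() {
    // SAFETY: illustrative only; requires AVX-512F support at runtime.
    unsafe {
        // `setr` stores its first argument into lane 0; `set` stores it into lane 7.
        let a = _mm512_setr_pd(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0);
        let b = _mm512_set_pd(7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 0.0);
        let mut lanes_a = [0.0f64; 8];
        let mut lanes_b = [0.0f64; 8];
        _mm512_storeu_pd(lanes_a.as_mut_ptr(), a);
        _mm512_storeu_pd(lanes_b.as_mut_ptr(), b);
        // Both spell out the same vector [0.0, 1.0, ..., 7.0].
        assert_eq!(lanes_a, lanes_b);
    }
}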
36075
36076/// Move the lower single-precision (32-bit) floating-point element from b to the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36077///
36078/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_move_ss&expand=3832)
36079#[inline]
36080#[target_feature(enable = "avx512f")]
36081#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36082#[cfg_attr(test, assert_instr(vmovss))]
36083pub fn _mm_mask_move_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36084 unsafe {
36085 let extractsrc: f32 = simd_extract!(src, 0);
36086 let mut mov: f32 = extractsrc;
36087 if (k & 0b00000001) != 0 {
36088 mov = simd_extract!(b, 0);
36089 }
36090 simd_insert!(a, 0, mov)
36091 }
36092}
36093
36094/// Move the lower single-precision (32-bit) floating-point element from b to the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36095///
36096/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_move_ss&expand=3833)
36097#[inline]
36098#[target_feature(enable = "avx512f")]
36099#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36100#[cfg_attr(test, assert_instr(vmovss))]
36101pub fn _mm_maskz_move_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36102 unsafe {
36103 let mut mov: f32 = 0.;
36104 if (k & 0b00000001) != 0 {
36105 mov = simd_extract!(b, 0);
36106 }
36107 simd_insert!(a, 0, mov)
36108 }
36109}
36110
36111/// Move the lower double-precision (64-bit) floating-point element from b to the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36112///
36113/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_move_sd&expand=3829)
36114#[inline]
36115#[target_feature(enable = "avx512f")]
36116#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36117#[cfg_attr(test, assert_instr(vmovsd))]
36118pub fn _mm_mask_move_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36119 unsafe {
36120 let extractsrc: f64 = simd_extract!(src, 0);
36121 let mut mov: f64 = extractsrc;
36122 if (k & 0b00000001) != 0 {
36123 mov = simd_extract!(b, 0);
36124 }
36125 simd_insert!(a, 0, mov)
36126 }
36127}
36128
36129/// Move the lower double-precision (64-bit) floating-point element from b to the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36130///
36131/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_move_sd&expand=3830)
36132#[inline]
36133#[target_feature(enable = "avx512f")]
36134#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36135#[cfg_attr(test, assert_instr(vmovsd))]
36136pub fn _mm_maskz_move_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36137 unsafe {
36138 let mut mov: f64 = 0.;
36139 if (k & 0b00000001) != 0 {
36140 mov = simd_extract!(b, 0);
36141 }
36142 simd_insert!(a, 0, mov)
36143 }
36144}
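
// Illustrative usage sketch (editorial addition, not upstream source): demonstrates the
// writemask behaviour of `_mm_mask_move_ss`. The helper name is invented; AVX-512F
// support at runtime is assumed.
#[cfg(test)]
#[allow(dead_code)]
fn mask_move_ss_sketch() {
    // SAFETY: illustrative only; requires AVX-512F support at runtime.
    unsafe {
        let src = _mm_set_ps(0.0, 0.0, 0.0, -1.0); // lane 0 of `src` is -1.0
        let a = _mm_set_ps(4.0, 3.0, 2.0, 1.0);
        let b = _mm_set_ps(8.0, 7.0, 6.0, 5.0); // lane 0 of `b` is 5.0
        // Mask bit 0 set: lane 0 of the result comes from `b`.
        assert_eq!(_mm_cvtss_f32(_mm_mask_move_ss(src, 0b1, a, b)), 5.0);
        // Mask bit 0 clear: lane 0 falls back to `src`; lanes 1..3 still come from `a`.
        assert_eq!(_mm_cvtss_f32(_mm_mask_move_ss(src, 0b0, a, b)), -1.0);
    }
}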
36145
36146/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36147///
36148/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_add_ss&expand=159)
36149#[inline]
36150#[target_feature(enable = "avx512f")]
36151#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36152#[cfg_attr(test, assert_instr(vaddss))]
36153pub fn _mm_mask_add_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36154 unsafe {
36155 let extractsrc: f32 = simd_extract!(src, 0);
36156 let mut add: f32 = extractsrc;
36157 if (k & 0b00000001) != 0 {
36158 let extracta: f32 = simd_extract!(a, 0);
36159 let extractb: f32 = simd_extract!(b, 0);
36160 add = extracta + extractb;
36161 }
36162 simd_insert!(a, 0, add)
36163 }
36164}
36165
36166/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36167///
36168/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_add_ss&expand=160)
36169#[inline]
36170#[target_feature(enable = "avx512f")]
36171#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36172#[cfg_attr(test, assert_instr(vaddss))]
36173pub fn _mm_maskz_add_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36174 unsafe {
36175 let mut add: f32 = 0.;
36176 if (k & 0b00000001) != 0 {
36177 let extracta: f32 = simd_extract!(a, 0);
36178 let extractb: f32 = simd_extract!(b, 0);
36179 add = extracta + extractb;
36180 }
36181 simd_insert!(a, 0, add)
36182 }
36183}
36184
36185/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36186///
36187/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_add_sd&expand=155)
36188#[inline]
36189#[target_feature(enable = "avx512f")]
36190#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36191#[cfg_attr(test, assert_instr(vaddsd))]
36192pub fn _mm_mask_add_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36193 unsafe {
36194 let extractsrc: f64 = simd_extract!(src, 0);
36195 let mut add: f64 = extractsrc;
36196 if (k & 0b00000001) != 0 {
36197 let extracta: f64 = simd_extract!(a, 0);
36198 let extractb: f64 = simd_extract!(b, 0);
36199 add = extracta + extractb;
36200 }
36201 simd_insert!(a, 0, add)
36202 }
36203}
36204
36205/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36206///
36207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_add_sd&expand=156)
36208#[inline]
36209#[target_feature(enable = "avx512f")]
36210#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36211#[cfg_attr(test, assert_instr(vaddsd))]
36212pub fn _mm_maskz_add_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36213 unsafe {
36214 let mut add: f64 = 0.;
36215 if (k & 0b00000001) != 0 {
36216 let extracta: f64 = simd_extract!(a, 0);
36217 let extractb: f64 = simd_extract!(b, 0);
36218 add = extracta + extractb;
36219 }
36220 simd_insert!(a, 0, add)
36221 }
36222}
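
// Illustrative usage sketch (editorial addition, not upstream source): contrasts the
// writemask and zeromask variants of the masked scalar add. The same pattern applies
// to the sub/mul/div intrinsics below. The helper name is invented; AVX-512F support
// at runtime is assumed.
#[cfg(test)]
#[allow(dead_code)]
fn mask_add_ss_sketch() {
    // SAFETY: illustrative only; requires AVX-512F support at runtime.
    unsafe {
        let src = _mm_set1_ps(-1.0);
        let a = _mm_set_ps(4.0, 3.0, 2.0, 1.0); // lane 0 = 1.0
        let b = _mm_set_ps(40.0, 30.0, 20.0, 10.0); // lane 0 = 10.0
        // Writemask set: lane 0 = 1.0 + 10.0.
        assert_eq!(_mm_cvtss_f32(_mm_mask_add_ss(src, 0b1, a, b)), 11.0);
        // Writemask clear: lane 0 is copied from `src`.
        assert_eq!(_mm_cvtss_f32(_mm_mask_add_ss(src, 0b0, a, b)), -1.0);
        // Zeromask clear: lane 0 is zeroed instead.
        assert_eq!(_mm_cvtss_f32(_mm_maskz_add_ss(0b0, a, b)), 0.0);
    }
}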
36223
36224/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36225///
36226/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sub_ss&expand=5750)
36227#[inline]
36228#[target_feature(enable = "avx512f")]
36229#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36230#[cfg_attr(test, assert_instr(vsubss))]
36231pub fn _mm_mask_sub_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36232 unsafe {
        let extractsrc: f32 = simd_extract!(src, 0);
        let mut sub: f32 = extractsrc;
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extractb: f32 = simd_extract!(b, 0);
            sub = extracta - extractb;
        }
        simd_insert!(a, 0, sub)
36241 }
36242}
36243
36244/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36245///
36246/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sub_ss&expand=5751)
36247#[inline]
36248#[target_feature(enable = "avx512f")]
36249#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36250#[cfg_attr(test, assert_instr(vsubss))]
36251pub fn _mm_maskz_sub_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36252 unsafe {
        let mut sub: f32 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extractb: f32 = simd_extract!(b, 0);
            sub = extracta - extractb;
        }
        simd_insert!(a, 0, sub)
36260 }
36261}
36262
36263/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36264///
36265/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sub_sd&expand=5746)
36266#[inline]
36267#[target_feature(enable = "avx512f")]
36268#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36269#[cfg_attr(test, assert_instr(vsubsd))]
36270pub fn _mm_mask_sub_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36271 unsafe {
        let extractsrc: f64 = simd_extract!(src, 0);
        let mut sub: f64 = extractsrc;
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extractb: f64 = simd_extract!(b, 0);
            sub = extracta - extractb;
        }
        simd_insert!(a, 0, sub)
36280 }
36281}
36282
36283/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36284///
36285/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sub_sd&expand=5747)
36286#[inline]
36287#[target_feature(enable = "avx512f")]
36288#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36289#[cfg_attr(test, assert_instr(vsubsd))]
36290pub fn _mm_maskz_sub_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36291 unsafe {
        let mut sub: f64 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extractb: f64 = simd_extract!(b, 0);
            sub = extracta - extractb;
        }
        simd_insert!(a, 0, sub)
36299 }
36300}
36301
36302/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36303///
36304/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_mul_ss&expand=3950)
36305#[inline]
36306#[target_feature(enable = "avx512f")]
36307#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36308#[cfg_attr(test, assert_instr(vmulss))]
36309pub fn _mm_mask_mul_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36310 unsafe {
        let extractsrc: f32 = simd_extract!(src, 0);
        let mut mul: f32 = extractsrc;
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extractb: f32 = simd_extract!(b, 0);
            mul = extracta * extractb;
        }
        simd_insert!(a, 0, mul)
36319 }
36320}
36321
36322/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36323///
36324/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_mul_ss&expand=3951)
36325#[inline]
36326#[target_feature(enable = "avx512f")]
36327#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36328#[cfg_attr(test, assert_instr(vmulss))]
36329pub fn _mm_maskz_mul_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36330 unsafe {
        let mut mul: f32 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extractb: f32 = simd_extract!(b, 0);
            mul = extracta * extractb;
        }
        simd_insert!(a, 0, mul)
36338 }
36339}
36340
36341/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36342///
36343/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_mul_sd&expand=3947)
36344#[inline]
36345#[target_feature(enable = "avx512f")]
36346#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36347#[cfg_attr(test, assert_instr(vmulsd))]
36348pub fn _mm_mask_mul_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36349 unsafe {
        let extractsrc: f64 = simd_extract!(src, 0);
        let mut mul: f64 = extractsrc;
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extractb: f64 = simd_extract!(b, 0);
            mul = extracta * extractb;
        }
        simd_insert!(a, 0, mul)
36358 }
36359}
36360
36361/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36362///
36363/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_mul_sd&expand=3948)
36364#[inline]
36365#[target_feature(enable = "avx512f")]
36366#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36367#[cfg_attr(test, assert_instr(vmulsd))]
36368pub fn _mm_maskz_mul_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36369 unsafe {
        let mut mul: f64 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extractb: f64 = simd_extract!(b, 0);
            mul = extracta * extractb;
        }
        simd_insert!(a, 0, mul)
36377 }
36378}
36379
36380/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36381///
36382/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_div_ss&expand=2181)
36383#[inline]
36384#[target_feature(enable = "avx512f")]
36385#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36386#[cfg_attr(test, assert_instr(vdivss))]
36387pub fn _mm_mask_div_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36388 unsafe {
        let extractsrc: f32 = simd_extract!(src, 0);
        let mut div: f32 = extractsrc;
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extractb: f32 = simd_extract!(b, 0);
            div = extracta / extractb;
        }
        simd_insert!(a, 0, div)
36397 }
36398}
36399
36400/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36401///
36402/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_div_ss&expand=2182)
36403#[inline]
36404#[target_feature(enable = "avx512f")]
36405#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36406#[cfg_attr(test, assert_instr(vdivss))]
36407pub fn _mm_maskz_div_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36408 unsafe {
        let mut div: f32 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f32 = simd_extract!(a, 0);
            let extractb: f32 = simd_extract!(b, 0);
            div = extracta / extractb;
        }
        simd_insert!(a, 0, div)
36416 }
36417}
36418
36419/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36420///
36421/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_div_sd&expand=2178)
36422#[inline]
36423#[target_feature(enable = "avx512f")]
36424#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36425#[cfg_attr(test, assert_instr(vdivsd))]
36426pub fn _mm_mask_div_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36427 unsafe {
        let extractsrc: f64 = simd_extract!(src, 0);
        let mut div: f64 = extractsrc;
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extractb: f64 = simd_extract!(b, 0);
            div = extracta / extractb;
        }
        simd_insert!(a, 0, div)
36436 }
36437}
36438
36439/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36440///
36441/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_div_sd&expand=2179)
36442#[inline]
36443#[target_feature(enable = "avx512f")]
36444#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36445#[cfg_attr(test, assert_instr(vdivsd))]
36446pub fn _mm_maskz_div_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36447 unsafe {
        let mut div: f64 = 0.;
        if (k & 0b00000001) != 0 {
            let extracta: f64 = simd_extract!(a, 0);
            let extractb: f64 = simd_extract!(b, 0);
            div = extracta / extractb;
        }
        simd_insert!(a, 0, div)
36455 }
36456}
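
// Illustrative usage sketch (editorial addition, not upstream source): the zeromask
// scalar divide, showing the mask-clear case zeroing lane 0 while the upper lane is
// always copied from `a`. The helper name is invented; AVX-512F support at runtime
// is assumed.
#[cfg(test)]
#[allow(dead_code)]
fn maskz_div_sd_sketch() {
    // SAFETY: illustrative only; requires AVX-512F support at runtime.
    unsafe {
        let a = _mm_set_pd(3.0, 12.0); // lane 0 = 12.0, lane 1 = 3.0
        let b = _mm_set_pd(9.0, 4.0); // lane 0 = 4.0
        // Mask bit 0 set: lane 0 = 12.0 / 4.0; lane 1 is copied from `a`.
        assert_eq!(_mm_cvtsd_f64(_mm_maskz_div_sd(0b1, a, b)), 3.0);
        // Mask bit 0 clear: lane 0 is zeroed.
        assert_eq!(_mm_cvtsd_f64(_mm_maskz_div_sd(0b0, a, b)), 0.0);
    }
}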
36457
36458/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36459///
36460/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_max_ss&expand=3672)
36461#[inline]
36462#[target_feature(enable = "avx512f")]
36463#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36464#[cfg_attr(test, assert_instr(vmaxss))]
36465pub fn _mm_mask_max_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36466 unsafe {
        transmute(vmaxss(
            a.as_f32x4(),
            b.as_f32x4(),
            src.as_f32x4(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
36474 }
36475}
36476
36477/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36478///
36479/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_max_ss&expand=3673)
36480#[inline]
36481#[target_feature(enable = "avx512f")]
36482#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36483#[cfg_attr(test, assert_instr(vmaxss))]
36484pub fn _mm_maskz_max_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36485 unsafe {
        transmute(vmaxss(
            a.as_f32x4(),
            b.as_f32x4(),
            f32x4::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
36493 }
36494}
36495
36496/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36497///
36498/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_max_sd&expand=3669)
36499#[inline]
36500#[target_feature(enable = "avx512f")]
36501#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36502#[cfg_attr(test, assert_instr(vmaxsd))]
36503pub fn _mm_mask_max_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36504 unsafe {
        transmute(vmaxsd(
            a.as_f64x2(),
            b.as_f64x2(),
            src.as_f64x2(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
36512 }
36513}
36514
36515/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36516///
36517/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_max_sd&expand=3670)
36518#[inline]
36519#[target_feature(enable = "avx512f")]
36520#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36521#[cfg_attr(test, assert_instr(vmaxsd))]
36522pub fn _mm_maskz_max_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36523 unsafe {
        transmute(vmaxsd(
            a.as_f64x2(),
            b.as_f64x2(),
            f64x2::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
36531 }
36532}
36533
36534/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36535///
36536/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_min_ss&expand=3786)
36537#[inline]
36538#[target_feature(enable = "avx512f")]
36539#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36540#[cfg_attr(test, assert_instr(vminss))]
36541pub fn _mm_mask_min_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36542 unsafe {
        transmute(vminss(
            a.as_f32x4(),
            b.as_f32x4(),
            src.as_f32x4(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
36550 }
36551}
36552
36553/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36554///
36555/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_min_ss&expand=3787)
36556#[inline]
36557#[target_feature(enable = "avx512f")]
36558#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36559#[cfg_attr(test, assert_instr(vminss))]
36560pub fn _mm_maskz_min_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36561 unsafe {
        transmute(vminss(
            a.as_f32x4(),
            b.as_f32x4(),
            f32x4::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
36569 }
36570}
36571
36572/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36573///
36574/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_min_sd&expand=3783)
36575#[inline]
36576#[target_feature(enable = "avx512f")]
36577#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36578#[cfg_attr(test, assert_instr(vminsd))]
36579pub fn _mm_mask_min_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36580 unsafe {
        transmute(vminsd(
            a.as_f64x2(),
            b.as_f64x2(),
            src.as_f64x2(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
36588 }
36589}
36590
36591/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36592///
36593/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_min_sd&expand=3784)
36594#[inline]
36595#[target_feature(enable = "avx512f")]
36596#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36597#[cfg_attr(test, assert_instr(vminsd))]
36598pub fn _mm_maskz_min_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36599 unsafe {
        transmute(vminsd(
            a.as_f64x2(),
            b.as_f64x2(),
            f64x2::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
36607 }
36608}
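
// Illustrative usage sketch (editorial addition, not upstream source): the masked
// scalar maximum; the minimum variants behave symmetrically. The helper name is
// invented; AVX-512F support at runtime is assumed.
#[cfg(test)]
#[allow(dead_code)]
fn mask_max_ss_sketch() {
    // SAFETY: illustrative only; requires AVX-512F support at runtime.
    unsafe {
        let src = _mm_set1_ps(-1.0);
        let a = _mm_set_ps(0.0, 0.0, 0.0, 2.0); // lane 0 = 2.0
        let b = _mm_set_ps(0.0, 0.0, 0.0, 7.0); // lane 0 = 7.0
        // Mask set: lane 0 holds max(2.0, 7.0).
        assert_eq!(_mm_cvtss_f32(_mm_mask_max_ss(src, 0b1, a, b)), 7.0);
        // Mask clear: lane 0 is taken from `src` instead.
        assert_eq!(_mm_cvtss_f32(_mm_mask_max_ss(src, 0b0, a, b)), -1.0);
    }
}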
36609
36610/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36611///
36612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sqrt_ss&expand=5387)
36613#[inline]
36614#[target_feature(enable = "avx512f")]
36615#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36616#[cfg_attr(test, assert_instr(vsqrtss))]
36617pub fn _mm_mask_sqrt_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe { vsqrtss(a, b, src, k, _MM_FROUND_CUR_DIRECTION) }
36619}
36620
36621/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
36622///
36623/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sqrt_ss&expand=5388)
36624#[inline]
36625#[target_feature(enable = "avx512f")]
36626#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36627#[cfg_attr(test, assert_instr(vsqrtss))]
36628pub fn _mm_maskz_sqrt_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe { vsqrtss(a, b, _mm_setzero_ps(), k, _MM_FROUND_CUR_DIRECTION) }
36630}
36631
36632/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36633///
36634/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sqrt_sd&expand=5384)
36635#[inline]
36636#[target_feature(enable = "avx512f")]
36637#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36638#[cfg_attr(test, assert_instr(vsqrtsd))]
36639pub fn _mm_mask_sqrt_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe { vsqrtsd(a, b, src, k, _MM_FROUND_CUR_DIRECTION) }
36641}
36642
36643/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
36644///
36645/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sqrt_sd&expand=5385)
36646#[inline]
36647#[target_feature(enable = "avx512f")]
36648#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36649#[cfg_attr(test, assert_instr(vsqrtsd))]
36650pub fn _mm_maskz_sqrt_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe { vsqrtsd(a, b, _mm_setzero_pd(), k, _MM_FROUND_CUR_DIRECTION) }
36652}
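
// Illustrative usage sketch (editorial addition, not upstream source): the zeromask
// scalar square root. The helper name is invented; AVX-512F support at runtime is
// assumed.
#[cfg(test)]
#[allow(dead_code)]
fn maskz_sqrt_ss_sketch() {
    // SAFETY: illustrative only; requires AVX-512F support at runtime.
    unsafe {
        let a = _mm_set_ps(4.0, 3.0, 2.0, 1.0);
        let b = _mm_set_ps(0.0, 0.0, 0.0, 9.0); // lane 0 = 9.0
        // Mask set: lane 0 = sqrt(9.0); lanes 1..3 come from `a`.
        assert_eq!(_mm_cvtss_f32(_mm_maskz_sqrt_ss(0b1, a, b)), 3.0);
        // Mask clear: lane 0 is zeroed (maskz variant).
        assert_eq!(_mm_cvtss_f32(_mm_maskz_sqrt_ss(0b0, a, b)), 0.0);
    }
}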
36653
36654/// Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
36655///
36656/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rsqrt14_ss&expand=4825)
36657#[inline]
36658#[target_feature(enable = "avx512f")]
36659#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36660#[cfg_attr(test, assert_instr(vrsqrt14ss))]
36661pub fn _mm_rsqrt14_ss(a: __m128, b: __m128) -> __m128 {
    unsafe { transmute(vrsqrt14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, 0b1)) }
36663}
36664
36665/// Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
36666///
36667/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rsqrt14_ss&expand=4823)
36668#[inline]
36669#[target_feature(enable = "avx512f")]
36670#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36671#[cfg_attr(test, assert_instr(vrsqrt14ss))]
36672pub fn _mm_mask_rsqrt14_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe { transmute(vrsqrt14ss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k)) }
36674}
36675
36676/// Compute the approximate reciprocal square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
36677///
36678/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rsqrt14_ss&expand=4824)
36679#[inline]
36680#[target_feature(enable = "avx512f")]
36681#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36682#[cfg_attr(test, assert_instr(vrsqrt14ss))]
36683pub fn _mm_maskz_rsqrt14_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe { transmute(vrsqrt14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, k)) }
36685}
36686
36687/// Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
36688///
36689/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rsqrt14_sd&expand=4822)
36690#[inline]
36691#[target_feature(enable = "avx512f")]
36692#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36693#[cfg_attr(test, assert_instr(vrsqrt14sd))]
36694pub fn _mm_rsqrt14_sd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { transmute(vrsqrt14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, 0b1)) }
36696}
36697
36698/// Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
36699///
36700/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rsqrt14_sd&expand=4820)
36701#[inline]
36702#[target_feature(enable = "avx512f")]
36703#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36704#[cfg_attr(test, assert_instr(vrsqrt14sd))]
36705pub fn _mm_mask_rsqrt14_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe { transmute(vrsqrt14sd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k)) }
36707}
36708
36709/// Compute the approximate reciprocal square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
36710///
36711/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rsqrt14_sd&expand=4821)
36712#[inline]
36713#[target_feature(enable = "avx512f")]
36714#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36715#[cfg_attr(test, assert_instr(vrsqrt14sd))]
36716pub fn _mm_maskz_rsqrt14_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe { transmute(vrsqrt14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, k)) }
36718}
36719
36720/// Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
36721///
36722/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rcp14_ss&expand=4508)
36723#[inline]
36724#[target_feature(enable = "avx512f")]
36725#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36726#[cfg_attr(test, assert_instr(vrcp14ss))]
36727pub fn _mm_rcp14_ss(a: __m128, b: __m128) -> __m128 {
    unsafe { transmute(vrcp14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, 0b1)) }
36729}
36730
36731/// Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
36732///
36733/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rcp14_ss&expand=4506)
36734#[inline]
36735#[target_feature(enable = "avx512f")]
36736#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36737#[cfg_attr(test, assert_instr(vrcp14ss))]
36738pub fn _mm_mask_rcp14_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe { transmute(vrcp14ss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k)) }
36740}
36741
36742/// Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. The maximum relative error for this approximation is less than 2^-14.
36743///
36744/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rcp14_ss&expand=4507)
36745#[inline]
36746#[target_feature(enable = "avx512f")]
36747#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36748#[cfg_attr(test, assert_instr(vrcp14ss))]
36749pub fn _mm_maskz_rcp14_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
    unsafe { transmute(vrcp14ss(a.as_f32x4(), b.as_f32x4(), f32x4::ZERO, k)) }
36751}
36752
36753/// Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
36754///
36755/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_rcp14_sd&expand=4505)
36756#[inline]
36757#[target_feature(enable = "avx512f")]
36758#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36759#[cfg_attr(test, assert_instr(vrcp14sd))]
36760pub fn _mm_rcp14_sd(a: __m128d, b: __m128d) -> __m128d {
    unsafe { transmute(vrcp14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, 0b1)) }
36762}
36763
36764/// Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
36765///
36766/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_rcp14_sd&expand=4503)
36767#[inline]
36768#[target_feature(enable = "avx512f")]
36769#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36770#[cfg_attr(test, assert_instr(vrcp14sd))]
36771pub fn _mm_mask_rcp14_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe { transmute(vrcp14sd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k)) }
36773}
36774
36775/// Compute the approximate reciprocal of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. The maximum relative error for this approximation is less than 2^-14.
36776///
36777/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_rcp14_sd&expand=4504)
36778#[inline]
36779#[target_feature(enable = "avx512f")]
36780#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36781#[cfg_attr(test, assert_instr(vrcp14sd))]
36782pub fn _mm_maskz_rcp14_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
    unsafe { transmute(vrcp14sd(a.as_f64x2(), b.as_f64x2(), f64x2::ZERO, k)) }
36784}
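
// Illustrative usage sketch (editorial addition, not upstream source): the rcp14 and
// rsqrt14 intrinsics are approximations with at most about 2^-14 relative error, so
// results should be compared against a tolerance rather than for exact equality. The
// helper name is invented; AVX-512F support at runtime is assumed.
#[cfg(test)]
#[allow(dead_code)]
fn rcp14_ss_sketch() {
    // SAFETY: illustrative only; requires AVX-512F support at runtime.
    unsafe {
        let a = _mm_set1_ps(0.0);
        let b = _mm_set_ps(0.0, 0.0, 0.0, 8.0); // lane 0 = 8.0
        // Approximate reciprocal of 8.0, close to 0.125 but not necessarily exact.
        let r = _mm_cvtss_f32(_mm_rcp14_ss(a, b));
        assert!(r > 0.1249 && r < 0.1251);
    }
}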
36785
36786/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
36787///
36788/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getexp_ss&expand=2862)
36789#[inline]
36790#[target_feature(enable = "avx512f")]
36791#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36792#[cfg_attr(test, assert_instr(vgetexpss))]
36793pub fn _mm_getexp_ss(a: __m128, b: __m128) -> __m128 {
36794 unsafe {
        transmute(vgetexpss(
            a.as_f32x4(),
            b.as_f32x4(),
            f32x4::ZERO,
            0b1,
            _MM_FROUND_NO_EXC,
        ))
36802 }
36803}
36804
36805/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
36806///
36807/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getexp_ss&expand=2863)
36808#[inline]
36809#[target_feature(enable = "avx512f")]
36810#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36811#[cfg_attr(test, assert_instr(vgetexpss))]
36812pub fn _mm_mask_getexp_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
36813 unsafe {
        transmute(vgetexpss(
            a.as_f32x4(),
            b.as_f32x4(),
            src.as_f32x4(),
            k,
            _MM_FROUND_NO_EXC,
        ))
36821 }
36822}
36823
36824/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
36825///
36826/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getexp_ss&expand=2864)
36827#[inline]
36828#[target_feature(enable = "avx512f")]
36829#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36830#[cfg_attr(test, assert_instr(vgetexpss))]
36831pub fn _mm_maskz_getexp_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
36832 unsafe {
        transmute(vgetexpss(
            a.as_f32x4(),
            b.as_f32x4(),
            f32x4::ZERO,
            k,
            _MM_FROUND_NO_EXC,
        ))
36840 }
36841}
36842
36843/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
36844///
36845/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getexp_sd&expand=2859)
36846#[inline]
36847#[target_feature(enable = "avx512f")]
36848#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36849#[cfg_attr(test, assert_instr(vgetexpsd))]
36850pub fn _mm_getexp_sd(a: __m128d, b: __m128d) -> __m128d {
36851 unsafe {
        transmute(vgetexpsd(
            a.as_f64x2(),
            b.as_f64x2(),
            f64x2::ZERO,
            0b1,
            _MM_FROUND_NO_EXC,
        ))
36859 }
36860}
36861
36862/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
36863///
36864/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getexp_sd&expand=2860)
36865#[inline]
36866#[target_feature(enable = "avx512f")]
36867#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36868#[cfg_attr(test, assert_instr(vgetexpsd))]
36869pub fn _mm_mask_getexp_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36870 unsafe {
        transmute(vgetexpsd(
            a.as_f64x2(),
            b.as_f64x2(),
            src.as_f64x2(),
            k,
            _MM_FROUND_NO_EXC,
        ))
36878 }
36879}
36880
36881/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.
36882///
36883/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getexp_sd&expand=2861)
36884#[inline]
36885#[target_feature(enable = "avx512f")]
36886#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36887#[cfg_attr(test, assert_instr(vgetexpsd))]
36888pub fn _mm_maskz_getexp_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
36889 unsafe {
        transmute(vgetexpsd(
            a.as_f64x2(),
            b.as_f64x2(),
            f64x2::ZERO,
            k,
            _MM_FROUND_NO_EXC,
        ))
36897 }
36898}
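
// Illustrative usage sketch (editorial addition, not upstream source): getexp returns
// floor(log2(|x|)) of the lower element as a floating-point value. The helper name is
// invented; AVX-512F support at runtime is assumed.
#[cfg(test)]
#[allow(dead_code)]
fn getexp_ss_sketch() {
    // SAFETY: illustrative only; requires AVX-512F support at runtime.
    unsafe {
        let a = _mm_set1_ps(0.0);
        let b = _mm_set_ps(0.0, 0.0, 0.0, 10.0); // lane 0 = 10.0
        // 10.0 = 1.25 * 2^3, so the extracted exponent is 3.0.
        assert_eq!(_mm_cvtss_f32(_mm_getexp_ss(a, b)), 3.0);
    }
}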
36899
36900/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
36901/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
36902/// _MM_MANT_NORM_1_2 // interval [1, 2)\
36903/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
36904/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
36905/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
36906/// The sign is determined by sc which can take the following values:\
36907/// _MM_MANT_SIGN_src // sign = sign(src)\
36908/// _MM_MANT_SIGN_zero // sign = 0\
36909/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
36910/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
36911///
36912/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getmant_ss&expand=2898)
36913#[inline]
36914#[target_feature(enable = "avx512f")]
36915#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36916#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0))]
36917#[rustc_legacy_const_generics(2, 3)]
36918pub fn _mm_getmant_ss<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
36919 a: __m128,
36920 b: __m128,
36921) -> __m128 {
36922 unsafe {
36923 static_assert_uimm_bits!(NORM, 4);
36924 static_assert_uimm_bits!(SIGN, 2);
36925 let a: f32x4 = a.as_f32x4();
36926 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vgetmantss(
            a,
            b,
            SIGN << 2 | NORM,
            f32x4::ZERO,
            0b1,
            _MM_FROUND_CUR_DIRECTION,
        );
        transmute(r)
36936 }
36937}
36938
36939/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
36940/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
36941/// _MM_MANT_NORM_1_2 // interval [1, 2)\
36942/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
36943/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
36944/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
36945/// The sign is determined by sc which can take the following values:\
36946/// _MM_MANT_SIGN_src // sign = sign(src)\
36947/// _MM_MANT_SIGN_zero // sign = 0\
36948/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
36949/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
36950///
36951/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getmant_ss&expand=2899)
36952#[inline]
36953#[target_feature(enable = "avx512f")]
36954#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36955#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0))]
36956#[rustc_legacy_const_generics(4, 5)]
36957pub fn _mm_mask_getmant_ss<
36958 const NORM: _MM_MANTISSA_NORM_ENUM,
36959 const SIGN: _MM_MANTISSA_SIGN_ENUM,
36960>(
36961 src: __m128,
36962 k: __mmask8,
36963 a: __m128,
36964 b: __m128,
36965) -> __m128 {
36966 unsafe {
36967 static_assert_uimm_bits!(NORM, 4);
36968 static_assert_uimm_bits!(SIGN, 2);
36969 let a: f32x4 = a.as_f32x4();
36970 let b: f32x4 = b.as_f32x4();
36971 let src: f32x4 = src.as_f32x4();
        let r: f32x4 = vgetmantss(a, b, SIGN << 2 | NORM, src, k, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
36974 }
36975}
36976
36977/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
36978/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
36979/// _MM_MANT_NORM_1_2 // interval [1, 2)\
36980/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
36981/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
36982/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
36983/// The sign is determined by sc which can take the following values:\
36984/// _MM_MANT_SIGN_src // sign = sign(src)\
36985/// _MM_MANT_SIGN_zero // sign = 0\
36986/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
36987/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
36988///
36989/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getmant_ss&expand=2900)
36990#[inline]
36991#[target_feature(enable = "avx512f")]
36992#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
36993#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0))]
36994#[rustc_legacy_const_generics(3, 4)]
36995pub fn _mm_maskz_getmant_ss<
36996 const NORM: _MM_MANTISSA_NORM_ENUM,
36997 const SIGN: _MM_MANTISSA_SIGN_ENUM,
36998>(
36999 k: __mmask8,
37000 a: __m128,
37001 b: __m128,
37002) -> __m128 {
37003 unsafe {
37004 static_assert_uimm_bits!(NORM, 4);
37005 static_assert_uimm_bits!(SIGN, 2);
37006 let a: f32x4 = a.as_f32x4();
37007 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vgetmantss(
            a,
            b,
            SIGN << 2 | NORM,
            f32x4::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        );
        transmute(r)
37017 }
37018}
37019
37020/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
37021/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
37022/// _MM_MANT_NORM_1_2 // interval [1, 2)\
37023/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
37024/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
37025/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
37026/// The sign is determined by sc which can take the following values:\
37027/// _MM_MANT_SIGN_src // sign = sign(src)\
37028/// _MM_MANT_SIGN_zero // sign = 0\
37029/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
37030/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37031///
37032/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getmant_sd&expand=2895)
37033#[inline]
37034#[target_feature(enable = "avx512f")]
37035#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37036#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0))]
37037#[rustc_legacy_const_generics(2, 3)]
37038pub fn _mm_getmant_sd<const NORM: _MM_MANTISSA_NORM_ENUM, const SIGN: _MM_MANTISSA_SIGN_ENUM>(
37039 a: __m128d,
37040 b: __m128d,
37041) -> __m128d {
37042 unsafe {
37043 static_assert_uimm_bits!(NORM, 4);
37044 static_assert_uimm_bits!(SIGN, 2);
37045 let a: f64x2 = a.as_f64x2();
37046 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vgetmantsd(
            a,
            b,
            SIGN << 2 | NORM,
            f64x2::ZERO,
            0b1,
            _MM_FROUND_CUR_DIRECTION,
        );
        transmute(r)
37056 }
37057}
37058
37059/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
37060/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
37061/// _MM_MANT_NORM_1_2 // interval [1, 2)\
37062/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
37063/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
37064/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
37065/// The sign is determined by sc which can take the following values:\
37066/// _MM_MANT_SIGN_src // sign = sign(src)\
37067/// _MM_MANT_SIGN_zero // sign = 0\
37068/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
37069/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37070///
37071/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getmant_sd&expand=2896)
37072#[inline]
37073#[target_feature(enable = "avx512f")]
37074#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37075#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0))]
37076#[rustc_legacy_const_generics(4, 5)]
37077pub fn _mm_mask_getmant_sd<
37078 const NORM: _MM_MANTISSA_NORM_ENUM,
37079 const SIGN: _MM_MANTISSA_SIGN_ENUM,
37080>(
37081 src: __m128d,
37082 k: __mmask8,
37083 a: __m128d,
37084 b: __m128d,
37085) -> __m128d {
37086 unsafe {
37087 static_assert_uimm_bits!(NORM, 4);
37088 static_assert_uimm_bits!(SIGN, 2);
37089 let a: f64x2 = a.as_f64x2();
37090 let b: f64x2 = b.as_f64x2();
37091 let src: f64x2 = src.as_f64x2();
        let r: f64x2 = vgetmantsd(a, b, SIGN << 2 | NORM, src, k, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
37094 }
37095}
37096
37097/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
37098/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
37099/// _MM_MANT_NORM_1_2 // interval [1, 2)\
37100/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
37101/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
37102/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
37103/// The sign is determined by sc which can take the following values:\
37104/// _MM_MANT_SIGN_src // sign = sign(src)\
37105/// _MM_MANT_SIGN_zero // sign = 0\
37106/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
37107/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
37108///
37109/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getmant_sd&expand=2897)
37110#[inline]
37111#[target_feature(enable = "avx512f")]
37112#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37113#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0))]
37114#[rustc_legacy_const_generics(3, 4)]
37115pub fn _mm_maskz_getmant_sd<
37116 const NORM: _MM_MANTISSA_NORM_ENUM,
37117 const SIGN: _MM_MANTISSA_SIGN_ENUM,
37118>(
37119 k: __mmask8,
37120 a: __m128d,
37121 b: __m128d,
37122) -> __m128d {
37123 unsafe {
37124 static_assert_uimm_bits!(NORM, 4);
37125 static_assert_uimm_bits!(SIGN, 2);
37126 let a: f64x2 = a.as_f64x2();
37127 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vgetmantsd(
            a,
            b,
            SIGN << 2 | NORM,
            f64x2::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        );
        transmute(r)
37137 }
37138}
37139
37140/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
37141/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
37142/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
37143/// * [`_MM_FROUND_TO_NEG_INF`] : round down
37144/// * [`_MM_FROUND_TO_POS_INF`] : round up
37145/// * [`_MM_FROUND_TO_ZERO`] : truncate
37146/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37147///
37148/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_roundscale_ss&expand=4802)
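///
/// As a rough usage sketch (not part of the upstream docs): bits 7:4 of `IMM8` select how many
/// binary fraction bits to keep. It assumes a nightly toolchain with `stdarch_x86_avx512` and an
/// AVX-512F CPU:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm_set1_ps(0.0);
/// let b = _mm_set_ss(2.7);
/// // keep 1 fraction bit (IMM8[7:4] = 1) and truncate (IMM8[2:0] = _MM_FROUND_TO_ZERO)
/// let r = _mm_roundscale_ss::<{ (1 << 4) | _MM_FROUND_TO_ZERO }>(a, b);
/// // lane 0 of `r` is 2.5 (= trunc(2.7 * 2) / 2)
/// ```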
37149#[inline]
37150#[target_feature(enable = "avx512f")]
37151#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37152#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 255))]
37153#[rustc_legacy_const_generics(2)]
37154pub fn _mm_roundscale_ss<const IMM8: i32>(a: __m128, b: __m128) -> __m128 {
37155 unsafe {
37156 static_assert_uimm_bits!(IMM8, 8);
37157 let a: f32x4 = a.as_f32x4();
37158 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vrndscaless(
            a,
            b,
            f32x4::ZERO,
            0b11111111,
            IMM8,
            _MM_FROUND_CUR_DIRECTION,
        );
        transmute(r)
37168 }
37169}
37170
37171/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
37172/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
37173/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
37174/// * [`_MM_FROUND_TO_NEG_INF`] : round down
37175/// * [`_MM_FROUND_TO_POS_INF`] : round up
37176/// * [`_MM_FROUND_TO_ZERO`] : truncate
37177/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37178///
37179/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_roundscale_ss&expand=4800)
37180#[inline]
37181#[target_feature(enable = "avx512f")]
37182#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37183#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0))]
37184#[rustc_legacy_const_generics(4)]
37185pub fn _mm_mask_roundscale_ss<const IMM8: i32>(
37186 src: __m128,
37187 k: __mmask8,
37188 a: __m128,
37189 b: __m128,
37190) -> __m128 {
37191 unsafe {
37192 static_assert_uimm_bits!(IMM8, 8);
37193 let a: f32x4 = a.as_f32x4();
37194 let b: f32x4 = b.as_f32x4();
37195 let src: f32x4 = src.as_f32x4();
        let r: f32x4 = vrndscaless(a, b, src, k, IMM8, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
37198 }
37199}
37200
37201/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
37202/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
37203/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
37204/// * [`_MM_FROUND_TO_NEG_INF`] : round down
37205/// * [`_MM_FROUND_TO_POS_INF`] : round up
37206/// * [`_MM_FROUND_TO_ZERO`] : truncate
37207/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37208///
37209/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_roundscale_ss&expand=4801)
37210#[inline]
37211#[target_feature(enable = "avx512f")]
37212#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37213#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0))]
37214#[rustc_legacy_const_generics(3)]
37215pub fn _mm_maskz_roundscale_ss<const IMM8: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
37216 unsafe {
37217 static_assert_uimm_bits!(IMM8, 8);
37218 let a: f32x4 = a.as_f32x4();
37219 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vrndscaless(a, b, f32x4::ZERO, k, IMM8, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
37222 }
37223}
37224
37225/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
37226/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
37227/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
37228/// * [`_MM_FROUND_TO_NEG_INF`] : round down
37229/// * [`_MM_FROUND_TO_POS_INF`] : round up
37230/// * [`_MM_FROUND_TO_ZERO`] : truncate
37231/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37232///
37233/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_roundscale_sd&expand=4799)
37234#[inline]
37235#[target_feature(enable = "avx512f")]
37236#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37237#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 255))]
37238#[rustc_legacy_const_generics(2)]
37239pub fn _mm_roundscale_sd<const IMM8: i32>(a: __m128d, b: __m128d) -> __m128d {
37240 unsafe {
37241 static_assert_uimm_bits!(IMM8, 8);
37242 let a: f64x2 = a.as_f64x2();
37243 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vrndscalesd(
            a,
            b,
            f64x2::ZERO,
            0b11111111,
            IMM8,
            _MM_FROUND_CUR_DIRECTION,
        );
        transmute(r)
37253 }
37254}
37255
37256/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
37257/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
37258/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
37259/// * [`_MM_FROUND_TO_NEG_INF`] : round down
37260/// * [`_MM_FROUND_TO_POS_INF`] : round up
37261/// * [`_MM_FROUND_TO_ZERO`] : truncate
37262/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37263///
37264/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_roundscale_sd&expand=4797)
37265#[inline]
37266#[target_feature(enable = "avx512f")]
37267#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37268#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0))]
37269#[rustc_legacy_const_generics(4)]
37270pub fn _mm_mask_roundscale_sd<const IMM8: i32>(
37271 src: __m128d,
37272 k: __mmask8,
37273 a: __m128d,
37274 b: __m128d,
37275) -> __m128d {
37276 unsafe {
37277 static_assert_uimm_bits!(IMM8, 8);
37278 let a: f64x2 = a.as_f64x2();
37279 let b: f64x2 = b.as_f64x2();
37280 let src: f64x2 = src.as_f64x2();
        let r: f64x2 = vrndscalesd(a, b, src, k, IMM8, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
37283 }
37284}
37285
37286/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
37287/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
37288/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
37289/// * [`_MM_FROUND_TO_NEG_INF`] : round down
37290/// * [`_MM_FROUND_TO_POS_INF`] : round up
37291/// * [`_MM_FROUND_TO_ZERO`] : truncate
37292/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37293///
37294/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_roundscale_sd&expand=4798)
37295#[inline]
37296#[target_feature(enable = "avx512f")]
37297#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37298#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0))]
37299#[rustc_legacy_const_generics(3)]
37300pub fn _mm_maskz_roundscale_sd<const IMM8: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
37301 unsafe {
37302 static_assert_uimm_bits!(IMM8, 8);
37303 let a: f64x2 = a.as_f64x2();
37304 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vrndscalesd(a, b, f64x2::ZERO, k, IMM8, _MM_FROUND_CUR_DIRECTION);
        transmute(r)
37307 }
37308}
37309
37310/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
37311///
37312/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_scalef_ss&expand=4901)
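///
/// Minimal sketch (not from the upstream docs; assumes a nightly toolchain with
/// `stdarch_x86_avx512` and an AVX-512F CPU) showing that the lower lane becomes
/// `a * 2^floor(b)`:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm_set_ss(3.0);
/// let b = _mm_set_ss(2.0);
/// let r = _mm_scalef_ss(a, b);
/// // lane 0 of `r` is 3.0 * 2^2 = 12.0; the upper lanes are copied from `a`
/// ```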
37313#[inline]
37314#[target_feature(enable = "avx512f")]
37315#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37316#[cfg_attr(test, assert_instr(vscalefss))]
37317pub fn _mm_scalef_ss(a: __m128, b: __m128) -> __m128 {
37318 unsafe {
37319 let a: f32x4 = a.as_f32x4();
37320 let b: f32x4 = b.as_f32x4();
        transmute(vscalefss(
            a,
            b,
            f32x4::ZERO,
            0b11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
37328 }
37329}
37330
37331/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37332///
37333/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_scalef_ss&expand=4899)
37334#[inline]
37335#[target_feature(enable = "avx512f")]
37336#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37337#[cfg_attr(test, assert_instr(vscalefss))]
37338pub fn _mm_mask_scalef_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
37339 unsafe {
37340 let a: f32x4 = a.as_f32x4();
37341 let b: f32x4 = b.as_f32x4();
37342 let src: f32x4 = src.as_f32x4();
        transmute(vscalefss(a, b, src, k, _MM_FROUND_CUR_DIRECTION))
37344 }
37345}
37346
37347/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37348///
37349/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_scalef_ss&expand=4900)
37350#[inline]
37351#[target_feature(enable = "avx512f")]
37352#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37353#[cfg_attr(test, assert_instr(vscalefss))]
37354pub fn _mm_maskz_scalef_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
37355 unsafe {
        transmute(vscalefss(
            a.as_f32x4(),
            b.as_f32x4(),
            f32x4::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
37363 }
37364}
37365
37366/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
37367///
37368/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_scalef_sd&expand=4898)
37369#[inline]
37370#[target_feature(enable = "avx512f")]
37371#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37372#[cfg_attr(test, assert_instr(vscalefsd))]
37373pub fn _mm_scalef_sd(a: __m128d, b: __m128d) -> __m128d {
37374 unsafe {
        transmute(vscalefsd(
            a.as_f64x2(),
            b.as_f64x2(),
            f64x2::ZERO,
            0b11111111,
            _MM_FROUND_CUR_DIRECTION,
        ))
37382 }
37383}
37384
37385/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37386///
37387/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_scalef_sd&expand=4896)
37388#[inline]
37389#[target_feature(enable = "avx512f")]
37390#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37391#[cfg_attr(test, assert_instr(vscalefsd))]
37392pub fn _mm_mask_scalef_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
37393 unsafe {
        transmute(vscalefsd(
            a.as_f64x2(),
            b.as_f64x2(),
            src.as_f64x2(),
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
37401 }
37402}
37403
37404/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37405///
37406/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_scalef_sd&expand=4897)
37407#[inline]
37408#[target_feature(enable = "avx512f")]
37409#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37410#[cfg_attr(test, assert_instr(vscalefsd))]
37411pub fn _mm_maskz_scalef_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
37412 unsafe {
        transmute(vscalefsd(
            a.as_f64x2(),
            b.as_f64x2(),
            f64x2::ZERO,
            k,
            _MM_FROUND_CUR_DIRECTION,
        ))
37420 }
37421}
37422
37423/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37424///
37425/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmadd_ss&expand=2582)
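///
/// Rough usage sketch (not part of the upstream docs; assumes a nightly toolchain with
/// `stdarch_x86_avx512` and an AVX-512F CPU) showing how mask bit 0 selects between the fused
/// result and the passthrough value from `a`:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm_set_ss(2.0);
/// let b = _mm_set_ss(3.0);
/// let c = _mm_set_ss(4.0);
/// let taken = _mm_mask_fmadd_ss(a, 0b1, b, c); // lane 0 = 2.0 * 3.0 + 4.0 = 10.0
/// let kept = _mm_mask_fmadd_ss(a, 0b0, b, c); // lane 0 = 2.0 (copied from `a`)
/// ```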
37426#[inline]
37427#[target_feature(enable = "avx512f")]
37428#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37429#[cfg_attr(test, assert_instr(vfmadd))]
37430pub fn _mm_mask_fmadd_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
37431 unsafe {
37432 let mut fmadd: f32 = simd_extract!(a, 0);
37433 if (k & 0b00000001) != 0 {
37434 let extractb: f32 = simd_extract!(b, 0);
37435 let extractc: f32 = simd_extract!(c, 0);
            fmadd = fmaf32(fmadd, extractb, extractc);
37437 }
37438 simd_insert!(a, 0, fmadd)
37439 }
37440}
37441
37442/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37443///
37444/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_ss&expand=2584)
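///
/// Zeromask sketch (not from the upstream docs; assumes a nightly toolchain with
/// `stdarch_x86_avx512` and an AVX-512F CPU): when mask bit 0 is clear the lower lane is forced
/// to zero instead of being copied from `a`:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm_set_ss(2.0);
/// let b = _mm_set_ss(3.0);
/// let c = _mm_set_ss(4.0);
/// let r = _mm_maskz_fmadd_ss(0b0, a, b, c); // lane 0 = 0.0
/// ```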
37445#[inline]
37446#[target_feature(enable = "avx512f")]
37447#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37448#[cfg_attr(test, assert_instr(vfmadd))]
37449pub fn _mm_maskz_fmadd_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
37450 unsafe {
37451 let mut fmadd: f32 = 0.;
37452 if (k & 0b00000001) != 0 {
37453 let extracta: f32 = simd_extract!(a, 0);
37454 let extractb: f32 = simd_extract!(b, 0);
37455 let extractc: f32 = simd_extract!(c, 0);
            fmadd = fmaf32(extracta, extractb, extractc);
37457 }
37458 simd_insert!(a, 0, fmadd)
37459 }
37460}
37461
37462/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
37463///
37464/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_ss&expand=2583)
37465#[inline]
37466#[target_feature(enable = "avx512f")]
37467#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37468#[cfg_attr(test, assert_instr(vfmadd))]
37469pub fn _mm_mask3_fmadd_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
37470 unsafe {
37471 let mut fmadd: f32 = simd_extract!(c, 0);
37472 if (k & 0b00000001) != 0 {
37473 let extracta: f32 = simd_extract!(a, 0);
37474 let extractb: f32 = simd_extract!(b, 0);
            fmadd = fmaf32(extracta, extractb, fmadd);
37476 }
37477 simd_insert!(c, 0, fmadd)
37478 }
37479}
37480
37481/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37482///
37483/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmadd_sd&expand=2578)
37484#[inline]
37485#[target_feature(enable = "avx512f")]
37486#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37487#[cfg_attr(test, assert_instr(vfmadd))]
37488pub fn _mm_mask_fmadd_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
37489 unsafe {
37490 let mut fmadd: f64 = simd_extract!(a, 0);
37491 if (k & 0b00000001) != 0 {
37492 let extractb: f64 = simd_extract!(b, 0);
37493 let extractc: f64 = simd_extract!(c, 0);
            fmadd = fmaf64(fmadd, extractb, extractc);
37495 }
37496 simd_insert!(a, 0, fmadd)
37497 }
37498}
37499
37500/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37501///
37502/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_sd&expand=2580)
37503#[inline]
37504#[target_feature(enable = "avx512f")]
37505#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37506#[cfg_attr(test, assert_instr(vfmadd))]
37507pub fn _mm_maskz_fmadd_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
37508 unsafe {
37509 let mut fmadd: f64 = 0.;
37510 if (k & 0b00000001) != 0 {
37511 let extracta: f64 = simd_extract!(a, 0);
37512 let extractb: f64 = simd_extract!(b, 0);
37513 let extractc: f64 = simd_extract!(c, 0);
            fmadd = fmaf64(extracta, extractb, extractc);
37515 }
37516 simd_insert!(a, 0, fmadd)
37517 }
37518}
37519
37520/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
37521///
37522/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_sd&expand=2579)
37523#[inline]
37524#[target_feature(enable = "avx512f")]
37525#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37526#[cfg_attr(test, assert_instr(vfmadd))]
37527pub fn _mm_mask3_fmadd_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
37528 unsafe {
37529 let mut fmadd: f64 = simd_extract!(c, 0);
37530 if (k & 0b00000001) != 0 {
37531 let extracta: f64 = simd_extract!(a, 0);
37532 let extractb: f64 = simd_extract!(b, 0);
            fmadd = fmaf64(extracta, extractb, fmadd);
37534 }
37535 simd_insert!(c, 0, fmadd)
37536 }
37537}
37538
/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37540///
37541/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_ss&expand=2668)
37542#[inline]
37543#[target_feature(enable = "avx512f")]
37544#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37545#[cfg_attr(test, assert_instr(vfmsub))]
37546pub fn _mm_mask_fmsub_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
37547 unsafe {
37548 let mut fmsub: f32 = simd_extract!(a, 0);
37549 if (k & 0b00000001) != 0 {
37550 let extractb: f32 = simd_extract!(b, 0);
37551 let extractc: f32 = simd_extract!(c, 0);
37552 let extractc: f32 = -extractc;
            fmsub = fmaf32(fmsub, extractb, extractc);
37554 }
37555 simd_insert!(a, 0, fmsub)
37556 }
37557}
37558
37559/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37560///
37561/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_ss&expand=2670)
37562#[inline]
37563#[target_feature(enable = "avx512f")]
37564#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37565#[cfg_attr(test, assert_instr(vfmsub))]
37566pub fn _mm_maskz_fmsub_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
37567 unsafe {
37568 let mut fmsub: f32 = 0.;
37569 if (k & 0b00000001) != 0 {
37570 let extracta: f32 = simd_extract!(a, 0);
37571 let extractb: f32 = simd_extract!(b, 0);
37572 let extractc: f32 = simd_extract!(c, 0);
37573 let extractc: f32 = -extractc;
            fmsub = fmaf32(extracta, extractb, extractc);
37575 }
37576 simd_insert!(a, 0, fmsub)
37577 }
37578}
37579
37580/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
37581///
37582/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_ss&expand=2669)
37583#[inline]
37584#[target_feature(enable = "avx512f")]
37585#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37586#[cfg_attr(test, assert_instr(vfmsub))]
37587pub fn _mm_mask3_fmsub_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
37588 unsafe {
37589 let mut fmsub: f32 = simd_extract!(c, 0);
37590 if (k & 0b00000001) != 0 {
37591 let extracta: f32 = simd_extract!(a, 0);
37592 let extractb: f32 = simd_extract!(b, 0);
37593 let extractc: f32 = -fmsub;
            fmsub = fmaf32(extracta, extractb, extractc);
37595 }
37596 simd_insert!(c, 0, fmsub)
37597 }
37598}
37599
37600/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37601///
37602/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_sd&expand=2664)
37603#[inline]
37604#[target_feature(enable = "avx512f")]
37605#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37606#[cfg_attr(test, assert_instr(vfmsub))]
37607pub fn _mm_mask_fmsub_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
37608 unsafe {
37609 let mut fmsub: f64 = simd_extract!(a, 0);
37610 if (k & 0b00000001) != 0 {
37611 let extractb: f64 = simd_extract!(b, 0);
37612 let extractc: f64 = simd_extract!(c, 0);
37613 let extractc: f64 = -extractc;
            fmsub = fmaf64(fmsub, extractb, extractc);
37615 }
37616 simd_insert!(a, 0, fmsub)
37617 }
37618}
37619
37620/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37621///
37622/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_sd&expand=2666)
37623#[inline]
37624#[target_feature(enable = "avx512f")]
37625#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37626#[cfg_attr(test, assert_instr(vfmsub))]
37627pub fn _mm_maskz_fmsub_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
37628 unsafe {
37629 let mut fmsub: f64 = 0.;
37630 if (k & 0b00000001) != 0 {
37631 let extracta: f64 = simd_extract!(a, 0);
37632 let extractb: f64 = simd_extract!(b, 0);
37633 let extractc: f64 = simd_extract!(c, 0);
37634 let extractc: f64 = -extractc;
            fmsub = fmaf64(extracta, extractb, extractc);
37636 }
37637 simd_insert!(a, 0, fmsub)
37638 }
37639}
37640
37641/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
37642///
37643/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_sd&expand=2665)
37644#[inline]
37645#[target_feature(enable = "avx512f")]
37646#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37647#[cfg_attr(test, assert_instr(vfmsub))]
37648pub fn _mm_mask3_fmsub_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
37649 unsafe {
37650 let mut fmsub: f64 = simd_extract!(c, 0);
37651 if (k & 0b00000001) != 0 {
37652 let extracta: f64 = simd_extract!(a, 0);
37653 let extractb: f64 = simd_extract!(b, 0);
37654 let extractc: f64 = -fmsub;
            fmsub = fmaf64(extracta, extractb, extractc);
37656 }
37657 simd_insert!(c, 0, fmsub)
37658 }
37659}
37660
37661/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37662///
37663/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_ss&expand=2748)
37664#[inline]
37665#[target_feature(enable = "avx512f")]
37666#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37667#[cfg_attr(test, assert_instr(vfnmadd))]
37668pub fn _mm_mask_fnmadd_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
37669 unsafe {
37670 let mut fnmadd: f32 = simd_extract!(a, 0);
37671 if (k & 0b00000001) != 0 {
37672 let extracta: f32 = -fnmadd;
37673 let extractb: f32 = simd_extract!(b, 0);
37674 let extractc: f32 = simd_extract!(c, 0);
            fnmadd = fmaf32(extracta, extractb, extractc);
37676 }
37677 simd_insert!(a, 0, fnmadd)
37678 }
37679}
37680
37681/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37682///
37683/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_ss&expand=2750)
37684#[inline]
37685#[target_feature(enable = "avx512f")]
37686#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37687#[cfg_attr(test, assert_instr(vfnmadd))]
37688pub fn _mm_maskz_fnmadd_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
37689 unsafe {
37690 let mut fnmadd: f32 = 0.;
37691 if (k & 0b00000001) != 0 {
37692 let extracta: f32 = simd_extract!(a, 0);
37693 let extracta: f32 = -extracta;
37694 let extractb: f32 = simd_extract!(b, 0);
37695 let extractc: f32 = simd_extract!(c, 0);
            fnmadd = fmaf32(extracta, extractb, extractc);
37697 }
37698 simd_insert!(a, 0, fnmadd)
37699 }
37700}
37701
37702/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
37703///
37704/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_ss&expand=2749)
37705#[inline]
37706#[target_feature(enable = "avx512f")]
37707#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37708#[cfg_attr(test, assert_instr(vfnmadd))]
37709pub fn _mm_mask3_fnmadd_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
37710 unsafe {
37711 let mut fnmadd: f32 = simd_extract!(c, 0);
37712 if (k & 0b00000001) != 0 {
37713 let extracta: f32 = simd_extract!(a, 0);
37714 let extracta: f32 = -extracta;
37715 let extractb: f32 = simd_extract!(b, 0);
            fnmadd = fmaf32(extracta, extractb, fnmadd);
37717 }
37718 simd_insert!(c, 0, fnmadd)
37719 }
37720}
37721
37722/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37723///
37724/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_sd&expand=2744)
37725#[inline]
37726#[target_feature(enable = "avx512f")]
37727#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37728#[cfg_attr(test, assert_instr(vfnmadd))]
37729pub fn _mm_mask_fnmadd_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
37730 unsafe {
37731 let mut fnmadd: f64 = simd_extract!(a, 0);
37732 if (k & 0b00000001) != 0 {
37733 let extracta: f64 = -fnmadd;
37734 let extractb: f64 = simd_extract!(b, 0);
37735 let extractc: f64 = simd_extract!(c, 0);
            fnmadd = fmaf64(extracta, extractb, extractc);
37737 }
37738 simd_insert!(a, 0, fnmadd)
37739 }
37740}
37741
37742/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37743///
37744/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_sd&expand=2746)
37745#[inline]
37746#[target_feature(enable = "avx512f")]
37747#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37748#[cfg_attr(test, assert_instr(vfnmadd))]
37749pub fn _mm_maskz_fnmadd_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
37750 unsafe {
37751 let mut fnmadd: f64 = 0.;
37752 if (k & 0b00000001) != 0 {
37753 let extracta: f64 = simd_extract!(a, 0);
37754 let extracta: f64 = -extracta;
37755 let extractb: f64 = simd_extract!(b, 0);
37756 let extractc: f64 = simd_extract!(c, 0);
            fnmadd = fmaf64(extracta, extractb, extractc);
37758 }
37759 simd_insert!(a, 0, fnmadd)
37760 }
37761}
37762
37763/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
37764///
37765/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_sd&expand=2745)
37766#[inline]
37767#[target_feature(enable = "avx512f")]
37768#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37769#[cfg_attr(test, assert_instr(vfnmadd))]
37770pub fn _mm_mask3_fnmadd_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
37771 unsafe {
37772 let mut fnmadd: f64 = simd_extract!(c, 0);
37773 if (k & 0b00000001) != 0 {
37774 let extracta: f64 = simd_extract!(a, 0);
37775 let extracta: f64 = -extracta;
37776 let extractb: f64 = simd_extract!(b, 0);
            fnmadd = fmaf64(extracta, extractb, fnmadd);
37778 }
37779 simd_insert!(c, 0, fnmadd)
37780 }
37781}
37782
/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37784///
37785/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_ss&expand=2796)
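///
/// Sign-convention sketch (not from the upstream docs; assumes a nightly toolchain with
/// `stdarch_x86_avx512` and an AVX-512F CPU): with mask bit 0 set the lower lane becomes
/// `-(a * b) - c`:
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm_set_ss(2.0);
/// let b = _mm_set_ss(3.0);
/// let c = _mm_set_ss(4.0);
/// let r = _mm_mask_fnmsub_ss(a, 0b1, b, c); // lane 0 = -(2.0 * 3.0) - 4.0 = -10.0
/// ```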
37786#[inline]
37787#[target_feature(enable = "avx512f")]
37788#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37789#[cfg_attr(test, assert_instr(vfnmsub))]
37790pub fn _mm_mask_fnmsub_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
37791 unsafe {
37792 let mut fnmsub: f32 = simd_extract!(a, 0);
37793 if (k & 0b00000001) != 0 {
37794 let extracta: f32 = -fnmsub;
37795 let extractb: f32 = simd_extract!(b, 0);
37796 let extractc: f32 = simd_extract!(c, 0);
37797 let extractc: f32 = -extractc;
            fnmsub = fmaf32(extracta, extractb, extractc);
37799 }
37800 simd_insert!(a, 0, fnmsub)
37801 }
37802}
37803
37804/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
37805///
37806/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_ss&expand=2798)
37807#[inline]
37808#[target_feature(enable = "avx512f")]
37809#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37810#[cfg_attr(test, assert_instr(vfnmsub))]
37811pub fn _mm_maskz_fnmsub_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
37812 unsafe {
37813 let mut fnmsub: f32 = 0.;
37814 if (k & 0b00000001) != 0 {
37815 let extracta: f32 = simd_extract!(a, 0);
37816 let extracta: f32 = -extracta;
37817 let extractb: f32 = simd_extract!(b, 0);
37818 let extractc: f32 = simd_extract!(c, 0);
37819 let extractc: f32 = -extractc;
            fnmsub = fmaf32(extracta, extractb, extractc);
37821 }
37822 simd_insert!(a, 0, fnmsub)
37823 }
37824}
37825
37826/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.
37827///
37828/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_ss&expand=2797)
37829#[inline]
37830#[target_feature(enable = "avx512f")]
37831#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37832#[cfg_attr(test, assert_instr(vfnmsub))]
37833pub fn _mm_mask3_fnmsub_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
37834 unsafe {
37835 let mut fnmsub: f32 = simd_extract!(c, 0);
37836 if (k & 0b00000001) != 0 {
37837 let extracta: f32 = simd_extract!(a, 0);
37838 let extracta: f32 = -extracta;
37839 let extractb: f32 = simd_extract!(b, 0);
37840 let extractc: f32 = -fnmsub;
            fnmsub = fmaf32(extracta, extractb, extractc);
37842 }
37843 simd_insert!(c, 0, fnmsub)
37844 }
37845}
37846
/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37848///
37849/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_sd&expand=2792)
37850#[inline]
37851#[target_feature(enable = "avx512f")]
37852#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37853#[cfg_attr(test, assert_instr(vfnmsub))]
37854pub fn _mm_mask_fnmsub_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
37855 unsafe {
37856 let mut fnmsub: f64 = simd_extract!(a, 0);
37857 if (k & 0b00000001) != 0 {
37858 let extracta: f64 = -fnmsub;
37859 let extractb: f64 = simd_extract!(b, 0);
37860 let extractc: f64 = simd_extract!(c, 0);
37861 let extractc: f64 = -extractc;
            fnmsub = fmaf64(extracta, extractb, extractc);
37863 }
37864 simd_insert!(a, 0, fnmsub)
37865 }
37866}
37867
/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
37869///
37870/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_sd&expand=2794)
37871#[inline]
37872#[target_feature(enable = "avx512f")]
37873#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37874#[cfg_attr(test, assert_instr(vfnmsub))]
37875pub fn _mm_maskz_fnmsub_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
37876 unsafe {
37877 let mut fnmsub: f64 = 0.;
37878 if (k & 0b00000001) != 0 {
37879 let extracta: f64 = simd_extract!(a, 0);
37880 let extracta: f64 = -extracta;
37881 let extractb: f64 = simd_extract!(b, 0);
37882 let extractc: f64 = simd_extract!(c, 0);
37883 let extractc: f64 = -extractc;
            fnmsub = fmaf64(extracta, extractb, extractc);
37885 }
37886 simd_insert!(a, 0, fnmsub)
37887 }
37888}
37889
37890/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.
37891///
37892/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_sd&expand=2793)
37893#[inline]
37894#[target_feature(enable = "avx512f")]
37895#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37896#[cfg_attr(test, assert_instr(vfnmsub))]
37897pub fn _mm_mask3_fnmsub_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
37898 unsafe {
37899 let mut fnmsub: f64 = simd_extract!(c, 0);
37900 if (k & 0b00000001) != 0 {
37901 let extracta: f64 = simd_extract!(a, 0);
37902 let extracta: f64 = -extracta;
37903 let extractb: f64 = simd_extract!(b, 0);
37904 let extractc: f64 = -fnmsub;
            fnmsub = fmaf64(extracta, extractb, extractc);
37906 }
37907 simd_insert!(c, 0, fnmsub)
37908 }
37909}
37910
37911/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
37912///
37913/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
37914/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
37915/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
37916/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
37917/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
37918/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37919///
37920/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_round_ss&expand=151)
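///
/// Sketch of how a rounding constant is passed (not part of the upstream docs; assumes a nightly
/// toolchain with `stdarch_x86_avx512` and an AVX-512F CPU):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm_set_ss(1.5);
/// let b = _mm_set_ss(2.25);
/// // round to nearest with exceptions suppressed
/// let r = _mm_add_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
/// // lane 0 of `r` is 3.75; the upper lanes are copied from `a`
/// ```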
37921#[inline]
37922#[target_feature(enable = "avx512f")]
37923#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37924#[cfg_attr(test, assert_instr(vaddss, ROUNDING = 8))]
37925#[rustc_legacy_const_generics(2)]
37926pub fn _mm_add_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
37927 unsafe {
37928 static_assert_rounding!(ROUNDING);
37929 let a: f32x4 = a.as_f32x4();
37930 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vaddss(a, b, f32x4::ZERO, 0b1, ROUNDING);
        transmute(r)
37933 }
37934}
37935
37936/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
37937///
37938/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
37939/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
37940/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
37941/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
37942/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
37943/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37944///
37945/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_add_round_ss&expand=152)
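///
/// Sketch of the `src` passthrough (not part of the upstream docs; assumes a nightly toolchain
/// with `stdarch_x86_avx512` and an AVX-512F CPU):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let src = _mm_set_ss(-1.0);
/// let a = _mm_set_ss(1.5);
/// let b = _mm_set_ss(2.25);
/// // mask bit 0 is clear, so lane 0 of `r` is taken from `src` (-1.0)
/// let r = _mm_mask_add_round_ss::<{ _MM_FROUND_CUR_DIRECTION }>(src, 0b0, a, b);
/// ```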
37946#[inline]
37947#[target_feature(enable = "avx512f")]
37948#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37949#[cfg_attr(test, assert_instr(vaddss, ROUNDING = 8))]
37950#[rustc_legacy_const_generics(4)]
37951pub fn _mm_mask_add_round_ss<const ROUNDING: i32>(
37952 src: __m128,
37953 k: __mmask8,
37954 a: __m128,
37955 b: __m128,
37956) -> __m128 {
37957 unsafe {
37958 static_assert_rounding!(ROUNDING);
37959 let a: f32x4 = a.as_f32x4();
37960 let b: f32x4 = b.as_f32x4();
37961 let src: f32x4 = src.as_f32x4();
        let r: f32x4 = vaddss(a, b, src, k, ROUNDING);
        transmute(r)
37964 }
37965}
37966
37967/// Add the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
37968///
37969/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
37970/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
37971/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
37972/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
37973/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
37974/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
37975///
37976/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_round_ss&expand=153)
37977#[inline]
37978#[target_feature(enable = "avx512f")]
37979#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
37980#[cfg_attr(test, assert_instr(vaddss, ROUNDING = 8))]
37981#[rustc_legacy_const_generics(3)]
37982pub fn _mm_maskz_add_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
37983 unsafe {
37984 static_assert_rounding!(ROUNDING);
37985 let a: f32x4 = a.as_f32x4();
37986 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vaddss(a, b, f32x4::ZERO, k, ROUNDING);
        transmute(r)
37989 }
37990}
37991
37992/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
37993///
37994/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
37995/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
37996/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
37997/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
37998/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
37999/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38000///
38001/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_add_round_sd&expand=148)
38002#[inline]
38003#[target_feature(enable = "avx512f")]
38004#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38005#[cfg_attr(test, assert_instr(vaddsd, ROUNDING = 8))]
38006#[rustc_legacy_const_generics(2)]
38007pub fn _mm_add_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
38008 unsafe {
38009 static_assert_rounding!(ROUNDING);
38010 let a: f64x2 = a.as_f64x2();
38011 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vaddsd(a, b, f64x2::ZERO, 0b1, ROUNDING);
        transmute(r)
38014 }
38015}
38016
38017/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38018///
38019/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38020/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38021/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38022/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38023/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38024/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38025///
38026/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_add_round_sd&expand=149)
38027#[inline]
38028#[target_feature(enable = "avx512f")]
38029#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38030#[cfg_attr(test, assert_instr(vaddsd, ROUNDING = 8))]
38031#[rustc_legacy_const_generics(4)]
38032pub fn _mm_mask_add_round_sd<const ROUNDING: i32>(
38033 src: __m128d,
38034 k: __mmask8,
38035 a: __m128d,
38036 b: __m128d,
38037) -> __m128d {
38038 unsafe {
38039 static_assert_rounding!(ROUNDING);
38040 let a: f64x2 = a.as_f64x2();
38041 let b: f64x2 = b.as_f64x2();
38042 let src: f64x2 = src.as_f64x2();
        let r: f64x2 = vaddsd(a, b, src, k, ROUNDING);
        transmute(r)
38045 }
38046}
38047
38048/// Add the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38049///
38050/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38051/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38052/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38053/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38054/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38055/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38056///
38057/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_add_round_sd&expand=150)
38058#[inline]
38059#[target_feature(enable = "avx512f")]
38060#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38061#[cfg_attr(test, assert_instr(vaddsd, ROUNDING = 8))]
38062#[rustc_legacy_const_generics(3)]
38063pub fn _mm_maskz_add_round_sd<const ROUNDING: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38064 unsafe {
38065 static_assert_rounding!(ROUNDING);
38066 let a: f64x2 = a.as_f64x2();
38067 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vaddsd(a, b, f64x2::ZERO, k, ROUNDING);
        transmute(r)
38070 }
38071}
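
// Sketch of the write-mask vs. zero-mask behaviour (illustrative, not from the
// original source): only bit 0 of `k` matters for these scalar intrinsics.
//
//     let src = _mm_set_sd(-1.0);
//     let a = _mm_set_sd(10.0);
//     let b = _mm_set_sd(32.0);
//     // k = 0: the low lane is taken from `src` (writemask) or zeroed (zeromask).
//     let masked = _mm_mask_add_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(src, 0, a, b); // low lane = -1.0
//     let zeroed = _mm_maskz_add_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);     // low lane = 0.0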
38072
38073/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
38074///
38075/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38076/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38077/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38078/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38079/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38080/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38081///
38082/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_sub_round_ss&expand=5745)
38083#[inline]
38084#[target_feature(enable = "avx512f")]
38085#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38086#[cfg_attr(test, assert_instr(vsubss, ROUNDING = 8))]
38087#[rustc_legacy_const_generics(2)]
38088pub fn _mm_sub_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
38089 unsafe {
38090 static_assert_rounding!(ROUNDING);
38091 let a: f32x4 = a.as_f32x4();
38092 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vsubss(a, b, f32x4::ZERO, 0b1, ROUNDING);
        transmute(r)
38095 }
38096}
38097
38098/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38099///
38100/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38101/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38102/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38103/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38104/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38105/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38106///
38107/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sub_round_ss&expand=5743)
38108#[inline]
38109#[target_feature(enable = "avx512f")]
38110#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38111#[cfg_attr(test, assert_instr(vsubss, ROUNDING = 8))]
38112#[rustc_legacy_const_generics(4)]
38113pub fn _mm_mask_sub_round_ss<const ROUNDING: i32>(
38114 src: __m128,
38115 k: __mmask8,
38116 a: __m128,
38117 b: __m128,
38118) -> __m128 {
38119 unsafe {
38120 static_assert_rounding!(ROUNDING);
38121 let a: f32x4 = a.as_f32x4();
38122 let b: f32x4 = b.as_f32x4();
38123 let src: f32x4 = src.as_f32x4();
        let r: f32x4 = vsubss(a, b, src, k, ROUNDING);
        transmute(r)
38126 }
38127}
38128
38129/// Subtract the lower single-precision (32-bit) floating-point element in b from the lower single-precision (32-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38130///
38131/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38132/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38133/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38134/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38135/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38136/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38137///
38138/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sub_round_ss&expand=5744)
38139#[inline]
38140#[target_feature(enable = "avx512f")]
38141#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38142#[cfg_attr(test, assert_instr(vsubss, ROUNDING = 8))]
38143#[rustc_legacy_const_generics(3)]
38144pub fn _mm_maskz_sub_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38145 unsafe {
38146 static_assert_rounding!(ROUNDING);
38147 let a: f32x4 = a.as_f32x4();
38148 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vsubss(a, b, f32x4::ZERO, k, ROUNDING);
        transmute(r)
38151 }
38152}
38153
38154/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
38155///
38156/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38157/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38158/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38159/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38160/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38161/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38162///
38163/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_sub_round_sd&expand=5742)
38164#[inline]
38165#[target_feature(enable = "avx512f")]
38166#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38167#[cfg_attr(test, assert_instr(vsubsd, ROUNDING = 8))]
38168#[rustc_legacy_const_generics(2)]
38169pub fn _mm_sub_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
38170 unsafe {
38171 static_assert_rounding!(ROUNDING);
38172 let a: f64x2 = a.as_f64x2();
38173 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vsubsd(a, b, f64x2::ZERO, 0b1, ROUNDING);
        transmute(r)
38176 }
38177}
38178
38179/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38180///
38181/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38182/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38183/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38184/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38185/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38186/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38187///
38188/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sub_round_sd&expand=5740)
38189#[inline]
38190#[target_feature(enable = "avx512f")]
38191#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38192#[cfg_attr(test, assert_instr(vsubsd, ROUNDING = 8))]
38193#[rustc_legacy_const_generics(4)]
38194pub fn _mm_mask_sub_round_sd<const ROUNDING: i32>(
38195 src: __m128d,
38196 k: __mmask8,
38197 a: __m128d,
38198 b: __m128d,
38199) -> __m128d {
38200 unsafe {
38201 static_assert_rounding!(ROUNDING);
38202 let a: f64x2 = a.as_f64x2();
38203 let b: f64x2 = b.as_f64x2();
38204 let src: f64x2 = src.as_f64x2();
        let r: f64x2 = vsubsd(a, b, src, k, ROUNDING);
        transmute(r)
38207 }
38208}
38209
38210/// Subtract the lower double-precision (64-bit) floating-point element in b from the lower double-precision (64-bit) floating-point element in a, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38211///
38212/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38213/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38214/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38215/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38216/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38217/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38218///
38219/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sub_round_sd&expand=5741)
38220#[inline]
38221#[target_feature(enable = "avx512f")]
38222#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38223#[cfg_attr(test, assert_instr(vsubsd, ROUNDING = 8))]
38224#[rustc_legacy_const_generics(3)]
38225pub fn _mm_maskz_sub_round_sd<const ROUNDING: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38226 unsafe {
38227 static_assert_rounding!(ROUNDING);
38228 let a: f64x2 = a.as_f64x2();
38229 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vsubsd(a, b, f64x2::ZERO, k, ROUNDING);
        transmute(r)
38232 }
38233}
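
// Worked example (illustrative, not from the original source) showing that the
// rounding mode is observable: 1.0 - 2^-60 is not representable as an f64, so
// the low-lane result depends on ROUNDING.
//
//     let a = _mm_set_sd(1.0);
//     let b = _mm_set_sd(2f64.powi(-60));
//     let near = _mm_sub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
//     let down = _mm_sub_round_sd::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a, b);
//     // Low lane: `near` is 1.0, `down` is the next f64 below 1.0 (1.0 - 2^-53).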
38234
38235/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
38236///
38237/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38238/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38239/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38240/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38241/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38242/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38243///
38244/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mul_round_ss&expand=3946)
38245#[inline]
38246#[target_feature(enable = "avx512f")]
38247#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38248#[cfg_attr(test, assert_instr(vmulss, ROUNDING = 8))]
38249#[rustc_legacy_const_generics(2)]
38250pub fn _mm_mul_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
38251 unsafe {
38252 static_assert_rounding!(ROUNDING);
38253 let a: f32x4 = a.as_f32x4();
38254 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vmulss(a, b, f32x4::ZERO, 0b1, ROUNDING);
        transmute(r)
38257 }
38258}
38259
38260/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38261///
38262/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38263/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38264/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38265/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38266/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38267/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38268///
38269/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_mul_round_ss&expand=3944)
38270#[inline]
38271#[target_feature(enable = "avx512f")]
38272#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38273#[cfg_attr(test, assert_instr(vmulss, ROUNDING = 8))]
38274#[rustc_legacy_const_generics(4)]
38275pub fn _mm_mask_mul_round_ss<const ROUNDING: i32>(
38276 src: __m128,
38277 k: __mmask8,
38278 a: __m128,
38279 b: __m128,
38280) -> __m128 {
38281 unsafe {
38282 static_assert_rounding!(ROUNDING);
38283 let a: f32x4 = a.as_f32x4();
38284 let b: f32x4 = b.as_f32x4();
38285 let src: f32x4 = src.as_f32x4();
        let r: f32x4 = vmulss(a, b, src, k, ROUNDING);
        transmute(r)
38288 }
38289}
38290
38291/// Multiply the lower single-precision (32-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38292///
38293/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38294/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38295/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38296/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38297/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38298/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38299///
38300/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_mul_round_ss&expand=3945)
38301#[inline]
38302#[target_feature(enable = "avx512f")]
38303#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38304#[cfg_attr(test, assert_instr(vmulss, ROUNDING = 8))]
38305#[rustc_legacy_const_generics(3)]
38306pub fn _mm_maskz_mul_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38307 unsafe {
38308 static_assert_rounding!(ROUNDING);
38309 let a: f32x4 = a.as_f32x4();
38310 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vmulss(a, b, f32x4::ZERO, k, ROUNDING);
        transmute(r)
38313 }
38314}
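
// Sketch (not from the original source): with the zero-mask variants only the
// low lane is zeroed; the upper lanes are still copied from `a`. `_mm_set_ps`
// is the standard SSE setter (highest lane listed first).
//
//     let a = _mm_set_ps(4.0, 3.0, 2.0, 1.0); // lanes 0..3 = [1.0, 2.0, 3.0, 4.0]
//     let b = _mm_set_ps(0.0, 0.0, 0.0, 8.0);
//     let r = _mm_maskz_mul_round_ss::<{ _MM_FROUND_CUR_DIRECTION }>(0, a, b);
//     // r = [0.0, 2.0, 3.0, 4.0]: lane 0 zeroed by the mask, lanes 1..3 from `a`.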
38315
38316/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
38317///
38318/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38319/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38320/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38321/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38322/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38323/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38324///
38325/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mul_round_sd&expand=3943)
38326#[inline]
38327#[target_feature(enable = "avx512f")]
38328#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38329#[cfg_attr(test, assert_instr(vmulsd, ROUNDING = 8))]
38330#[rustc_legacy_const_generics(2)]
38331pub fn _mm_mul_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
38332 unsafe {
38333 static_assert_rounding!(ROUNDING);
38334 let a: f64x2 = a.as_f64x2();
38335 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vmulsd(a, b, f64x2::ZERO, 0b1, ROUNDING);
        transmute(r)
38338 }
38339}
38340
38341/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38342///
38343/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38344/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38345/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38346/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38347/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38348/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38349///
38350/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_mul_round_sd&expand=3941)
38351#[inline]
38352#[target_feature(enable = "avx512f")]
38353#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38354#[cfg_attr(test, assert_instr(vmulsd, ROUNDING = 8))]
38355#[rustc_legacy_const_generics(4)]
38356pub fn _mm_mask_mul_round_sd<const ROUNDING: i32>(
38357 src: __m128d,
38358 k: __mmask8,
38359 a: __m128d,
38360 b: __m128d,
38361) -> __m128d {
38362 unsafe {
38363 static_assert_rounding!(ROUNDING);
38364 let a: f64x2 = a.as_f64x2();
38365 let b: f64x2 = b.as_f64x2();
38366 let src: f64x2 = src.as_f64x2();
        let r: f64x2 = vmulsd(a, b, src, k, ROUNDING);
        transmute(r)
38369 }
38370}
38371
38372/// Multiply the lower double-precision (64-bit) floating-point element in a and b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38373///
38374/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38375/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38376/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38377/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38378/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38379/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38380///
38381/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_mul_round_sd&expand=3942)
38382#[inline]
38383#[target_feature(enable = "avx512f")]
38384#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38385#[cfg_attr(test, assert_instr(vmulsd, ROUNDING = 8))]
38386#[rustc_legacy_const_generics(3)]
38387pub fn _mm_maskz_mul_round_sd<const ROUNDING: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38388 unsafe {
38389 static_assert_rounding!(ROUNDING);
38390 let a: f64x2 = a.as_f64x2();
38391 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vmulsd(a, b, f64x2::ZERO, k, ROUNDING);
        transmute(r)
38394 }
38395}
38396
38397/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
38398///
38399/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38400/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38401/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38402/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38403/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38404/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38405///
38406/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_div_round_ss&expand=2174)
38407#[inline]
38408#[target_feature(enable = "avx512f")]
38409#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38410#[cfg_attr(test, assert_instr(vdivss, ROUNDING = 8))]
38411#[rustc_legacy_const_generics(2)]
38412pub fn _mm_div_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
38413 unsafe {
38414 static_assert_rounding!(ROUNDING);
38415 let a: f32x4 = a.as_f32x4();
38416 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vdivss(a, b, f32x4::ZERO, 0b1, ROUNDING);
        transmute(r)
38419 }
38420}
38421
38422/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38423///
38424/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38425/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38426/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38427/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38428/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38429/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38430///
38431/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_div_round_ss&expand=2175)
38432#[inline]
38433#[target_feature(enable = "avx512f")]
38434#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38435#[cfg_attr(test, assert_instr(vdivss, ROUNDING = 8))]
38436#[rustc_legacy_const_generics(4)]
38437pub fn _mm_mask_div_round_ss<const ROUNDING: i32>(
38438 src: __m128,
38439 k: __mmask8,
38440 a: __m128,
38441 b: __m128,
38442) -> __m128 {
38443 unsafe {
38444 static_assert_rounding!(ROUNDING);
38445 let a: f32x4 = a.as_f32x4();
38446 let b: f32x4 = b.as_f32x4();
38447 let src: f32x4 = src.as_f32x4();
        let r: f32x4 = vdivss(a, b, src, k, ROUNDING);
        transmute(r)
38450 }
38451}
38452
38453/// Divide the lower single-precision (32-bit) floating-point element in a by the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38454///
38455/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38456/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38457/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38458/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38459/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38460/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38461///
38462/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_div_round_ss&expand=2176)
38463#[inline]
38464#[target_feature(enable = "avx512f")]
38465#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38466#[cfg_attr(test, assert_instr(vdivss, ROUNDING = 8))]
38467#[rustc_legacy_const_generics(3)]
38468pub fn _mm_maskz_div_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38469 unsafe {
38470 static_assert_rounding!(ROUNDING);
38471 let a: f32x4 = a.as_f32x4();
38472 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vdivss(a, b, f32x4::ZERO, k, ROUNDING);
        transmute(r)
38475 }
38476}
38477
38478/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
38479///
38480/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38481/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38482/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38483/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38484/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38485/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38486///
38487/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_div_round_sd&expand=2171)
38488#[inline]
38489#[target_feature(enable = "avx512f")]
38490#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38491#[cfg_attr(test, assert_instr(vdivsd, ROUNDING = 8))]
38492#[rustc_legacy_const_generics(2)]
38493pub fn _mm_div_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
38494 unsafe {
38495 static_assert_rounding!(ROUNDING);
38496 let a: f64x2 = a.as_f64x2();
38497 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vdivsd(a, b, f64x2::ZERO, 0b1, ROUNDING);
        transmute(r)
38500 }
38501}
38502
38503/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38504///
38505/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38506/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38507/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38508/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38509/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38510/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38511///
38512/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_div_round_sd&expand=2172)
38513#[inline]
38514#[target_feature(enable = "avx512f")]
38515#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38516#[cfg_attr(test, assert_instr(vdivsd, ROUNDING = 8))]
38517#[rustc_legacy_const_generics(4)]
38518pub fn _mm_mask_div_round_sd<const ROUNDING: i32>(
38519 src: __m128d,
38520 k: __mmask8,
38521 a: __m128d,
38522 b: __m128d,
38523) -> __m128d {
38524 unsafe {
38525 static_assert_rounding!(ROUNDING);
38526 let a: f64x2 = a.as_f64x2();
38527 let b: f64x2 = b.as_f64x2();
38528 let src: f64x2 = src.as_f64x2();
        let r: f64x2 = vdivsd(a, b, src, k, ROUNDING);
        transmute(r)
38531 }
38532}
38533
38534/// Divide the lower double-precision (64-bit) floating-point element in a by the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38535///
38536/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38537/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38538/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38539/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38540/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38541/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38542///
38543/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_div_round_sd&expand=2173)
38544#[inline]
38545#[target_feature(enable = "avx512f")]
38546#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38547#[cfg_attr(test, assert_instr(vdivsd, ROUNDING = 8))]
38548#[rustc_legacy_const_generics(3)]
38549pub fn _mm_maskz_div_round_sd<const ROUNDING: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38550 unsafe {
38551 static_assert_rounding!(ROUNDING);
38552 let a: f64x2 = a.as_f64x2();
38553 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vdivsd(a, b, f64x2::ZERO, k, ROUNDING);
        transmute(r)
38556 }
38557}
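
// Sketch (not from the original source): directed rounding applied to a
// quotient with no exact binary representation.
//
//     let a = _mm_set_sd(1.0);
//     let b = _mm_set_sd(3.0);
//     let lo = _mm_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
//     let hi = _mm_div_round_sd::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a, b);
//     // The low lanes of `lo` and `hi` are adjacent f64 values bracketing 1/3.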
38558
38559/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
38560/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38561///
38562/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_max_round_ss&expand=3668)
38563#[inline]
38564#[target_feature(enable = "avx512f")]
38565#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38566#[cfg_attr(test, assert_instr(vmaxss, SAE = 8))]
38567#[rustc_legacy_const_generics(2)]
38568pub fn _mm_max_round_ss<const SAE: i32>(a: __m128, b: __m128) -> __m128 {
38569 unsafe {
38570 static_assert_sae!(SAE);
38571 let a: f32x4 = a.as_f32x4();
38572 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vmaxss(a, b, f32x4::ZERO, 0b1, SAE);
        transmute(r)
38575 }
38576}
38577
38578/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38579/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38580///
38581/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_max_round_ss&expand=3672)
38582#[inline]
38583#[target_feature(enable = "avx512f")]
38584#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38585#[cfg_attr(test, assert_instr(vmaxss, SAE = 8))]
38586#[rustc_legacy_const_generics(4)]
38587pub fn _mm_mask_max_round_ss<const SAE: i32>(
38588 src: __m128,
38589 k: __mmask8,
38590 a: __m128,
38591 b: __m128,
38592) -> __m128 {
38593 unsafe {
38594 static_assert_sae!(SAE);
38595 let a: f32x4 = a.as_f32x4();
38596 let b: f32x4 = b.as_f32x4();
38597 let src: f32x4 = src.as_f32x4();
        let r: f32x4 = vmaxss(a, b, src, k, SAE);
        transmute(r)
38600 }
38601}
38602
38603/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38604/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38605///
38606/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_max_round_ss&expand=3667)
38607#[inline]
38608#[target_feature(enable = "avx512f")]
38609#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38610#[cfg_attr(test, assert_instr(vmaxss, SAE = 8))]
38611#[rustc_legacy_const_generics(3)]
38612pub fn _mm_maskz_max_round_ss<const SAE: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38613 unsafe {
38614 static_assert_sae!(SAE);
38615 let a: f32x4 = a.as_f32x4();
38616 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vmaxss(a, b, f32x4::ZERO, k, SAE);
        transmute(r)
38619 }
38620}
38621
38622/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst, and copy the upper element from a to the upper element of dst.\
38623/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38624///
38625/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_max_round_sd&expand=3665)
38626#[inline]
38627#[target_feature(enable = "avx512f")]
38628#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38629#[cfg_attr(test, assert_instr(vmaxsd, SAE = 8))]
38630#[rustc_legacy_const_generics(2)]
38631pub fn _mm_max_round_sd<const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
38632 unsafe {
38633 static_assert_sae!(SAE);
38634 let a: f64x2 = a.as_f64x2();
38635 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vmaxsd(a, b, f64x2::ZERO, 0b1, SAE);
        transmute(r)
38638 }
38639}
38640
38641/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38642/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38643///
38644/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_max_round_sd&expand=3663)
38645#[inline]
38646#[target_feature(enable = "avx512f")]
38647#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38648#[cfg_attr(test, assert_instr(vmaxsd, SAE = 8))]
38649#[rustc_legacy_const_generics(4)]
38650pub fn _mm_mask_max_round_sd<const SAE: i32>(
38651 src: __m128d,
38652 k: __mmask8,
38653 a: __m128d,
38654 b: __m128d,
38655) -> __m128d {
38656 unsafe {
38657 static_assert_sae!(SAE);
38658 let a: f64x2 = a.as_f64x2();
38659 let b: f64x2 = b.as_f64x2();
38660 let src: f64x2 = src.as_f64x2();
        let r: f64x2 = vmaxsd(a, b, src, k, SAE);
        transmute(r)
38663 }
38664}
38665
38666/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the maximum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38667/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38668///
38669/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_max_round_sd&expand=3670)
38670#[inline]
38671#[target_feature(enable = "avx512f")]
38672#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38673#[cfg_attr(test, assert_instr(vmaxsd, SAE = 8))]
38674#[rustc_legacy_const_generics(3)]
38675pub fn _mm_maskz_max_round_sd<const SAE: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38676 unsafe {
38677 static_assert_sae!(SAE);
38678 let a: f64x2 = a.as_f64x2();
38679 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vmaxsd(a, b, f64x2::ZERO, k, SAE);
        transmute(r)
38682 }
38683}
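
// Sketch (not from the original source): the max/min "round" variants take an
// SAE (suppress-all-exceptions) constant rather than a full rounding mode, so
// in practice only `_MM_FROUND_CUR_DIRECTION` or `_MM_FROUND_NO_EXC` pass the
// static assertion.
//
//     let a = _mm_set_sd(-0.5);
//     let b = _mm_set_sd(2.0);
//     let r = _mm_max_round_sd::<_MM_FROUND_NO_EXC>(a, b);
//     // Low lane of `r` is 2.0; the upper lane is copied from `a`.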
38684
38685/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
38686/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38687///
38688/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_min_round_ss&expand=3782)
38689#[inline]
38690#[target_feature(enable = "avx512f")]
38691#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38692#[cfg_attr(test, assert_instr(vminss, SAE = 8))]
38693#[rustc_legacy_const_generics(2)]
38694pub fn _mm_min_round_ss<const SAE: i32>(a: __m128, b: __m128) -> __m128 {
38695 unsafe {
38696 static_assert_sae!(SAE);
38697 let a: f32x4 = a.as_f32x4();
38698 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vminss(a, b, f32x4::ZERO, 0b1, SAE);
        transmute(r)
38701 }
38702}
38703
38704/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38705/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38706///
38707/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_min_round_ss&expand=3780)
38708#[inline]
38709#[target_feature(enable = "avx512f")]
38710#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38711#[cfg_attr(test, assert_instr(vminss, SAE = 8))]
38712#[rustc_legacy_const_generics(4)]
38713pub fn _mm_mask_min_round_ss<const SAE: i32>(
38714 src: __m128,
38715 k: __mmask8,
38716 a: __m128,
38717 b: __m128,
38718) -> __m128 {
38719 unsafe {
38720 static_assert_sae!(SAE);
38721 let a: f32x4 = a.as_f32x4();
38722 let b: f32x4 = b.as_f32x4();
38723 let src: f32x4 = src.as_f32x4();
        let r: f32x4 = vminss(a, b, src, k, SAE);
        transmute(r)
38726 }
38727}
38728
38729/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38730/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38731///
38732/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_min_round_ss&expand=3781)
38733#[inline]
38734#[target_feature(enable = "avx512f")]
38735#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38736#[cfg_attr(test, assert_instr(vminss, SAE = 8))]
38737#[rustc_legacy_const_generics(3)]
38738pub fn _mm_maskz_min_round_ss<const SAE: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38739 unsafe {
38740 static_assert_sae!(SAE);
38741 let a: f32x4 = a.as_f32x4();
38742 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vminss(a, b, f32x4::ZERO, k, SAE);
        transmute(r)
38745 }
38746}
38747
/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst, and copy the upper element from a to the upper element of dst.\
38749/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38750///
38751/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_min_round_sd&expand=3779)
38752#[inline]
38753#[target_feature(enable = "avx512f")]
38754#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38755#[cfg_attr(test, assert_instr(vminsd, SAE = 8))]
38756#[rustc_legacy_const_generics(2)]
38757pub fn _mm_min_round_sd<const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
38758 unsafe {
38759 static_assert_sae!(SAE);
38760 let a: f64x2 = a.as_f64x2();
38761 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vminsd(a, b, f64x2::ZERO, 0b1, SAE);
        transmute(r)
38764 }
38765}
38766
38767/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38768/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38769///
38770/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_min_round_sd&expand=3777)
38771#[inline]
38772#[target_feature(enable = "avx512f")]
38773#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38774#[cfg_attr(test, assert_instr(vminsd, SAE = 8))]
38775#[rustc_legacy_const_generics(4)]
38776pub fn _mm_mask_min_round_sd<const SAE: i32>(
38777 src: __m128d,
38778 k: __mmask8,
38779 a: __m128d,
38780 b: __m128d,
38781) -> __m128d {
38782 unsafe {
38783 static_assert_sae!(SAE);
38784 let a: f64x2 = a.as_f64x2();
38785 let b: f64x2 = b.as_f64x2();
38786 let src: f64x2 = src.as_f64x2();
        let r: f64x2 = vminsd(a, b, src, k, SAE);
        transmute(r)
38789 }
38790}
38791
38792/// Compare the lower double-precision (64-bit) floating-point elements in a and b, store the minimum value in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38793/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38794///
38795/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_min_round_sd&expand=3778)
38796#[inline]
38797#[target_feature(enable = "avx512f")]
38798#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38799#[cfg_attr(test, assert_instr(vminsd, SAE = 8))]
38800#[rustc_legacy_const_generics(3)]
38801pub fn _mm_maskz_min_round_sd<const SAE: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
38802 unsafe {
38803 static_assert_sae!(SAE);
38804 let a: f64x2 = a.as_f64x2();
38805 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vminsd(a, b, f64x2::ZERO, k, SAE);
        transmute(r)
38808 }
38809}
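
// Sketch (not from the original source): with mask bit 0 set, the write-mask
// variant behaves like the unmasked intrinsic and `src` is ignored for the low
// lane; `_mm_set_ss` is the standard SSE setter.
//
//     let src = _mm_set_ss(99.0);
//     let a = _mm_set_ss(7.0);
//     let b = _mm_set_ss(-3.0);
//     let r = _mm_mask_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(src, 1, a, b);
//     // Low lane of `r` is -3.0 because bit 0 of the mask is set.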
38810
38811/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
38812///
38813/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38814/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38815/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38816/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38817/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38818/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38819///
38820/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_sqrt_round_ss&expand=5383)
38821#[inline]
38822#[target_feature(enable = "avx512f")]
38823#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38824#[cfg_attr(test, assert_instr(vsqrtss, ROUNDING = 8))]
38825#[rustc_legacy_const_generics(2)]
38826pub fn _mm_sqrt_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
38827 unsafe {
38828 static_assert_rounding!(ROUNDING);
        vsqrtss(a, b, _mm_setzero_ps(), 0b1, ROUNDING)
38830 }
38831}
38832
38833/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38834///
38835/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38836/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38837/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38838/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38839/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38840/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38841///
38842/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sqrt_round_ss&expand=5381)
38843#[inline]
38844#[target_feature(enable = "avx512f")]
38845#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38846#[cfg_attr(test, assert_instr(vsqrtss, ROUNDING = 8))]
38847#[rustc_legacy_const_generics(4)]
38848pub fn _mm_mask_sqrt_round_ss<const ROUNDING: i32>(
38849 src: __m128,
38850 k: __mmask8,
38851 a: __m128,
38852 b: __m128,
38853) -> __m128 {
38854 unsafe {
38855 static_assert_rounding!(ROUNDING);
        vsqrtss(a, b, src, k, ROUNDING)
38857 }
38858}
38859
38860/// Compute the square root of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
38861///
38862/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38863/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38864/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38865/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38866/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38867/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38868///
38869/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sqrt_round_ss&expand=5382)
38870#[inline]
38871#[target_feature(enable = "avx512f")]
38872#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38873#[cfg_attr(test, assert_instr(vsqrtss, ROUNDING = 8))]
38874#[rustc_legacy_const_generics(3)]
38875pub fn _mm_maskz_sqrt_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
38876 unsafe {
38877 static_assert_rounding!(ROUNDING);
        vsqrtss(a, b, _mm_setzero_ps(), k, ROUNDING)
38879 }
38880}
38881
38882/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
38883///
38884/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38885/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38886/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38887/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38888/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38889/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38890///
38891/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_sqrt_round_sd&expand=5380)
38892#[inline]
38893#[target_feature(enable = "avx512f")]
38894#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38895#[cfg_attr(test, assert_instr(vsqrtsd, ROUNDING = 8))]
38896#[rustc_legacy_const_generics(2)]
38897pub fn _mm_sqrt_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
38898 unsafe {
38899 static_assert_rounding!(ROUNDING);
        vsqrtsd(a, b, _mm_setzero_pd(), 0b1, ROUNDING)
38901 }
38902}
38903
38904/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38905///
38906/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38907/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38908/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38909/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38910/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38911/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38912///
38913/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_sqrt_round_sd&expand=5378)
38914#[inline]
38915#[target_feature(enable = "avx512f")]
38916#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38917#[cfg_attr(test, assert_instr(vsqrtsd, ROUNDING = 8))]
38918#[rustc_legacy_const_generics(4)]
38919pub fn _mm_mask_sqrt_round_sd<const ROUNDING: i32>(
38920 src: __m128d,
38921 k: __mmask8,
38922 a: __m128d,
38923 b: __m128d,
38924) -> __m128d {
38925 unsafe {
38926 static_assert_rounding!(ROUNDING);
        vsqrtsd(a, b, src, k, ROUNDING)
38928 }
38929}
38930
38931/// Compute the square root of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
38932///
38933/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
38934/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
38935/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
38936/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
38937/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
38938/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
38939///
38940/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_sqrt_round_sd&expand=5379)
38941#[inline]
38942#[target_feature(enable = "avx512f")]
38943#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38944#[cfg_attr(test, assert_instr(vsqrtsd, ROUNDING = 8))]
38945#[rustc_legacy_const_generics(3)]
38946pub fn _mm_maskz_sqrt_round_sd<const ROUNDING: i32>(
38947 k: __mmask8,
38948 a: __m128d,
38949 b: __m128d,
38950) -> __m128d {
38951 unsafe {
38952 static_assert_rounding!(ROUNDING);
        vsqrtsd(a, b, _mm_setzero_pd(), k, ROUNDING)
38954 }
38955}
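
// Sketch (not from the original source): note that the square root is taken of
// the low element of `b`, while `a` only supplies the upper lane of the result.
//
//     let a = _mm_set_sd(-1.0);   // upper-lane source only
//     let b = _mm_set_sd(9.0);
//     let r = _mm_sqrt_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
//     // Low lane of `r` is 3.0; the upper lane comes from `a`.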
38956
38957/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
38958/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38959///
38960/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getexp_round_ss&expand=2856)
38961#[inline]
38962#[target_feature(enable = "avx512f")]
38963#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38964#[cfg_attr(test, assert_instr(vgetexpss, SAE = 8))]
38965#[rustc_legacy_const_generics(2)]
38966pub fn _mm_getexp_round_ss<const SAE: i32>(a: __m128, b: __m128) -> __m128 {
38967 unsafe {
38968 static_assert_sae!(SAE);
38969 let a: f32x4 = a.as_f32x4();
38970 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vgetexpss(a, b, f32x4::ZERO, 0b1, SAE);
        transmute(r)
38973 }
38974}
38975
38976/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
38977/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
38978///
38979/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getexp_round_ss&expand=2857)
38980#[inline]
38981#[target_feature(enable = "avx512f")]
38982#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
38983#[cfg_attr(test, assert_instr(vgetexpss, SAE = 8))]
38984#[rustc_legacy_const_generics(4)]
38985pub fn _mm_mask_getexp_round_ss<const SAE: i32>(
38986 src: __m128,
38987 k: __mmask8,
38988 a: __m128,
38989 b: __m128,
38990) -> __m128 {
38991 unsafe {
38992 static_assert_sae!(SAE);
38993 let a: f32x4 = a.as_f32x4();
38994 let b: f32x4 = b.as_f32x4();
38995 let src: f32x4 = src.as_f32x4();
        let r: f32x4 = vgetexpss(a, b, src, k, SAE);
        transmute(r)
38998 }
38999}
39000
39001/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a single-precision (32-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
39002/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39003///
39004/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getexp_round_ss&expand=2858)
39005#[inline]
39006#[target_feature(enable = "avx512f")]
39007#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39008#[cfg_attr(test, assert_instr(vgetexpss, SAE = 8))]
39009#[rustc_legacy_const_generics(3)]
39010pub fn _mm_maskz_getexp_round_ss<const SAE: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
39011 unsafe {
39012 static_assert_sae!(SAE);
39013 let a: f32x4 = a.as_f32x4();
39014 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vgetexpss(a, b, f32x4::ZERO, k, SAE);
        transmute(r)
39017 }
39018}
39019
39020/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
39021/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39022///
39023/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getexp_round_sd&expand=2853)
39024#[inline]
39025#[target_feature(enable = "avx512f")]
39026#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39027#[cfg_attr(test, assert_instr(vgetexpsd, SAE = 8))]
39028#[rustc_legacy_const_generics(2)]
39029pub fn _mm_getexp_round_sd<const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
39030 unsafe {
39031 static_assert_sae!(SAE);
39032 let a: f64x2 = a.as_f64x2();
39033 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vgetexpsd(a, b, f64x2::ZERO, 0b1, SAE);
        transmute(r)
39036 }
39037}
39038
39039/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
39040/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39041///
39042/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getexp_round_sd&expand=2854)
39043#[inline]
39044#[target_feature(enable = "avx512f")]
39045#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39046#[cfg_attr(test, assert_instr(vgetexpsd, SAE = 8))]
39047#[rustc_legacy_const_generics(4)]
39048pub fn _mm_mask_getexp_round_sd<const SAE: i32>(
39049 src: __m128d,
39050 k: __mmask8,
39051 a: __m128d,
39052 b: __m128d,
39053) -> __m128d {
39054 unsafe {
39055 static_assert_sae!(SAE);
39056 let a: f64x2 = a.as_f64x2();
39057 let b: f64x2 = b.as_f64x2();
39058 let src: f64x2 = src.as_f64x2();
        let r: f64x2 = vgetexpsd(a, b, src, k, SAE);
        transmute(r)
39061 }
39062}
39063
39064/// Convert the exponent of the lower double-precision (64-bit) floating-point element in b to a double-precision (64-bit) floating-point number representing the integer exponent, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates floor(log2(x)) for the lower element.\
39065/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39066///
39067/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getexp_round_sd&expand=2855)
39068#[inline]
39069#[target_feature(enable = "avx512f")]
39070#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39071#[cfg_attr(test, assert_instr(vgetexpsd, SAE = 8))]
39072#[rustc_legacy_const_generics(3)]
39073pub fn _mm_maskz_getexp_round_sd<const SAE: i32>(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
39074 unsafe {
39075 static_assert_sae!(SAE);
39076 let a: f64x2 = a.as_f64x2();
39077 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vgetexpsd(a, b, f64x2::ZERO, k, SAE);
        transmute(r)
39080 }
39081}
39082
39083/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
39084/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
39085/// _MM_MANT_NORM_1_2 // interval [1, 2)\
39086/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
39087/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
39088/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
39089/// The sign is determined by sc which can take the following values:\
39090/// _MM_MANT_SIGN_src // sign = sign(src)\
39091/// _MM_MANT_SIGN_zero // sign = 0\
39092/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
39093/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39094///
39095/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getmant_round_ss&expand=2892)
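///
/// A minimal sketch of how the three const parameters are supplied (illustrative only; it uses
/// the interval and sign selector names listed above and assumes an `avx512f` target-feature
/// context):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm_setzero_ps();
/// let b = _mm_set_ss(12.0);
/// // 12.0 = 1.5 * 2^3, so normalizing to the [1, 2) interval gives 1.5 in the lower lane.
/// let r = _mm_getmant_round_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_src, _MM_FROUND_CUR_DIRECTION>(a, b);
/// ```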
39096#[inline]
39097#[target_feature(enable = "avx512f")]
39098#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39099#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0, SAE = 4))]
39100#[rustc_legacy_const_generics(2, 3, 4)]
39101pub fn _mm_getmant_round_ss<
39102 const NORM: _MM_MANTISSA_NORM_ENUM,
39103 const SIGN: _MM_MANTISSA_SIGN_ENUM,
39104 const SAE: i32,
39105>(
39106 a: __m128,
39107 b: __m128,
39108) -> __m128 {
39109 unsafe {
39110 static_assert_uimm_bits!(NORM, 4);
39111 static_assert_uimm_bits!(SIGN, 2);
39112 static_assert_mantissas_sae!(SAE);
39113 let a: f32x4 = a.as_f32x4();
39114 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vgetmantss(a, b, SIGN << 2 | NORM, f32x4::ZERO, 0b1, SAE);
        transmute(r)
39117 }
39118}
39119
39120/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
39121/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
39122/// _MM_MANT_NORM_1_2 // interval [1, 2)\
39123/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
39124/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
39125/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
39126/// The sign is determined by sc which can take the following values:\
39127/// _MM_MANT_SIGN_src // sign = sign(src)\
39128/// _MM_MANT_SIGN_zero // sign = 0\
39129/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
39130/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39131///
39132/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getmant_round_ss&expand=2893)
39133#[inline]
39134#[target_feature(enable = "avx512f")]
39135#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39136#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0, SAE = 4))]
39137#[rustc_legacy_const_generics(4, 5, 6)]
39138pub fn _mm_mask_getmant_round_ss<
39139 const NORM: _MM_MANTISSA_NORM_ENUM,
39140 const SIGN: _MM_MANTISSA_SIGN_ENUM,
39141 const SAE: i32,
39142>(
39143 src: __m128,
39144 k: __mmask8,
39145 a: __m128,
39146 b: __m128,
39147) -> __m128 {
39148 unsafe {
39149 static_assert_uimm_bits!(NORM, 4);
39150 static_assert_uimm_bits!(SIGN, 2);
39151 static_assert_mantissas_sae!(SAE);
39152 let a: f32x4 = a.as_f32x4();
39153 let b: f32x4 = b.as_f32x4();
39154 let src: f32x4 = src.as_f32x4();
        let r: f32x4 = vgetmantss(a, b, SIGN << 2 | NORM, src, k, SAE);
        transmute(r)
39157 }
39158}
39159
39160/// Normalize the mantissas of the lower single-precision (32-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
39161/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
39162/// _MM_MANT_NORM_1_2 // interval [1, 2)\
39163/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
39164/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
39165/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
39166/// The sign is determined by sc which can take the following values:\
39167/// _MM_MANT_SIGN_src // sign = sign(src)\
39168/// _MM_MANT_SIGN_zero // sign = 0\
39169/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
39170/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39171///
39172/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getmant_round_ss&expand=2894)
39173#[inline]
39174#[target_feature(enable = "avx512f")]
39175#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39176#[cfg_attr(test, assert_instr(vgetmantss, NORM = 0, SIGN = 0, SAE = 4))]
39177#[rustc_legacy_const_generics(3, 4, 5)]
39178pub fn _mm_maskz_getmant_round_ss<
39179 const NORM: _MM_MANTISSA_NORM_ENUM,
39180 const SIGN: _MM_MANTISSA_SIGN_ENUM,
39181 const SAE: i32,
39182>(
39183 k: __mmask8,
39184 a: __m128,
39185 b: __m128,
39186) -> __m128 {
39187 unsafe {
39188 static_assert_uimm_bits!(NORM, 4);
39189 static_assert_uimm_bits!(SIGN, 2);
39190 static_assert_mantissas_sae!(SAE);
39191 let a: f32x4 = a.as_f32x4();
39192 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vgetmantss(a, b, SIGN << 2 | NORM, f32x4::ZERO, k, SAE);
        transmute(r)
39195 }
39196}
39197
39198/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
39199/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
39200/// _MM_MANT_NORM_1_2 // interval [1, 2)\
39201/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
39202/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
39203/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
39204/// The sign is determined by sc which can take the following values:\
39205/// _MM_MANT_SIGN_src // sign = sign(src)\
39206/// _MM_MANT_SIGN_zero // sign = 0\
39207/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
39208/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39209///
39210/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_getmant_round_sd&expand=2889)
39211#[inline]
39212#[target_feature(enable = "avx512f")]
39213#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39214#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0, SAE = 4))]
39215#[rustc_legacy_const_generics(2, 3, 4)]
39216pub fn _mm_getmant_round_sd<
39217 const NORM: _MM_MANTISSA_NORM_ENUM,
39218 const SIGN: _MM_MANTISSA_SIGN_ENUM,
39219 const SAE: i32,
39220>(
39221 a: __m128d,
39222 b: __m128d,
39223) -> __m128d {
39224 unsafe {
39225 static_assert_uimm_bits!(NORM, 4);
39226 static_assert_uimm_bits!(SIGN, 2);
39227 static_assert_mantissas_sae!(SAE);
39228 let a: f64x2 = a.as_f64x2();
39229 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vgetmantsd(a, b, SIGN << 2 | NORM, f64x2::ZERO, 0b1, SAE);
        transmute(r)
39232 }
39233}
39234
39235/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
39236/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
39237/// _MM_MANT_NORM_1_2 // interval [1, 2)\
39238/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
39239/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
39240/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
39241/// The sign is determined by sc which can take the following values:\
39242/// _MM_MANT_SIGN_src // sign = sign(src)\
39243/// _MM_MANT_SIGN_zero // sign = 0\
39244/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
39245/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39246///
39247/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_getmant_round_sd&expand=2890)
39248#[inline]
39249#[target_feature(enable = "avx512f")]
39250#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39251#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0, SAE = 4))]
39252#[rustc_legacy_const_generics(4, 5, 6)]
39253pub fn _mm_mask_getmant_round_sd<
39254 const NORM: _MM_MANTISSA_NORM_ENUM,
39255 const SIGN: _MM_MANTISSA_SIGN_ENUM,
39256 const SAE: i32,
39257>(
39258 src: __m128d,
39259 k: __mmask8,
39260 a: __m128d,
39261 b: __m128d,
39262) -> __m128d {
39263 unsafe {
39264 static_assert_uimm_bits!(NORM, 4);
39265 static_assert_uimm_bits!(SIGN, 2);
39266 static_assert_mantissas_sae!(SAE);
39267 let a: f64x2 = a.as_f64x2();
39268 let b: f64x2 = b.as_f64x2();
39269 let src: f64x2 = src.as_f64x2();
        let r: f64x2 = vgetmantsd(a, b, SIGN << 2 | NORM, src, k, SAE);
        transmute(r)
39272 }
39273}
39274
39275/// Normalize the mantissas of the lower double-precision (64-bit) floating-point element in b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. This intrinsic essentially calculates ±(2^k)*|x.significand|, where k depends on the interval range defined by interv and the sign depends on sc and the source sign.\
39276/// The mantissa is normalized to the interval specified by interv, which can take the following values:\
39277/// _MM_MANT_NORM_1_2 // interval [1, 2)\
39278/// _MM_MANT_NORM_p5_2 // interval [0.5, 2)\
39279/// _MM_MANT_NORM_p5_1 // interval [0.5, 1)\
39280/// _MM_MANT_NORM_p75_1p5 // interval [0.75, 1.5)\
39281/// The sign is determined by sc which can take the following values:\
39282/// _MM_MANT_SIGN_src // sign = sign(src)\
39283/// _MM_MANT_SIGN_zero // sign = 0\
39284/// _MM_MANT_SIGN_nan // dst = NaN if sign(src) = 1\
39285/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
39286///
39287/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_getmant_round_sd&expand=2891)
39288#[inline]
39289#[target_feature(enable = "avx512f")]
39290#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39291#[cfg_attr(test, assert_instr(vgetmantsd, NORM = 0, SIGN = 0, SAE = 4))]
39292#[rustc_legacy_const_generics(3, 4, 5)]
39293pub fn _mm_maskz_getmant_round_sd<
39294 const NORM: _MM_MANTISSA_NORM_ENUM,
39295 const SIGN: _MM_MANTISSA_SIGN_ENUM,
39296 const SAE: i32,
39297>(
39298 k: __mmask8,
39299 a: __m128d,
39300 b: __m128d,
39301) -> __m128d {
39302 unsafe {
39303 static_assert_uimm_bits!(NORM, 4);
39304 static_assert_uimm_bits!(SIGN, 2);
39305 static_assert_mantissas_sae!(SAE);
39306 let a: f64x2 = a.as_f64x2();
39307 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vgetmantsd(a, b, SIGN << 2 | NORM, f64x2::ZERO, k, SAE);
        transmute(r)
39310 }
39311}
39312
39313/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
39314/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
39315/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
39316/// * [`_MM_FROUND_TO_NEG_INF`] : round down
39317/// * [`_MM_FROUND_TO_POS_INF`] : round up
39318/// * [`_MM_FROUND_TO_ZERO`] : truncate
39319/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39320///
39321/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_roundscale_round_ss&expand=4796)
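///
/// A minimal sketch of how `IMM8` selects the number of fraction bits kept (illustrative only;
/// assumes an `avx512f` target-feature context):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm_setzero_ps();
/// let b = _mm_set_ss(2.3);
/// // IMM8 = 0: keep no fraction bits, i.e. round the lower lane to an integer (2.0 here).
/// let ints = _mm_roundscale_round_ss::<0, _MM_FROUND_NO_EXC>(a, b);
/// // IMM8 = 0x10 (one fraction bit in imm8[7:4]): the lower lane is rounded to 2.5.
/// let halves = _mm_roundscale_round_ss::<0x10, _MM_FROUND_NO_EXC>(a, b);
/// ```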
39323#[inline]
39324#[target_feature(enable = "avx512f")]
39325#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39326#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0, SAE = 8))]
39327#[rustc_legacy_const_generics(2, 3)]
39328pub fn _mm_roundscale_round_ss<const IMM8: i32, const SAE: i32>(a: __m128, b: __m128) -> __m128 {
39329 unsafe {
39330 static_assert_uimm_bits!(IMM8, 8);
39331 static_assert_mantissas_sae!(SAE);
39332 let a: f32x4 = a.as_f32x4();
39333 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vrndscaless(a, b, f32x4::ZERO, 0b11111111, IMM8, SAE);
        transmute(r)
39336 }
39337}
39338
39339/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39340/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
39341/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
39342/// * [`_MM_FROUND_TO_NEG_INF`] : round down
39343/// * [`_MM_FROUND_TO_POS_INF`] : round up
39344/// * [`_MM_FROUND_TO_ZERO`] : truncate
39345/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39346///
39347/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_roundscale_round_ss&expand=4794)
39349#[inline]
39350#[target_feature(enable = "avx512f")]
39351#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39352#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0, SAE = 8))]
39353#[rustc_legacy_const_generics(4, 5)]
39354pub fn _mm_mask_roundscale_round_ss<const IMM8: i32, const SAE: i32>(
39355 src: __m128,
39356 k: __mmask8,
39357 a: __m128,
39358 b: __m128,
39359) -> __m128 {
39360 unsafe {
39361 static_assert_uimm_bits!(IMM8, 8);
39362 static_assert_mantissas_sae!(SAE);
39363 let a: f32x4 = a.as_f32x4();
39364 let b: f32x4 = b.as_f32x4();
39365 let src: f32x4 = src.as_f32x4();
        let r: f32x4 = vrndscaless(a, b, src, k, IMM8, SAE);
        transmute(r)
39368 }
39369}
39370
39371/// Round the lower single-precision (32-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39372/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
39373/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
39374/// * [`_MM_FROUND_TO_NEG_INF`] : round down
39375/// * [`_MM_FROUND_TO_POS_INF`] : round up
39376/// * [`_MM_FROUND_TO_ZERO`] : truncate
39377/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39378///
39379/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_roundscale_round_ss&expand=4795)
39381#[inline]
39382#[target_feature(enable = "avx512f")]
39383#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39384#[cfg_attr(test, assert_instr(vrndscaless, IMM8 = 0, SAE = 8))]
39385#[rustc_legacy_const_generics(3, 4)]
39386pub fn _mm_maskz_roundscale_round_ss<const IMM8: i32, const SAE: i32>(
39387 k: __mmask8,
39388 a: __m128,
39389 b: __m128,
39390) -> __m128 {
39391 unsafe {
39392 static_assert_uimm_bits!(IMM8, 8);
39393 static_assert_mantissas_sae!(SAE);
39394 let a: f32x4 = a.as_f32x4();
39395 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vrndscaless(a, b, f32x4::ZERO, k, IMM8, SAE);
        transmute(r)
39398 }
39399}
39400
39401/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
39402/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
39403/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
39404/// * [`_MM_FROUND_TO_NEG_INF`] : round down
39405/// * [`_MM_FROUND_TO_POS_INF`] : round up
39406/// * [`_MM_FROUND_TO_ZERO`] : truncate
39407/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39408///
39409/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_roundscale_round_sd&expand=4793)
39411#[inline]
39412#[target_feature(enable = "avx512f")]
39413#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39414#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0, SAE = 8))]
39415#[rustc_legacy_const_generics(2, 3)]
39416pub fn _mm_roundscale_round_sd<const IMM8: i32, const SAE: i32>(a: __m128d, b: __m128d) -> __m128d {
39417 unsafe {
39418 static_assert_uimm_bits!(IMM8, 8);
39419 static_assert_mantissas_sae!(SAE);
39420 let a: f64x2 = a.as_f64x2();
39421 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vrndscalesd(a, b, f64x2::ZERO, 0b11111111, IMM8, SAE);
        transmute(r)
39424 }
39425}
39426
39427/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39428/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
39429/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
39430/// * [`_MM_FROUND_TO_NEG_INF`] : round down
39431/// * [`_MM_FROUND_TO_POS_INF`] : round up
39432/// * [`_MM_FROUND_TO_ZERO`] : truncate
39433/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39434///
39435/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_roundscale_round_sd&expand=4791)
39437#[inline]
39438#[target_feature(enable = "avx512f")]
39439#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39440#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0, SAE = 8))]
39441#[rustc_legacy_const_generics(4, 5)]
39442pub fn _mm_mask_roundscale_round_sd<const IMM8: i32, const SAE: i32>(
39443 src: __m128d,
39444 k: __mmask8,
39445 a: __m128d,
39446 b: __m128d,
39447) -> __m128d {
39448 unsafe {
39449 static_assert_uimm_bits!(IMM8, 8);
39450 static_assert_mantissas_sae!(SAE);
39451 let a: f64x2 = a.as_f64x2();
39452 let b: f64x2 = b.as_f64x2();
39453 let src: f64x2 = src.as_f64x2();
        let r: f64x2 = vrndscalesd(a, b, src, k, IMM8, SAE);
        transmute(r)
39456 }
39457}
39458
39459/// Round the lower double-precision (64-bit) floating-point element in b to the number of fraction bits specified by imm8, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39460/// Rounding is done according to the imm8\[2:0\] parameter, which can be one of:\
39461/// * [`_MM_FROUND_TO_NEAREST_INT`] : round to nearest
39462/// * [`_MM_FROUND_TO_NEG_INF`] : round down
39463/// * [`_MM_FROUND_TO_POS_INF`] : round up
39464/// * [`_MM_FROUND_TO_ZERO`] : truncate
39465/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39466///
39467/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_roundscale_round_sd&expand=4792)
39469#[inline]
39470#[target_feature(enable = "avx512f")]
39471#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39472#[cfg_attr(test, assert_instr(vrndscalesd, IMM8 = 0, SAE = 8))]
39473#[rustc_legacy_const_generics(3, 4)]
39474pub fn _mm_maskz_roundscale_round_sd<const IMM8: i32, const SAE: i32>(
39475 k: __mmask8,
39476 a: __m128d,
39477 b: __m128d,
39478) -> __m128d {
39479 unsafe {
39480 static_assert_uimm_bits!(IMM8, 8);
39481 static_assert_mantissas_sae!(SAE);
39482 let a: f64x2 = a.as_f64x2();
39483 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vrndscalesd(a, b, f64x2::ZERO, k, IMM8, SAE);
        transmute(r)
39486 }
39487}
39488
39489/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
39490///
39491/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39492/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39493/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39494/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39495/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39496/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39497///
39498/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_scalef_round_ss&expand=4895)
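///
/// A minimal usage sketch (illustrative only; assumes an `avx512f` target-feature context):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm_set_ss(1.5);
/// let b = _mm_set_ss(3.0);
/// // Lower lane: 1.5 * 2^floor(3.0) = 12.0; the upper lanes are copied from `a`.
/// let r = _mm_scalef_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b);
/// ```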
39499#[inline]
39500#[target_feature(enable = "avx512f")]
39501#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39502#[cfg_attr(test, assert_instr(vscalefss, ROUNDING = 8))]
39503#[rustc_legacy_const_generics(2)]
39504pub fn _mm_scalef_round_ss<const ROUNDING: i32>(a: __m128, b: __m128) -> __m128 {
39505 unsafe {
39506 static_assert_rounding!(ROUNDING);
39507 let a: f32x4 = a.as_f32x4();
39508 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vscalefss(a, b, f32x4::ZERO, 0b11111111, ROUNDING);
        transmute(r)
39511 }
39512}
39513
39514/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39515///
39516/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39517/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39518/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39519/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39520/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39521/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39522///
39523/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_scalef_round_ss&expand=4893)
39524#[inline]
39525#[target_feature(enable = "avx512f")]
39526#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39527#[cfg_attr(test, assert_instr(vscalefss, ROUNDING = 8))]
39528#[rustc_legacy_const_generics(4)]
39529pub fn _mm_mask_scalef_round_ss<const ROUNDING: i32>(
39530 src: __m128,
39531 k: __mmask8,
39532 a: __m128,
39533 b: __m128,
39534) -> __m128 {
39535 unsafe {
39536 static_assert_rounding!(ROUNDING);
39537 let a: f32x4 = a.as_f32x4();
39538 let b: f32x4 = b.as_f32x4();
39539 let src: f32x4 = src.as_f32x4();
        let r: f32x4 = vscalefss(a, b, src, k, ROUNDING);
        transmute(r)
39542 }
39543}
39544
39545/// Scale the packed single-precision (32-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39546///
39547/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39548/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39549/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39550/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39551/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39552/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39553///
39554/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_scalef_round_ss&expand=4894)
39555#[inline]
39556#[target_feature(enable = "avx512f")]
39557#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39558#[cfg_attr(test, assert_instr(vscalefss, ROUNDING = 8))]
39559#[rustc_legacy_const_generics(3)]
39560pub fn _mm_maskz_scalef_round_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128) -> __m128 {
39561 unsafe {
39562 static_assert_rounding!(ROUNDING);
39563 let a: f32x4 = a.as_f32x4();
39564 let b: f32x4 = b.as_f32x4();
        let r: f32x4 = vscalefss(a, b, f32x4::ZERO, k, ROUNDING);
        transmute(r)
39567 }
39568}
39569
39570/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
39571///
39572/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39573/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39574/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39575/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39576/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39577/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39578///
39579/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_scalef_round_sd&expand=4892)
39580#[inline]
39581#[target_feature(enable = "avx512f")]
39582#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39583#[cfg_attr(test, assert_instr(vscalefsd, ROUNDING = 8))]
39584#[rustc_legacy_const_generics(2)]
39585pub fn _mm_scalef_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d) -> __m128d {
39586 unsafe {
39587 static_assert_rounding!(ROUNDING);
39588 let a: f64x2 = a.as_f64x2();
39589 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vscalefsd(a, b, f64x2::ZERO, 0b11111111, ROUNDING);
        transmute(r)
39592 }
39593}
39594
39595/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39596///
39597/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39598/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39599/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39600/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39601/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39602/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39603///
39604/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_scalef_round_sd&expand=4890)
39605#[inline]
39606#[target_feature(enable = "avx512f")]
39607#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39608#[cfg_attr(test, assert_instr(vscalefsd, ROUNDING = 8))]
39609#[rustc_legacy_const_generics(4)]
39610pub fn _mm_mask_scalef_round_sd<const ROUNDING: i32>(
39611 src: __m128d,
39612 k: __mmask8,
39613 a: __m128d,
39614 b: __m128d,
39615) -> __m128d {
    unsafe {
        static_assert_rounding!(ROUNDING);
        let a: f64x2 = a.as_f64x2();
        let b: f64x2 = b.as_f64x2();
        let src: f64x2 = src.as_f64x2();
        let r: f64x2 = vscalefsd(a, b, src, k, ROUNDING);
        transmute(r)
39622 }
39623}
39624
39625/// Scale the packed double-precision (64-bit) floating-point elements in a using values from b, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39626///
39627/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39628/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39629/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39630/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39631/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39632/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39633///
39634/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_scalef_round_sd&expand=4891)
39635#[inline]
39636#[target_feature(enable = "avx512f")]
39637#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39638#[cfg_attr(test, assert_instr(vscalefsd, ROUNDING = 8))]
39639#[rustc_legacy_const_generics(3)]
39640pub fn _mm_maskz_scalef_round_sd<const ROUNDING: i32>(
39641 k: __mmask8,
39642 a: __m128d,
39643 b: __m128d,
39644) -> __m128d {
39645 unsafe {
39646 static_assert_rounding!(ROUNDING);
39647 let a: f64x2 = a.as_f64x2();
39648 let b: f64x2 = b.as_f64x2();
        let r: f64x2 = vscalefsd(a, b, f64x2::ZERO, k, ROUNDING);
        transmute(r)
39651 }
39652}
39653
39654/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
39655///
39656/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39657/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39658/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39659/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39660/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39661/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39662///
39663/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fmadd_round_ss&expand=2573)
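///
/// A minimal usage sketch combining a rounding mode with exception suppression (illustrative
/// only; assumes an `avx512f` target-feature context):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm_set_ss(2.0);
/// let b = _mm_set_ss(3.0);
/// let c = _mm_set_ss(1.0);
/// // Lower lane: 2.0 * 3.0 + 1.0 = 7.0.
/// let r = _mm_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
/// ```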
39664#[inline]
39665#[target_feature(enable = "avx512f")]
39666#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39667#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39668#[rustc_legacy_const_generics(3)]
39669pub fn _mm_fmadd_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
39670 unsafe {
39671 static_assert_rounding!(ROUNDING);
39672 let extracta: f32 = simd_extract!(a, 0);
39673 let extractb: f32 = simd_extract!(b, 0);
39674 let extractc: f32 = simd_extract!(c, 0);
        let r: f32 = vfmaddssround(extracta, extractb, extractc, ROUNDING);
39676 simd_insert!(a, 0, r)
39677 }
39678}
39679
39680/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39681///
39682/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39683/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39684/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39685/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39686/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39687/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39688///
39689/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmadd_round_ss&expand=2574)
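///
/// A minimal sketch of the writemask behaviour (illustrative only; assumes an `avx512f`
/// target-feature context):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm_set_ss(2.0);
/// let b = _mm_set_ss(3.0);
/// let c = _mm_set_ss(1.0);
/// // Mask bit 0 clear: the lower lane is copied from `a`, so it stays 2.0.
/// let kept = _mm_mask_fmadd_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0b0, b, c);
/// // Mask bit 0 set: the lower lane becomes 2.0 * 3.0 + 1.0 = 7.0.
/// let fused = _mm_mask_fmadd_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0b1, b, c);
/// ```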
39690#[inline]
39691#[target_feature(enable = "avx512f")]
39692#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39693#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39694#[rustc_legacy_const_generics(4)]
39695pub fn _mm_mask_fmadd_round_ss<const ROUNDING: i32>(
39696 a: __m128,
39697 k: __mmask8,
39698 b: __m128,
39699 c: __m128,
39700) -> __m128 {
39701 unsafe {
39702 static_assert_rounding!(ROUNDING);
39703 let mut fmadd: f32 = simd_extract!(a, 0);
39704 if (k & 0b00000001) != 0 {
39705 let extractb: f32 = simd_extract!(b, 0);
39706 let extractc: f32 = simd_extract!(c, 0);
            fmadd = vfmaddssround(fmadd, extractb, extractc, ROUNDING);
39708 }
39709 simd_insert!(a, 0, fmadd)
39710 }
39711}
39712
39713/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39714///
39715/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39716/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39717/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39718/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39719/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39720/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39721///
39722/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_round_ss&expand=2576)
39723#[inline]
39724#[target_feature(enable = "avx512f")]
39725#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39726#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39727#[rustc_legacy_const_generics(4)]
39728pub fn _mm_maskz_fmadd_round_ss<const ROUNDING: i32>(
39729 k: __mmask8,
39730 a: __m128,
39731 b: __m128,
39732 c: __m128,
39733) -> __m128 {
39734 unsafe {
39735 static_assert_rounding!(ROUNDING);
39736 let mut fmadd: f32 = 0.;
39737 if (k & 0b00000001) != 0 {
39738 let extracta: f32 = simd_extract!(a, 0);
39739 let extractb: f32 = simd_extract!(b, 0);
39740 let extractc: f32 = simd_extract!(c, 0);
            fmadd = vfmaddssround(extracta, extractb, extractc, ROUNDING);
39742 }
39743 simd_insert!(a, 0, fmadd)
39744 }
39745}
39746
39747/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
39748///
39749/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39750/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39751/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39752/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39753/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39754/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39755///
39756/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_round_ss&expand=2575)
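///
/// A minimal sketch showing that the result is built from `c` (illustrative only; assumes an
/// `avx512f` target-feature context):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm_set_ss(2.0);
/// let b = _mm_set_ss(3.0);
/// let c = _mm_set_ps(9.0, 8.0, 7.0, 1.0);
/// // Upper lanes come from `c` (7.0, 8.0, 9.0); with mask bit 0 set the lower lane becomes
/// // 2.0 * 3.0 + 1.0 = 7.0, otherwise it would stay 1.0.
/// let r = _mm_mask3_fmadd_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b, c, 0b1);
/// ```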
39757#[inline]
39758#[target_feature(enable = "avx512f")]
39759#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39760#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39761#[rustc_legacy_const_generics(4)]
39762pub fn _mm_mask3_fmadd_round_ss<const ROUNDING: i32>(
39763 a: __m128,
39764 b: __m128,
39765 c: __m128,
39766 k: __mmask8,
39767) -> __m128 {
39768 unsafe {
39769 static_assert_rounding!(ROUNDING);
39770 let mut fmadd: f32 = simd_extract!(c, 0);
39771 if (k & 0b00000001) != 0 {
39772 let extracta: f32 = simd_extract!(a, 0);
39773 let extractb: f32 = simd_extract!(b, 0);
            fmadd = vfmaddssround(extracta, extractb, fmadd, ROUNDING);
39775 }
39776 simd_insert!(c, 0, fmadd)
39777 }
39778}
39779
39780/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
39781///
39782/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39783/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39784/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39785/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39786/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39787/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39788///
39789/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fmadd_round_sd&expand=2569)
39790#[inline]
39791#[target_feature(enable = "avx512f")]
39792#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39793#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39794#[rustc_legacy_const_generics(3)]
39795pub fn _mm_fmadd_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
39796 unsafe {
39797 static_assert_rounding!(ROUNDING);
39798 let extracta: f64 = simd_extract!(a, 0);
39799 let extractb: f64 = simd_extract!(b, 0);
39800 let extractc: f64 = simd_extract!(c, 0);
        let fmadd: f64 = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
39802 simd_insert!(a, 0, fmadd)
39803 }
39804}
39805
39806/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39807///
39808/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39809/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39810/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39811/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39812/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39813/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39814///
39815/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmadd_round_sd&expand=2570)
39816#[inline]
39817#[target_feature(enable = "avx512f")]
39818#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39819#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39820#[rustc_legacy_const_generics(4)]
39821pub fn _mm_mask_fmadd_round_sd<const ROUNDING: i32>(
39822 a: __m128d,
39823 k: __mmask8,
39824 b: __m128d,
39825 c: __m128d,
39826) -> __m128d {
39827 unsafe {
39828 static_assert_rounding!(ROUNDING);
39829 let mut fmadd: f64 = simd_extract!(a, 0);
39830 if (k & 0b00000001) != 0 {
39831 let extractb: f64 = simd_extract!(b, 0);
39832 let extractc: f64 = simd_extract!(c, 0);
            fmadd = vfmaddsdround(fmadd, extractb, extractc, ROUNDING);
39834 }
39835 simd_insert!(a, 0, fmadd)
39836 }
39837}
39838
39839/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
39840///
39841/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39842/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39843/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39844/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39845/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39846/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39847///
39848/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmadd_round_sd&expand=2572)
39849#[inline]
39850#[target_feature(enable = "avx512f")]
39851#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39852#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39853#[rustc_legacy_const_generics(4)]
39854pub fn _mm_maskz_fmadd_round_sd<const ROUNDING: i32>(
39855 k: __mmask8,
39856 a: __m128d,
39857 b: __m128d,
39858 c: __m128d,
39859) -> __m128d {
39860 unsafe {
39861 static_assert_rounding!(ROUNDING);
39862 let mut fmadd: f64 = 0.;
39863 if (k & 0b00000001) != 0 {
39864 let extracta: f64 = simd_extract!(a, 0);
39865 let extractb: f64 = simd_extract!(b, 0);
39866 let extractc: f64 = simd_extract!(c, 0);
            fmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
39868 }
39869 simd_insert!(a, 0, fmadd)
39870 }
39871}
39872
39873/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
39874///
39875/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39876/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39877/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39878/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39879/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39880/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39881///
39882/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmadd_round_sd&expand=2571)
39883#[inline]
39884#[target_feature(enable = "avx512f")]
39885#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39886#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))]
39887#[rustc_legacy_const_generics(4)]
39888pub fn _mm_mask3_fmadd_round_sd<const ROUNDING: i32>(
39889 a: __m128d,
39890 b: __m128d,
39891 c: __m128d,
39892 k: __mmask8,
39893) -> __m128d {
39894 unsafe {
39895 static_assert_rounding!(ROUNDING);
39896 let mut fmadd: f64 = simd_extract!(c, 0);
39897 if (k & 0b00000001) != 0 {
39898 let extracta: f64 = simd_extract!(a, 0);
39899 let extractb: f64 = simd_extract!(b, 0);
            fmadd = vfmaddsdround(extracta, extractb, fmadd, ROUNDING);
39901 }
39902 simd_insert!(c, 0, fmadd)
39903 }
39904}
39905
39906/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
39907///
39908/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39909/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39910/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39911/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39912/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39913/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39914///
39915/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fmsub_round_ss&expand=2659)
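///
/// A minimal usage sketch (illustrative only; assumes an `avx512f` target-feature context):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm_set_ss(5.0);
/// let b = _mm_set_ss(4.0);
/// let c = _mm_set_ss(2.5);
/// // Lower lane: 5.0 * 4.0 - 2.5 = 17.5.
/// let r = _mm_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
/// ```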
39916#[inline]
39917#[target_feature(enable = "avx512f")]
39918#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39919#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
39920#[rustc_legacy_const_generics(3)]
39921pub fn _mm_fmsub_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
39922 unsafe {
39923 static_assert_rounding!(ROUNDING);
39924 let extracta: f32 = simd_extract!(a, 0);
39925 let extractb: f32 = simd_extract!(b, 0);
39926 let extractc: f32 = simd_extract!(c, 0);
39927 let extractc: f32 = -extractc;
        let fmsub: f32 = vfmaddssround(extracta, extractb, extractc, ROUNDING);
39929 simd_insert!(a, 0, fmsub)
39930 }
39931}
39932
39933/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39934///
39935/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39936/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39937/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39938/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39939/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39940/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39941///
39942/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_round_ss&expand=2660)
39943#[inline]
39944#[target_feature(enable = "avx512f")]
39945#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39946#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
39947#[rustc_legacy_const_generics(4)]
39948pub fn _mm_mask_fmsub_round_ss<const ROUNDING: i32>(
39949 a: __m128,
39950 k: __mmask8,
39951 b: __m128,
39952 c: __m128,
39953) -> __m128 {
39954 unsafe {
39955 static_assert_rounding!(ROUNDING);
39956 let mut fmsub: f32 = simd_extract!(a, 0);
39957 if (k & 0b00000001) != 0 {
39958 let extractb: f32 = simd_extract!(b, 0);
39959 let extractc: f32 = simd_extract!(c, 0);
39960 let extractc: f32 = -extractc;
            fmsub = vfmaddssround(fmsub, extractb, extractc, ROUNDING);
39962 }
39963 simd_insert!(a, 0, fmsub)
39964 }
39965}
39966
39967/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
39968///
39969/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
39970/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
39971/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
39972/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
39973/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
39974/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
39975///
39976/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_round_ss&expand=2662)
39977#[inline]
39978#[target_feature(enable = "avx512f")]
39979#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
39980#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
39981#[rustc_legacy_const_generics(4)]
39982pub fn _mm_maskz_fmsub_round_ss<const ROUNDING: i32>(
39983 k: __mmask8,
39984 a: __m128,
39985 b: __m128,
39986 c: __m128,
39987) -> __m128 {
39988 unsafe {
39989 static_assert_rounding!(ROUNDING);
39990 let mut fmsub: f32 = 0.;
39991 if (k & 0b00000001) != 0 {
39992 let extracta: f32 = simd_extract!(a, 0);
39993 let extractb: f32 = simd_extract!(b, 0);
39994 let extractc: f32 = simd_extract!(c, 0);
39995 let extractc: f32 = -extractc;
            fmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
39997 }
39998 simd_insert!(a, 0, fmsub)
39999 }
40000}
40001
40002/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
40003///
40004/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40005/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40006/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40007/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40008/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40009/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40010///
40011/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_round_ss&expand=2661)
40012#[inline]
40013#[target_feature(enable = "avx512f")]
40014#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40015#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
40016#[rustc_legacy_const_generics(4)]
40017pub fn _mm_mask3_fmsub_round_ss<const ROUNDING: i32>(
40018 a: __m128,
40019 b: __m128,
40020 c: __m128,
40021 k: __mmask8,
40022) -> __m128 {
40023 unsafe {
40024 static_assert_rounding!(ROUNDING);
40025 let mut fmsub: f32 = simd_extract!(c, 0);
40026 if (k & 0b00000001) != 0 {
40027 let extracta: f32 = simd_extract!(a, 0);
40028 let extractb: f32 = simd_extract!(b, 0);
40029 let extractc: f32 = -fmsub;
            fmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40031 }
40032 simd_insert!(c, 0, fmsub)
40033 }
40034}
40035
40036/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
40037///
40038/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40039/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40040/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40041/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40042/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40043/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40044///
40045/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fmsub_round_sd&expand=2655)
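///
/// A minimal usage sketch with truncation selected (illustrative only; assumes an `avx512f`
/// target-feature context):
///
/// ```ignore
/// use core::arch::x86_64::*;
///
/// let a = _mm_set_sd(1.0);
/// let b = _mm_set_sd(3.0);
/// let c = _mm_set_sd(0.5);
/// // Lower lane: 1.0 * 3.0 - 0.5 = 2.5; the upper lane is copied from `a`.
/// let r = _mm_fmsub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
/// ```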
40046#[inline]
40047#[target_feature(enable = "avx512f")]
40048#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40049#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
40050#[rustc_legacy_const_generics(3)]
40051pub fn _mm_fmsub_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
40052 unsafe {
40053 static_assert_rounding!(ROUNDING);
40054 let extracta: f64 = simd_extract!(a, 0);
40055 let extractb: f64 = simd_extract!(b, 0);
40056 let extractc: f64 = simd_extract!(c, 0);
40057 let extractc: f64 = -extractc;
        let fmsub: f64 = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40059 simd_insert!(a, 0, fmsub)
40060 }
40061}
40062
40063/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40064///
40065/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40066/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40067/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40068/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40069/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40070/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40071///
40072/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fmsub_round_sd&expand=2656)
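///
/// A minimal sketch of the writemask behaviour (illustrative only, not compiled
/// or run here: the intrinsic is unstable and needs nightly Rust plus AVX-512F
/// support at runtime):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let (a, b, c) = (_mm_set_sd(2.0), _mm_set_sd(3.0), _mm_set_sd(1.0));
///     // Mask bit 0 set: lower lane = (2.0 * 3.0) - 1.0 = 5.0.
///     let r = _mm_mask_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, 0b1, b, c);
///     assert_eq!(_mm_cvtsd_f64(r), 5.0);
///     // Mask bit 0 clear: the lower lane is copied from `a` instead.
///     let r = _mm_mask_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, 0b0, b, c);
///     assert_eq!(_mm_cvtsd_f64(r), 2.0);
/// }
/// ```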
40073#[inline]
40074#[target_feature(enable = "avx512f")]
40075#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40076#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
40077#[rustc_legacy_const_generics(4)]
40078pub fn _mm_mask_fmsub_round_sd<const ROUNDING: i32>(
40079 a: __m128d,
40080 k: __mmask8,
40081 b: __m128d,
40082 c: __m128d,
40083) -> __m128d {
40084 unsafe {
40085 static_assert_rounding!(ROUNDING);
40086 let mut fmsub: f64 = simd_extract!(a, 0);
40087 if (k & 0b00000001) != 0 {
40088 let extractb: f64 = simd_extract!(b, 0);
40089 let extractc: f64 = simd_extract!(c, 0);
40090 let extractc: f64 = -extractc;
40091 fmsub = vfmaddsdround(fmsub, extractb, extractc, ROUNDING);
40092 }
40093 simd_insert!(a, 0, fmsub)
40094 }
40095}
40096
40097/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40098///
40099/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40100/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40101/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40102/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40103/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40104/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40105///
40106/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fmsub_round_sd&expand=2658)
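///
/// A minimal sketch of the zeromask behaviour (illustrative only, not compiled
/// or run here: the intrinsic is unstable and needs nightly Rust plus AVX-512F
/// support at runtime):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let (a, b, c) = (_mm_set_sd(2.0), _mm_set_sd(3.0), _mm_set_sd(1.0));
///     // Mask bit 0 clear: the lower lane is zeroed out.
///     let r = _mm_maskz_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b, c);
///     assert_eq!(_mm_cvtsd_f64(r), 0.0);
/// }
/// ```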
40107#[inline]
40108#[target_feature(enable = "avx512f")]
40109#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40110#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
40111#[rustc_legacy_const_generics(4)]
40112pub fn _mm_maskz_fmsub_round_sd<const ROUNDING: i32>(
40113 k: __mmask8,
40114 a: __m128d,
40115 b: __m128d,
40116 c: __m128d,
40117) -> __m128d {
40118 unsafe {
40119 static_assert_rounding!(ROUNDING);
40120 let mut fmsub: f64 = 0.;
40121 if (k & 0b00000001) != 0 {
40122 let extracta: f64 = simd_extract!(a, 0);
40123 let extractb: f64 = simd_extract!(b, 0);
40124 let extractc: f64 = simd_extract!(c, 0);
40125 let extractc: f64 = -extractc;
40126 fmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40127 }
40128 simd_insert!(a, 0, fmsub)
40129 }
40130}
40131
40132/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
40133///
40134/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40135/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40136/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40137/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40138/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40139/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40140///
40141/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fmsub_round_sd&expand=2657)
40142#[inline]
40143#[target_feature(enable = "avx512f")]
40144#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40145#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))]
40146#[rustc_legacy_const_generics(4)]
40147pub fn _mm_mask3_fmsub_round_sd<const ROUNDING: i32>(
40148 a: __m128d,
40149 b: __m128d,
40150 c: __m128d,
40151 k: __mmask8,
40152) -> __m128d {
40153 unsafe {
40154 static_assert_rounding!(ROUNDING);
40155 let mut fmsub: f64 = simd_extract!(c, 0);
40156 if (k & 0b00000001) != 0 {
40157 let extracta: f64 = simd_extract!(a, 0);
40158 let extractb: f64 = simd_extract!(b, 0);
40159 let extractc: f64 = -fmsub;
40160 fmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40161 }
40162 simd_insert!(c, 0, fmsub)
40163 }
40164}
40165
40166/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
40167///
40168/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40169/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40170/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40171/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40172/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40173/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40174///
40175/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmadd_round_ss&expand=2739)
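///
/// A minimal usage sketch (illustrative only, not compiled or run here: the
/// intrinsic is unstable and needs nightly Rust plus AVX-512F support at runtime):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm_set_ss(2.0);
///     let b = _mm_set_ss(3.0);
///     let c = _mm_set_ss(10.0);
///     // Lower lane: -(2.0 * 3.0) + 10.0 = 4.0; the upper 3 lanes are copied from `a`.
///     let r = _mm_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
///     assert_eq!(_mm_cvtss_f32(r), 4.0);
/// }
/// ```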
40176#[inline]
40177#[target_feature(enable = "avx512f")]
40178#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40179#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40180#[rustc_legacy_const_generics(3)]
40181pub fn _mm_fnmadd_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
40182 unsafe {
40183 static_assert_rounding!(ROUNDING);
40184 let extracta: f32 = simd_extract!(a, 0);
40185 let extracta: f32 = -extracta;
40186 let extractb: f32 = simd_extract!(b, 0);
40187 let extractc: f32 = simd_extract!(c, 0);
40188 let fnmadd: f32 = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40189 simd_insert!(a, 0, fnmadd)
40190 }
40191}
40192
40193/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40194///
40195/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40196/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40197/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40198/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40199/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40200/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40201///
40202/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_round_ss&expand=2740)
40203#[inline]
40204#[target_feature(enable = "avx512f")]
40205#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40206#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40207#[rustc_legacy_const_generics(4)]
40208pub fn _mm_mask_fnmadd_round_ss<const ROUNDING: i32>(
40209 a: __m128,
40210 k: __mmask8,
40211 b: __m128,
40212 c: __m128,
40213) -> __m128 {
40214 unsafe {
40215 static_assert_rounding!(ROUNDING);
40216 let mut fnmadd: f32 = simd_extract!(a, 0);
40217 if (k & 0b00000001) != 0 {
40218 let extracta: f32 = -fnmadd;
40219 let extractb: f32 = simd_extract!(b, 0);
40220 let extractc: f32 = simd_extract!(c, 0);
40221 fnmadd = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40222 }
40223 simd_insert!(a, 0, fnmadd)
40224 }
40225}
40226
40227/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40228///
40229/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40230/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40231/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40232/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40233/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40234/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40235///
40236/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_round_ss&expand=2742)
40237#[inline]
40238#[target_feature(enable = "avx512f")]
40239#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40240#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40241#[rustc_legacy_const_generics(4)]
40242pub fn _mm_maskz_fnmadd_round_ss<const ROUNDING: i32>(
40243 k: __mmask8,
40244 a: __m128,
40245 b: __m128,
40246 c: __m128,
40247) -> __m128 {
40248 unsafe {
40249 static_assert_rounding!(ROUNDING);
40250 let mut fnmadd: f32 = 0.;
40251 if (k & 0b00000001) != 0 {
40252 let extracta: f32 = simd_extract!(a, 0);
40253 let extracta: f32 = -extracta;
40254 let extractb: f32 = simd_extract!(b, 0);
40255 let extractc: f32 = simd_extract!(c, 0);
40256 fnmadd = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40257 }
40258 simd_insert!(a, 0, fnmadd)
40259 }
40260}
40261
40262/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
40263///
40264/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40265/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40266/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40267/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40268/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40269/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40270///
40271/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_round_ss&expand=2741)
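///
/// A minimal sketch of the mask3 behaviour, where `c` provides both the fallback
/// lower lane and the upper lanes (illustrative only, not compiled or run here:
/// the intrinsic is unstable and needs nightly Rust plus AVX-512F support at runtime):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let (a, b, c) = (_mm_set_ss(2.0), _mm_set_ss(3.0), _mm_set_ss(10.0));
///     // Mask bit 0 set: lower lane = -(2.0 * 3.0) + 10.0 = 4.0.
///     let r = _mm_mask3_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c, 0b1);
///     assert_eq!(_mm_cvtss_f32(r), 4.0);
///     // Mask bit 0 clear: the lower lane is copied from `c`.
///     let r = _mm_mask3_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c, 0b0);
///     assert_eq!(_mm_cvtss_f32(r), 10.0);
/// }
/// ```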
40272#[inline]
40273#[target_feature(enable = "avx512f")]
40274#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40275#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40276#[rustc_legacy_const_generics(4)]
40277pub fn _mm_mask3_fnmadd_round_ss<const ROUNDING: i32>(
40278 a: __m128,
40279 b: __m128,
40280 c: __m128,
40281 k: __mmask8,
40282) -> __m128 {
40283 unsafe {
40284 static_assert_rounding!(ROUNDING);
40285 let mut fnmadd: f32 = simd_extract!(c, 0);
40286 if (k & 0b00000001) != 0 {
40287 let extracta: f32 = simd_extract!(a, 0);
40288 let extracta: f32 = -extracta;
40289 let extractb: f32 = simd_extract!(b, 0);
40290 fnmadd = vfmaddssround(extracta, extractb, fnmadd, ROUNDING);
40291 }
40292 simd_insert!(c, 0, fnmadd)
40293 }
40294}
40295
40296/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
40297///
40298/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40299/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40300/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40301/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40302/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40303/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40304///
40305/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmadd_round_sd&expand=2735)
40306#[inline]
40307#[target_feature(enable = "avx512f")]
40308#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40309#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40310#[rustc_legacy_const_generics(3)]
40311pub fn _mm_fnmadd_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
40312 unsafe {
40313 static_assert_rounding!(ROUNDING);
40314 let extracta: f64 = simd_extract!(a, 0);
40315 let extracta: f64 = -extracta;
40316 let extractb: f64 = simd_extract!(b, 0);
40317 let extractc: f64 = simd_extract!(c, 0);
40318 let fnmadd: f64 = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40319 simd_insert!(a, 0, fnmadd)
40320 }
40321}
40322
40323/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40324///
40325/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40326/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40327/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40328/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40329/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40330/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40331///
40332/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmadd_round_sd&expand=2736)
40333#[inline]
40334#[target_feature(enable = "avx512f")]
40335#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40336#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40337#[rustc_legacy_const_generics(4)]
40338pub fn _mm_mask_fnmadd_round_sd<const ROUNDING: i32>(
40339 a: __m128d,
40340 k: __mmask8,
40341 b: __m128d,
40342 c: __m128d,
40343) -> __m128d {
40344 unsafe {
40345 static_assert_rounding!(ROUNDING);
40346 let mut fnmadd: f64 = simd_extract!(a, 0);
40347 if (k & 0b00000001) != 0 {
40348 let extracta: f64 = -fnmadd;
40349 let extractb: f64 = simd_extract!(b, 0);
40350 let extractc: f64 = simd_extract!(c, 0);
40351 fnmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40352 }
40353 simd_insert!(a, 0, fnmadd)
40354 }
40355}
40356
40357/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40358///
40359/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40360/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40361/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40362/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40363/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40364/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40365///
40366/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmadd_round_sd&expand=2738)
40367#[inline]
40368#[target_feature(enable = "avx512f")]
40369#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40370#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40371#[rustc_legacy_const_generics(4)]
40372pub fn _mm_maskz_fnmadd_round_sd<const ROUNDING: i32>(
40373 k: __mmask8,
40374 a: __m128d,
40375 b: __m128d,
40376 c: __m128d,
40377) -> __m128d {
40378 unsafe {
40379 static_assert_rounding!(ROUNDING);
40380 let mut fnmadd: f64 = 0.;
40381 if (k & 0b00000001) != 0 {
40382 let extracta: f64 = simd_extract!(a, 0);
40383 let extracta: f64 = -extracta;
40384 let extractb: f64 = simd_extract!(b, 0);
40385 let extractc: f64 = simd_extract!(c, 0);
40386 fnmadd = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40387 }
40388 simd_insert!(a, 0, fnmadd)
40389 }
40390}
40391
40392/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and add the negated intermediate result to the lower element in c. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
40393///
40394/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40395/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40396/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40397/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40398/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40399/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40400///
40401/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmadd_round_sd&expand=2737)
40402#[inline]
40403#[target_feature(enable = "avx512f")]
40404#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40405#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))]
40406#[rustc_legacy_const_generics(4)]
40407pub fn _mm_mask3_fnmadd_round_sd<const ROUNDING: i32>(
40408 a: __m128d,
40409 b: __m128d,
40410 c: __m128d,
40411 k: __mmask8,
40412) -> __m128d {
40413 unsafe {
40414 static_assert_rounding!(ROUNDING);
40415 let mut fnmadd: f64 = simd_extract!(c, 0);
40416 if (k & 0b00000001) != 0 {
40417 let extracta: f64 = simd_extract!(a, 0);
40418 let extracta: f64 = -extracta;
40419 let extractb: f64 = simd_extract!(b, 0);
40420 fnmadd = vfmaddsdround(extracta, extractb, fnmadd, ROUNDING);
40421 }
40422 simd_insert!(c, 0, fnmadd)
40423 }
40424}
40425
40426/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
40427///
40428/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40429/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40430/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40431/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40432/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40433/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40434///
40435/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmsub_round_ss&expand=2787)
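///
/// A minimal usage sketch (illustrative only, not compiled or run here: the
/// intrinsic is unstable and needs nightly Rust plus AVX-512F support at runtime):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm_set_ss(2.0);
///     let b = _mm_set_ss(3.0);
///     let c = _mm_set_ss(1.0);
///     // Lower lane: -(2.0 * 3.0) - 1.0 = -7.0; the upper 3 lanes are copied from `a`.
///     let r = _mm_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
///     assert_eq!(_mm_cvtss_f32(r), -7.0);
/// }
/// ```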
40436#[inline]
40437#[target_feature(enable = "avx512f")]
40438#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40439#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40440#[rustc_legacy_const_generics(3)]
40441pub fn _mm_fnmsub_round_ss<const ROUNDING: i32>(a: __m128, b: __m128, c: __m128) -> __m128 {
40442 unsafe {
40443 static_assert_rounding!(ROUNDING);
40444 let extracta: f32 = simd_extract!(a, 0);
40445 let extracta: f32 = -extracta;
40446 let extractb: f32 = simd_extract!(b, 0);
40447 let extractc: f32 = simd_extract!(c, 0);
40448 let extractc: f32 = -extractc;
40449 let fnmsub: f32 = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40450 simd_insert!(a, 0, fnmsub)
40451 }
40452}
40453
40454/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40455///
40456/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40457/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40458/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40459/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40460/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40461/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40462///
40463/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_round_ss&expand=2788)
40464#[inline]
40465#[target_feature(enable = "avx512f")]
40466#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40467#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40468#[rustc_legacy_const_generics(4)]
40469pub fn _mm_mask_fnmsub_round_ss<const ROUNDING: i32>(
40470 a: __m128,
40471 k: __mmask8,
40472 b: __m128,
40473 c: __m128,
40474) -> __m128 {
40475 unsafe {
40476 static_assert_rounding!(ROUNDING);
40477 let mut fnmsub: f32 = simd_extract!(a, 0);
40478 if (k & 0b00000001) != 0 {
40479 let extracta: f32 = -fnmsub;
40480 let extractb: f32 = simd_extract!(b, 0);
40481 let extractc: f32 = simd_extract!(c, 0);
40482 let extractc: f32 = -extractc;
40483 fnmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40484 }
40485 simd_insert!(a, 0, fnmsub)
40486 }
40487}
40488
40489/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
40490///
40491/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40492/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40493/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40494/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40495/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40496/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40497///
40498/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_round_ss&expand=2790)
40499#[inline]
40500#[target_feature(enable = "avx512f")]
40501#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40502#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40503#[rustc_legacy_const_generics(4)]
40504pub fn _mm_maskz_fnmsub_round_ss<const ROUNDING: i32>(
40505 k: __mmask8,
40506 a: __m128,
40507 b: __m128,
40508 c: __m128,
40509) -> __m128 {
40510 unsafe {
40511 static_assert_rounding!(ROUNDING);
40512 let mut fnmsub: f32 = 0.;
40513 if (k & 0b00000001) != 0 {
40514 let extracta: f32 = simd_extract!(a, 0);
40515 let extracta: f32 = -extracta;
40516 let extractb: f32 = simd_extract!(b, 0);
40517 let extractc: f32 = simd_extract!(c, 0);
40518 let extractc: f32 = -extractc;
40519 fnmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40520 }
40521 simd_insert!(a, 0, fnmsub)
40522 }
40523}
40524
40525/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper 3 packed elements from c to the upper elements of dst.\
40526///
40527/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40528/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40529/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40530/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40531/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40532/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40533///
40534/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_round_ss&expand=2789)
40535#[inline]
40536#[target_feature(enable = "avx512f")]
40537#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40538#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40539#[rustc_legacy_const_generics(4)]
40540pub fn _mm_mask3_fnmsub_round_ss<const ROUNDING: i32>(
40541 a: __m128,
40542 b: __m128,
40543 c: __m128,
40544 k: __mmask8,
40545) -> __m128 {
40546 unsafe {
40547 static_assert_rounding!(ROUNDING);
40548 let mut fnmsub: f32 = simd_extract!(c, 0);
40549 if (k & 0b00000001) != 0 {
40550 let extracta: f32 = simd_extract!(a, 0);
40551 let extracta: f32 = -extracta;
40552 let extractb: f32 = simd_extract!(b, 0);
40553 let extractc: f32 = -fnmsub;
40554 fnmsub = vfmaddssround(extracta, extractb, extractc, ROUNDING);
40555 }
40556 simd_insert!(c, 0, fnmsub)
40557 }
40558}
40559
40560/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
40561///
40562/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40563/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40564/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40565/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40566/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40567/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40568///
40569/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fnmsub_round_sd&expand=2783)
40570#[inline]
40571#[target_feature(enable = "avx512f")]
40572#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40573#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40574#[rustc_legacy_const_generics(3)]
40575pub fn _mm_fnmsub_round_sd<const ROUNDING: i32>(a: __m128d, b: __m128d, c: __m128d) -> __m128d {
40576 unsafe {
40577 static_assert_rounding!(ROUNDING);
40578 let extracta: f64 = simd_extract!(a, 0);
40579 let extracta: f64 = -extracta;
40580 let extractb: f64 = simd_extract!(b, 0);
40581 let extractc: f64 = simd_extract!(c, 0);
40582 let extractc: f64 = -extractc;
40583 let fnmsub: f64 = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40584 simd_insert!(a, 0, fnmsub)
40585 }
40586}
40587
40588/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40589///
40590/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40591/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40592/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40593/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40594/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40595/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40596///
40597/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fnmsub_round_sd&expand=2784)
40598#[inline]
40599#[target_feature(enable = "avx512f")]
40600#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40601#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40602#[rustc_legacy_const_generics(4)]
40603pub fn _mm_mask_fnmsub_round_sd<const ROUNDING: i32>(
40604 a: __m128d,
40605 k: __mmask8,
40606 b: __m128d,
40607 c: __m128d,
40608) -> __m128d {
40609 unsafe {
40610 static_assert_rounding!(ROUNDING);
40611 let mut fnmsub: f64 = simd_extract!(a, 0);
40612 if (k & 0b00000001) != 0 {
40613 let extracta: f64 = -fnmsub;
40614 let extractb: f64 = simd_extract!(b, 0);
40615 let extractc: f64 = simd_extract!(c, 0);
40616 let extractc: f64 = -extractc;
40617 fnmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40618 }
40619 simd_insert!(a, 0, fnmsub)
40620 }
40621}
40622
40623/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
40624///
40625/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40626/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40627/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40628/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40629/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40630/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40631///
40632/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fnmsub_round_sd&expand=2786)
40633#[inline]
40634#[target_feature(enable = "avx512f")]
40635#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40636#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40637#[rustc_legacy_const_generics(4)]
40638pub fn _mm_maskz_fnmsub_round_sd<const ROUNDING: i32>(
40639 k: __mmask8,
40640 a: __m128d,
40641 b: __m128d,
40642 c: __m128d,
40643) -> __m128d {
40644 unsafe {
40645 static_assert_rounding!(ROUNDING);
40646 let mut fnmsub: f64 = 0.;
40647 if (k & 0b00000001) != 0 {
40648 let extracta: f64 = simd_extract!(a, 0);
40649 let extracta: f64 = -extracta;
40650 let extractb: f64 = simd_extract!(b, 0);
40651 let extractc: f64 = simd_extract!(c, 0);
40652 let extractc: f64 = -extractc;
40653 fnmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40654 }
40655 simd_insert!(a, 0, fnmsub)
40656 }
40657}
40658
40659/// Multiply the lower double-precision (64-bit) floating-point elements in a and b, and subtract the lower element in c from the negated intermediate result. Store the result in the lower element of dst using writemask k (the element is copied from c when mask bit 0 is not set), and copy the upper element from c to the upper element of dst.\
40660///
40661/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
40662/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
40663/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
40664/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
40665/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
40666/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
40667///
40668/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask3_fnmsub_round_sd&expand=2785)
40669#[inline]
40670#[target_feature(enable = "avx512f")]
40671#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40672#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))]
40673#[rustc_legacy_const_generics(4)]
40674pub fn _mm_mask3_fnmsub_round_sd<const ROUNDING: i32>(
40675 a: __m128d,
40676 b: __m128d,
40677 c: __m128d,
40678 k: __mmask8,
40679) -> __m128d {
40680 unsafe {
40681 static_assert_rounding!(ROUNDING);
40682 let mut fnmsub: f64 = simd_extract!(c, 0);
40683 if (k & 0b00000001) != 0 {
40684 let extracta: f64 = simd_extract!(a, 0);
40685 let extracta: f64 = -extracta;
40686 let extractb: f64 = simd_extract!(b, 0);
40687 let extractc: f64 = -fnmsub;
40688 fnmsub = vfmaddsdround(extracta, extractb, extractc, ROUNDING);
40689 }
40690 simd_insert!(c, 0, fnmsub)
40691 }
40692}
40693
40694/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.
40695///
40696/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_ss&expand=2517)
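///
/// A minimal sketch of one common fixup (replacing a NaN input with +0.0). The
/// per-token response table layout in `c` (one nibble per special-value token,
/// QNAN being token 0 and response 8 meaning +0.0) is taken from Intel's SDM
/// description of `VFIXUPIMMSS` and is an assumption of this example, which is
/// not compiled or run here (the intrinsic is unstable and needs nightly Rust
/// plus AVX-512F support at runtime):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm_set_ss(42.0);
///     let b = _mm_set_ss(f32::NAN);
///     // Lower 32-bit integer of `c`: nibble 0 (the QNAN token) = 8, i.e. "+0.0";
///     // every other token keeps the value from `a`.
///     let c = _mm_set_epi32(0, 0, 0, 0x0000_0008);
///     let r = _mm_fixupimm_ss::<0>(a, b, c);
///     assert_eq!(_mm_cvtss_f32(r), 0.0);
/// }
/// ```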
40697#[inline]
40698#[target_feature(enable = "avx512f")]
40699#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40700#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0))]
40701#[rustc_legacy_const_generics(3)]
40702pub fn _mm_fixupimm_ss<const IMM8: i32>(a: __m128, b: __m128, c: __m128i) -> __m128 {
40703 unsafe {
40704 static_assert_uimm_bits!(IMM8, 8);
40705 let a: f32x4 = a.as_f32x4();
40706 let b: f32x4 = b.as_f32x4();
40707 let c: i32x4 = c.as_i32x4();
40708 let r: f32x4 = vfixupimmss(a, b, c, IMM8, 0b11111111, _MM_FROUND_CUR_DIRECTION);
40709 let fixupimm: f32 = simd_extract!(r, 0);
40710 let r: f32x4 = simd_insert!(a, 0, fixupimm);
40711 transmute(r)
40712 }
40713}
40714
40715/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.
40716///
40717/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_ss&expand=2518)
40718#[inline]
40719#[target_feature(enable = "avx512f")]
40720#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40721#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0))]
40722#[rustc_legacy_const_generics(4)]
40723pub fn _mm_mask_fixupimm_ss<const IMM8: i32>(
40724 a: __m128,
40725 k: __mmask8,
40726 b: __m128,
40727 c: __m128i,
40728) -> __m128 {
40729 unsafe {
40730 static_assert_uimm_bits!(IMM8, 8);
40731 let a: f32x4 = a.as_f32x4();
40732 let b: f32x4 = b.as_f32x4();
40733 let c: i32x4 = c.as_i32x4();
40734 let fixupimm: f32x4 = vfixupimmss(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
40735 let fixupimm: f32 = simd_extract!(fixupimm, 0);
40736 let r: f32x4 = simd_insert!(a, 0, fixupimm);
40737 transmute(r)
40738 }
40739}
40740
40741/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.
40742///
40743/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_ss&expand=2519)
40744#[inline]
40745#[target_feature(enable = "avx512f")]
40746#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40747#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0))]
40748#[rustc_legacy_const_generics(4)]
40749pub fn _mm_maskz_fixupimm_ss<const IMM8: i32>(
40750 k: __mmask8,
40751 a: __m128,
40752 b: __m128,
40753 c: __m128i,
40754) -> __m128 {
40755 unsafe {
40756 static_assert_uimm_bits!(IMM8, 8);
40757 let a: f32x4 = a.as_f32x4();
40758 let b: f32x4 = b.as_f32x4();
40759 let c: i32x4 = c.as_i32x4();
40760 let fixupimm: f32x4 = vfixupimmssz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
40761 let fixupimm: f32 = simd_extract!(fixupimm, 0);
40762 let r: f32x4 = simd_insert!(a, 0, fixupimm);
40763 transmute(r)
40764 }
40765}
40766
40767/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.
40768///
40769/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_sd&expand=2514)
40770#[inline]
40771#[target_feature(enable = "avx512f")]
40772#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40773#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0))]
40774#[rustc_legacy_const_generics(3)]
40775pub fn _mm_fixupimm_sd<const IMM8: i32>(a: __m128d, b: __m128d, c: __m128i) -> __m128d {
40776 unsafe {
40777 static_assert_uimm_bits!(IMM8, 8);
40778 let a: f64x2 = a.as_f64x2();
40779 let b: f64x2 = b.as_f64x2();
40780 let c: i64x2 = c.as_i64x2();
40781 let fixupimm: f64x2 = vfixupimmsd(a, b, c, IMM8, 0b11111111, _MM_FROUND_CUR_DIRECTION);
40782 let fixupimm: f64 = simd_extract!(fixupimm, 0);
40783 let r: f64x2 = simd_insert!(a, 0, fixupimm);
40784 transmute(r)
40785 }
40786}
40787
40788/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.
40789///
40790/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_sd&expand=2515)
40791#[inline]
40792#[target_feature(enable = "avx512f")]
40793#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40794#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0))]
40795#[rustc_legacy_const_generics(4)]
40796pub fn _mm_mask_fixupimm_sd<const IMM8: i32>(
40797 a: __m128d,
40798 k: __mmask8,
40799 b: __m128d,
40800 c: __m128i,
40801) -> __m128d {
40802 unsafe {
40803 static_assert_uimm_bits!(IMM8, 8);
40804 let a: f64x2 = a.as_f64x2();
40805 let b: f64x2 = b.as_f64x2();
40806 let c: i64x2 = c.as_i64x2();
40807 let fixupimm: f64x2 = vfixupimmsd(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
40808 let fixupimm: f64 = simd_extract!(fixupimm, 0);
40809 let r: f64x2 = simd_insert!(a, 0, fixupimm);
40810 transmute(r)
40811 }
40812}
40813
40814/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.
40815///
40816/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_sd&expand=2516)
40817#[inline]
40818#[target_feature(enable = "avx512f")]
40819#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40820#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0))]
40821#[rustc_legacy_const_generics(4)]
40822pub fn _mm_maskz_fixupimm_sd<const IMM8: i32>(
40823 k: __mmask8,
40824 a: __m128d,
40825 b: __m128d,
40826 c: __m128i,
40827) -> __m128d {
40828 unsafe {
40829 static_assert_uimm_bits!(IMM8, 8);
40830 let a: f64x2 = a.as_f64x2();
40831 let b: f64x2 = b.as_f64x2();
40832 let c: i64x2 = c.as_i64x2();
40833 let fixupimm: f64x2 = vfixupimmsdz(a, b, c, IMM8, k, _MM_FROUND_CUR_DIRECTION);
40834 let fixupimm: f64 = simd_extract!(fixupimm, 0);
40835 let r: f64x2 = simd_insert!(a, 0, fixupimm);
40836 transmute(r)
40837 }
40838}
40839
40840/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.\
40841/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40842///
40843/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_round_ss&expand=2511)
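///
/// A minimal sketch mirroring the non-`round` variant but suppressing exception
/// reporting via the sae parameter. The response-table encoding in `c` follows
/// Intel's SDM description of `VFIXUPIMMSS` and is an assumption of this example,
/// which is not compiled or run here (the intrinsic is unstable and needs nightly
/// Rust plus AVX-512F support at runtime):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm_set_ss(42.0);
///     let b = _mm_set_ss(f32::NAN);
///     let c = _mm_set_epi32(0, 0, 0, 0x0000_0008); // map the QNAN token to +0.0
///     let r = _mm_fixupimm_round_ss::<0, { _MM_FROUND_NO_EXC }>(a, b, c);
///     assert_eq!(_mm_cvtss_f32(r), 0.0);
/// }
/// ```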
40844#[inline]
40845#[target_feature(enable = "avx512f")]
40846#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40847#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0, SAE = 8))]
40848#[rustc_legacy_const_generics(3, 4)]
40849pub fn _mm_fixupimm_round_ss<const IMM8: i32, const SAE: i32>(
40850 a: __m128,
40851 b: __m128,
40852 c: __m128i,
40853) -> __m128 {
40854 unsafe {
40855 static_assert_uimm_bits!(IMM8, 8);
40856 static_assert_mantissas_sae!(SAE);
40857 let a: f32x4 = a.as_f32x4();
40858 let b: f32x4 = b.as_f32x4();
40859 let c: i32x4 = c.as_i32x4();
40860 let r: f32x4 = vfixupimmss(a, b, c, IMM8, 0b11111111, SAE);
40861 let fixupimm: f32 = simd_extract!(r, 0);
40862 let r: f32x4 = simd_insert!(a, 0, fixupimm);
40863 transmute(r)
40864 }
40865}
40866
40867/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.\
40868/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40869///
40870/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_round_ss&expand=2512)
40871#[inline]
40872#[target_feature(enable = "avx512f")]
40873#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40874#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0, SAE = 8))]
40875#[rustc_legacy_const_generics(4, 5)]
40876pub fn _mm_mask_fixupimm_round_ss<const IMM8: i32, const SAE: i32>(
40877 a: __m128,
40878 k: __mmask8,
40879 b: __m128,
40880 c: __m128i,
40881) -> __m128 {
40882 unsafe {
40883 static_assert_uimm_bits!(IMM8, 8);
40884 static_assert_mantissas_sae!(SAE);
40885 let a: f32x4 = a.as_f32x4();
40886 let b: f32x4 = b.as_f32x4();
40887 let c: i32x4 = c.as_i32x4();
40888 let r: f32x4 = vfixupimmss(a, b, c, IMM8, k, SAE);
40889 let fixupimm: f32 = simd_extract!(r, 0);
40890 let r: f32x4 = simd_insert!(a, 0, fixupimm);
40891 transmute(r)
40892 }
40893}
40894
40895/// Fix up the lower single-precision (32-bit) floating-point elements in a and b using the lower 32-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst. imm8 is used to set the required flags reporting.\
40896/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40897///
40898/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_round_ss&expand=2513)
40899#[inline]
40900#[target_feature(enable = "avx512f")]
40901#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40902#[cfg_attr(test, assert_instr(vfixupimmss, IMM8 = 0, SAE = 8))]
40903#[rustc_legacy_const_generics(4, 5)]
40904pub fn _mm_maskz_fixupimm_round_ss<const IMM8: i32, const SAE: i32>(
40905 k: __mmask8,
40906 a: __m128,
40907 b: __m128,
40908 c: __m128i,
40909) -> __m128 {
40910 unsafe {
40911 static_assert_uimm_bits!(IMM8, 8);
40912 static_assert_mantissas_sae!(SAE);
40913 let a: f32x4 = a.as_f32x4();
40914 let b: f32x4 = b.as_f32x4();
40915 let c: i32x4 = c.as_i32x4();
40916 let r: f32x4 = vfixupimmssz(a, b, c, IMM8, k, SAE);
40917 let fixupimm: f32 = simd_extract!(r, 0);
40918 let r: f32x4 = simd_insert!(a, 0, fixupimm);
40919 transmute(r)
40920 }
40921}
40922
40923/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.\
40924/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40925///
40926/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_fixupimm_round_sd&expand=2508)
40927#[inline]
40928#[target_feature(enable = "avx512f")]
40929#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40930#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0, SAE = 8))]
40931#[rustc_legacy_const_generics(3, 4)]
40932pub fn _mm_fixupimm_round_sd<const IMM8: i32, const SAE: i32>(
40933 a: __m128d,
40934 b: __m128d,
40935 c: __m128i,
40936) -> __m128d {
40937 unsafe {
40938 static_assert_uimm_bits!(IMM8, 8);
40939 static_assert_mantissas_sae!(SAE);
40940 let a: f64x2 = a.as_f64x2();
40941 let b: f64x2 = b.as_f64x2();
40942 let c: i64x2 = c.as_i64x2();
40943 let r: f64x2 = vfixupimmsd(a, b, c, IMM8, 0b11111111, SAE);
40944 let fixupimm: f64 = simd_extract!(r, 0);
40945 let r: f64x2 = simd_insert!(a, 0, fixupimm);
40946 transmute(r)
40947 }
40948}
40949
40950/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using writemask k (the element is copied from a when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.\
40951/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40952///
40953/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_fixupimm_round_sd&expand=2509)
40954#[inline]
40955#[target_feature(enable = "avx512f")]
40956#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40957#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0, SAE = 8))]
40958#[rustc_legacy_const_generics(4, 5)]
40959pub fn _mm_mask_fixupimm_round_sd<const IMM8: i32, const SAE: i32>(
40960 a: __m128d,
40961 k: __mmask8,
40962 b: __m128d,
40963 c: __m128i,
40964) -> __m128d {
40965 unsafe {
40966 static_assert_uimm_bits!(IMM8, 8);
40967 static_assert_mantissas_sae!(SAE);
40968 let a: f64x2 = a.as_f64x2();
40969 let b: f64x2 = b.as_f64x2();
40970 let c: i64x2 = c.as_i64x2();
40971 let r: f64x2 = vfixupimmsd(a, b, c, IMM8, k, SAE);
40972 let fixupimm: f64 = simd_extract!(r, 0);
40973 let r: f64x2 = simd_insert!(a, 0, fixupimm);
40974 transmute(r)
40975 }
40976}
40977
40978/// Fix up the lower double-precision (64-bit) floating-point elements in a and b using the lower 64-bit integer in c, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst. imm8 is used to set the required flags reporting.\
40979/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
40980///
40981/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_fixupimm_round_sd&expand=2510)
40982#[inline]
40983#[target_feature(enable = "avx512f")]
40984#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
40985#[cfg_attr(test, assert_instr(vfixupimmsd, IMM8 = 0, SAE = 8))]
40986#[rustc_legacy_const_generics(4, 5)]
40987pub fn _mm_maskz_fixupimm_round_sd<const IMM8: i32, const SAE: i32>(
40988 k: __mmask8,
40989 a: __m128d,
40990 b: __m128d,
40991 c: __m128i,
40992) -> __m128d {
40993 unsafe {
40994 static_assert_uimm_bits!(IMM8, 8);
40995 static_assert_mantissas_sae!(SAE);
40996 let a: f64x2 = a.as_f64x2();
40997 let b: f64x2 = b.as_f64x2();
40998 let c: i64x2 = c.as_i64x2();
40999 let r: f64x2 = vfixupimmsdz(a, b, c, IMM8, k, SAE);
41000 let fixupimm: f64 = simd_extract!(r, 0);
41001 let r: f64x2 = simd_insert!(a, 0, fixupimm);
41002 transmute(r)
41003 }
41004}
41005
41006/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
41007///
41008/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvtss_sd&expand=1896)
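///
/// A minimal sketch of the writemask behaviour (illustrative only, not compiled
/// or run here: the intrinsic is unstable and needs nightly Rust plus AVX-512F
/// support at runtime):
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let src = _mm_set_sd(-1.0);
///     let a = _mm_set_pd(7.0, 0.0); // the upper lane (7.0) is always copied to dst
///     let b = _mm_set_ss(2.5);
///     // Mask bit 0 set: lower lane = 2.5_f32 widened to f64.
///     let r = _mm_mask_cvtss_sd(src, 0b1, a, b);
///     assert_eq!(_mm_cvtsd_f64(r), 2.5);
///     // Mask bit 0 clear: the lower lane is taken from `src`.
///     let r = _mm_mask_cvtss_sd(src, 0b0, a, b);
///     assert_eq!(_mm_cvtsd_f64(r), -1.0);
/// }
/// ```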
41009#[inline]
41010#[target_feature(enable = "avx512f")]
41011#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41012#[cfg_attr(test, assert_instr(vcvtss2sd))]
41013pub fn _mm_mask_cvtss_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128) -> __m128d {
41014 unsafe {
41015 transmute(vcvtss2sd(
41016 a.as_f64x2(),
41017 b.as_f32x4(),
41018 src.as_f64x2(),
41019 k,
41020 _MM_FROUND_CUR_DIRECTION,
41021 ))
41022 }
41023}
41024
41025/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.
41026///
41027/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvtss_sd&expand=1897)
41028#[inline]
41029#[target_feature(enable = "avx512f")]
41030#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41031#[cfg_attr(test, assert_instr(vcvtss2sd))]
41032pub fn _mm_maskz_cvtss_sd(k: __mmask8, a: __m128d, b: __m128) -> __m128d {
41033 unsafe {
41034 transmute(vcvtss2sd(
41035 a.as_f64x2(),
41036 b.as_f32x4(),
41037 f64x2::ZERO,
41038 k,
41039 _MM_FROUND_CUR_DIRECTION,
41040 ))
41041 }
41042}
41043
41044/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
41045///
41046/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvtsd_ss&expand=1797)
41047#[inline]
41048#[target_feature(enable = "avx512f")]
41049#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41050#[cfg_attr(test, assert_instr(vcvtsd2ss))]
41051pub fn _mm_mask_cvtsd_ss(src: __m128, k: __mmask8, a: __m128, b: __m128d) -> __m128 {
41052 unsafe {
41053 transmute(vcvtsd2ss(
41054 a.as_f32x4(),
41055 b.as_f64x2(),
41056 src.as_f32x4(),
41057 k,
41058 _MM_FROUND_CUR_DIRECTION,
41059 ))
41060 }
41061}
41062
41063/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.
41064///
41065/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvtsd_ss&expand=1798)
41066#[inline]
41067#[target_feature(enable = "avx512f")]
41068#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41069#[cfg_attr(test, assert_instr(vcvtsd2ss))]
41070pub fn _mm_maskz_cvtsd_ss(k: __mmask8, a: __m128, b: __m128d) -> __m128 {
41071 unsafe {
41072 transmute(vcvtsd2ss(
41073 a.as_f32x4(),
41074 b.as_f64x2(),
41075 f32x4::ZERO,
41076 k,
41077 _MM_FROUND_CUR_DIRECTION,
41078 ))
41079 }
41080}
41081
41082/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
41083/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41084///
41085/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_sd&expand=1371)
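///
/// A minimal usage sketch (illustrative only, not compiled or run here: the
/// intrinsic is unstable and needs nightly Rust plus AVX-512F support at runtime).
/// Widening f32 to f64 is exact, so only exception suppression is configurable:
///
/// ```ignore
/// #![feature(stdarch_x86_avx512)]
/// use core::arch::x86_64::*;
///
/// unsafe {
///     let a = _mm_set_pd(8.0, 0.0); // the upper lane of the result comes from `a`
///     let b = _mm_set_ss(1.5);
///     let r = _mm_cvt_roundss_sd::<{ _MM_FROUND_NO_EXC }>(a, b);
///     assert_eq!(_mm_cvtsd_f64(r), 1.5);
/// }
/// ```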
41086#[inline]
41087#[target_feature(enable = "avx512f")]
41088#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41089#[cfg_attr(test, assert_instr(vcvtss2sd, SAE = 8))]
41090#[rustc_legacy_const_generics(2)]
41091pub fn _mm_cvt_roundss_sd<const SAE: i32>(a: __m128d, b: __m128) -> __m128d {
41092 unsafe {
41093 static_assert_sae!(SAE);
41094 let a: f64x2 = a.as_f64x2();
41095 let b: f32x4 = b.as_f32x4();
41096 let r: f64x2 = vcvtss2sd(a, b, f64x2::ZERO, 0b11111111, SAE);
41097 transmute(r)
41098 }
41099}
41100
41101/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
41102/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41103///
41104/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvt_roundss_sd&expand=1372)
41105#[inline]
41106#[target_feature(enable = "avx512f")]
41107#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41108#[cfg_attr(test, assert_instr(vcvtss2sd, SAE = 8))]
41109#[rustc_legacy_const_generics(4)]
41110pub fn _mm_mask_cvt_roundss_sd<const SAE: i32>(
41111 src: __m128d,
41112 k: __mmask8,
41113 a: __m128d,
41114 b: __m128,
41115) -> __m128d {
41116 unsafe {
41117 static_assert_sae!(SAE);
41118 let a: f64x2 = a.as_f64x2();
41119 let b: f32x4 = b.as_f32x4();
41120 let src: f64x2 = src.as_f64x2();
41121 let r: f64x2 = vcvtss2sd(a, b, src, k, SAE);
41122 transmute(r)
41123 }
41124}
41125
41126/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper element from a to the upper element of dst.\
41127/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41128///
41129/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvt_roundss_sd&expand=1373)
41130#[inline]
41131#[target_feature(enable = "avx512f")]
41132#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41133#[cfg_attr(test, assert_instr(vcvtss2sd, SAE = 8))]
41134#[rustc_legacy_const_generics(3)]
41135pub fn _mm_maskz_cvt_roundss_sd<const SAE: i32>(k: __mmask8, a: __m128d, b: __m128) -> __m128d {
41136 unsafe {
41137 static_assert_sae!(SAE);
41138 let a: f64x2 = a.as_f64x2();
41139 let b: f32x4 = b.as_f32x4();
41140 let r: f64x2 = vcvtss2sd(a, b, f64x2::ZERO, k, SAE);
41141 transmute(r)
41142 }
41143}
41144
41145/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
41146/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41147/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41148/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41149/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41150/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41151/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41152///
41153/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsd_ss&expand=1361)
41154#[inline]
41155#[target_feature(enable = "avx512f")]
41156#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41157#[cfg_attr(test, assert_instr(vcvtsd2ss, ROUNDING = 8))]
41158#[rustc_legacy_const_generics(2)]
41159pub fn _mm_cvt_roundsd_ss<const ROUNDING: i32>(a: __m128, b: __m128d) -> __m128 {
41160 unsafe {
41161 static_assert_rounding!(ROUNDING);
41162 let a: f32x4 = a.as_f32x4();
41163 let b: f64x2 = b.as_f64x2();
41164 let r: f32x4 = vcvtsd2ss(a, b, f32x4::ZERO, 0b11111111, ROUNDING);
41165 transmute(r)
41166 }
41167}
41168
41169/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using writemask k (the element is copied from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
41170/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41171/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41172/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41173/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41174/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41175/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41176///
41177/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_mask_cvt_roundsd_ss&expand=1362)
41178#[inline]
41179#[target_feature(enable = "avx512f")]
41180#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41181#[cfg_attr(test, assert_instr(vcvtsd2ss, ROUNDING = 8))]
41182#[rustc_legacy_const_generics(4)]
41183pub fn _mm_mask_cvt_roundsd_ss<const ROUNDING: i32>(
41184 src: __m128,
41185 k: __mmask8,
41186 a: __m128,
41187 b: __m128d,
41188) -> __m128 {
41189 unsafe {
41190 static_assert_rounding!(ROUNDING);
41191 let a: f32x4 = a.as_f32x4();
41192 let b: f64x2 = b.as_f64x2();
41193 let src: f32x4 = src.as_f32x4();
41194 let r: f32x4 = vcvtsd2ss(a, b, src, k, ROUNDING);
41195 transmute(r)
41196 }
41197}
41198
41199/// Convert the lower double-precision (64-bit) floating-point element in b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst using zeromask k (the element is zeroed out when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements of dst.\
41200/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41201/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41202/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41203/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41204/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41205/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41206///
41207/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_maskz_cvt_roundsd_ss&expand=1363)
41208#[inline]
41209#[target_feature(enable = "avx512f")]
41210#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41211#[cfg_attr(test, assert_instr(vcvtsd2ss, ROUNDING = 8))]
41212#[rustc_legacy_const_generics(3)]
41213pub fn _mm_maskz_cvt_roundsd_ss<const ROUNDING: i32>(k: __mmask8, a: __m128, b: __m128d) -> __m128 {
41214 unsafe {
41215 static_assert_rounding!(ROUNDING);
41216 let a: f32x4 = a.as_f32x4();
41217 let b: f64x2 = b.as_f64x2();
41218 let r: f32x4 = vcvtsd2ss(a, b, f32x4::ZERO, k, ROUNDING);
41219 transmute(r)
41220 }
41221}
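
// A minimal usage sketch (illustrative helper name, not from the original source):
// narrow the lower f64 of `b` to f32 while rounding toward negative infinity, with
// exceptions suppressed; the upper three f32 lanes come from `a`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_cvt_roundsd_ss() -> f32 {
    let a = _mm_setzero_ps();
    let b = _mm_set_sd(1.75);
    let r = _mm_cvt_roundsd_ss::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a, b);
    _mm_cvtss_f32(r) // 1.75 narrowed to f32 (exact here, so the rounding mode has no effect)
}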
41222
41223/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
41224/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41225/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41226/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41227/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41228/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41229/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41230///
41231/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_si32&expand=1374)
41232#[inline]
41233#[target_feature(enable = "avx512f")]
41234#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41235#[cfg_attr(test, assert_instr(vcvtss2si, ROUNDING = 8))]
41236#[rustc_legacy_const_generics(1)]
41237pub fn _mm_cvt_roundss_si32<const ROUNDING: i32>(a: __m128) -> i32 {
41238 unsafe {
41239 static_assert_rounding!(ROUNDING);
41240 let a: f32x4 = a.as_f32x4();
41241 vcvtss2si(a, ROUNDING)
41242 }
41243}
41244
41245/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
41246/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41247/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41248/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41249/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41250/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41251/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41252///
41253/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_i32&expand=1369)
41254#[inline]
41255#[target_feature(enable = "avx512f")]
41256#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41257#[cfg_attr(test, assert_instr(vcvtss2si, ROUNDING = 8))]
41258#[rustc_legacy_const_generics(1)]
41259pub fn _mm_cvt_roundss_i32<const ROUNDING: i32>(a: __m128) -> i32 {
41260 unsafe {
41261 static_assert_rounding!(ROUNDING);
41262 let a: f32x4 = a.as_f32x4();
41263 vcvtss2si(a, ROUNDING)
41264 }
41265}
41266
41267/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.\
41268/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41269/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41270/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41271/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41272/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41273/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41274///
41275/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundss_u32&expand=1376)
41276#[inline]
41277#[target_feature(enable = "avx512f")]
41278#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41279#[cfg_attr(test, assert_instr(vcvtss2usi, ROUNDING = 8))]
41280#[rustc_legacy_const_generics(1)]
41281pub fn _mm_cvt_roundss_u32<const ROUNDING: i32>(a: __m128) -> u32 {
41282 unsafe {
41283 static_assert_rounding!(ROUNDING);
41284 let a: f32x4 = a.as_f32x4();
41285 vcvtss2usi(a, ROUNDING)
41286 }
41287}
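
// A minimal usage sketch (illustrative only): the same scalar converted under two
// different rounding controls, showing how the ROUNDING parameter picks the mode.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_cvt_roundss_to_int() -> (i32, u32) {
    let a = _mm_set_ss(2.5);
    let down = _mm_cvt_roundss_si32::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a); // 2
    let up = _mm_cvt_roundss_u32::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a); // 3
    (down, up)
}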
41288
41289/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer, and store the result in dst.
41290///
41291/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtss_i32&expand=1893)
41292#[inline]
41293#[target_feature(enable = "avx512f")]
41294#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41295#[cfg_attr(test, assert_instr(vcvtss2si))]
41296pub fn _mm_cvtss_i32(a: __m128) -> i32 {
41297 unsafe { vcvtss2si(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) }
41298}
41299
41300/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.
41301///
41302/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtss_u32&expand=1901)
41303#[inline]
41304#[target_feature(enable = "avx512f")]
41305#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41306#[cfg_attr(test, assert_instr(vcvtss2usi))]
41307pub fn _mm_cvtss_u32(a: __m128) -> u32 {
41308 unsafe { vcvtss2usi(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) }
41309}
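
// A minimal usage sketch (illustrative only): the non-`round` variants carry no
// rounding operand, so the result follows the mode currently installed in MXCSR.RC
// (see `_MM_SET_ROUNDING_MODE`).
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_cvtss_current_direction() -> (i32, u32) {
    let a = _mm_set_ss(3.0);
    (_mm_cvtss_i32(a), _mm_cvtss_u32(a)) // (3, 3) regardless of the rounding mode
}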
41310
41311/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
41312/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41313/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41314/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41315/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41316/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41317/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41318///
41319/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsd_si32&expand=1359)
41320#[inline]
41321#[target_feature(enable = "avx512f")]
41322#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41323#[cfg_attr(test, assert_instr(vcvtsd2si, ROUNDING = 8))]
41324#[rustc_legacy_const_generics(1)]
41325pub fn _mm_cvt_roundsd_si32<const ROUNDING: i32>(a: __m128d) -> i32 {
41326 unsafe {
41327 static_assert_rounding!(ROUNDING);
41328 let a: f64x2 = a.as_f64x2();
41329 vcvtsd2si(a, ROUNDING)
41330 }
41331}
41332
41333/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer, and store the result in dst.\
41334/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41335/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41336/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41337/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41338/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41339/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41340///
41341/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsd_i32&expand=1357)
41342#[inline]
41343#[target_feature(enable = "avx512f")]
41344#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41345#[cfg_attr(test, assert_instr(vcvtsd2si, ROUNDING = 8))]
41346#[rustc_legacy_const_generics(1)]
41347pub fn _mm_cvt_roundsd_i32<const ROUNDING: i32>(a: __m128d) -> i32 {
41348 unsafe {
41349 static_assert_rounding!(ROUNDING);
41350 let a: f64x2 = a.as_f64x2();
41351 vcvtsd2si(a, ROUNDING)
41352 }
41353}
41354
41355/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.\
41356/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41357/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41358/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41359/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41360/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41361/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41362///
41363/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvt_roundsd_u32&expand=1364)
41364#[inline]
41365#[target_feature(enable = "avx512f")]
41366#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41367#[cfg_attr(test, assert_instr(vcvtsd2usi, ROUNDING = 8))]
41368#[rustc_legacy_const_generics(1)]
41369pub fn _mm_cvt_roundsd_u32<const ROUNDING: i32>(a: __m128d) -> u32 {
41370 unsafe {
41371 static_assert_rounding!(ROUNDING);
41372 let a: f64x2 = a.as_f64x2();
41373 vcvtsd2usi(a, ROUNDING)
41374 }
41375}
41376
41377/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer, and store the result in dst.
41378///
41379/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtsd_i32&expand=1791)
41380#[inline]
41381#[target_feature(enable = "avx512f")]
41382#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41383#[cfg_attr(test, assert_instr(vcvtsd2si))]
41384pub fn _mm_cvtsd_i32(a: __m128d) -> i32 {
41385 unsafe { vcvtsd2si(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) }
41386}
41387
41388/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer, and store the result in dst.
41389///
41390/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtsd_u32&expand=1799)
41391#[inline]
41392#[target_feature(enable = "avx512f")]
41393#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41394#[cfg_attr(test, assert_instr(vcvtsd2usi))]
41395pub fn _mm_cvtsd_u32(a: __m128d) -> u32 {
41396 unsafe { vcvtsd2usi(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) }
41397}
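
// A minimal usage sketch (illustrative only): the double-precision counterparts,
// once with an explicit rounding operand and once following MXCSR.RC.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_cvtsd_to_int() -> (i32, u32) {
    let explicit =
        _mm_cvt_roundsd_si32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(_mm_set_sd(-1.25)); // -1
    let current = _mm_cvtsd_u32(_mm_set_sd(7.0)); // 7 under any rounding mode
    (explicit, current)
}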
41398
41399/// Convert the signed 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
41400///
41401/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41402/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41403/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41404/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41405/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41406/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41407///
41408/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundi32_ss&expand=1312)
41409#[inline]
41410#[target_feature(enable = "avx512f")]
41411#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41412#[cfg_attr(test, assert_instr(vcvtsi2ss, ROUNDING = 8))]
41413#[rustc_legacy_const_generics(2)]
41414pub fn _mm_cvt_roundi32_ss<const ROUNDING: i32>(a: __m128, b: i32) -> __m128 {
41415 unsafe {
41416 static_assert_rounding!(ROUNDING);
41417 let a: f32x4 = a.as_f32x4();
41418 let r: f32x4 = vcvtsi2ss(a, b, ROUNDING);
41419 transmute(r)
41420 }
41421}
41422
41423/// Convert the signed 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
41424///
41425/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41426/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41427/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41428/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41429/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41430/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41431///
41432/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundsi32_ss&expand=1366)
41433#[inline]
41434#[target_feature(enable = "avx512f")]
41435#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41436#[cfg_attr(test, assert_instr(vcvtsi2ss, ROUNDING = 8))]
41437#[rustc_legacy_const_generics(2)]
41438pub fn _mm_cvt_roundsi32_ss<const ROUNDING: i32>(a: __m128, b: i32) -> __m128 {
41439 unsafe {
41440 static_assert_rounding!(ROUNDING);
41441 let a: f32x4 = a.as_f32x4();
41442 let r: f32x4 = vcvtsi2ss(a, b, ROUNDING);
41443 transmute(r)
41444 }
41445}
41446
41447/// Convert the unsigned 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
41448/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
41449/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
41450/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
41451/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
41452/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
41453/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
41454///
41455/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvt_roundu32_ss&expand=1378)
41456#[inline]
41457#[target_feature(enable = "avx512f")]
41458#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41459#[cfg_attr(test, assert_instr(vcvtusi2ss, ROUNDING = 8))]
41460#[rustc_legacy_const_generics(2)]
41461pub fn _mm_cvt_roundu32_ss<const ROUNDING: i32>(a: __m128, b: u32) -> __m128 {
41462 unsafe {
41463 static_assert_rounding!(ROUNDING);
41464 let a: f32x4 = a.as_f32x4();
41465 let r: f32x4 = vcvtusi2ss(a, b, ROUNDING);
41466 transmute(r)
41467 }
41468}
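
// A minimal usage sketch (illustrative only): an integer converted into the low f32
// lane under an explicit rounding mode; the mode only matters for values that are not
// exactly representable in f32, such as u32::MAX below.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_cvt_round_int_to_ss() -> f32 {
    let a = _mm_setzero_ps();
    let r = _mm_cvt_roundu32_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, u32::MAX);
    _mm_cvtss_f32(r) // u32::MAX rounded to the nearest representable f32 (4294967296.0)
}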
41469
41470/// Convert the signed 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
41471///
41472/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvti32_ss&expand=1643)
41473#[inline]
41474#[target_feature(enable = "avx512f")]
41475#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41476#[cfg_attr(test, assert_instr(vcvtsi2ss))]
41477pub fn _mm_cvti32_ss(a: __m128, b: i32) -> __m128 {
41478 unsafe {
41479 let b: f32 = b as f32;
41480 simd_insert!(a, 0, b)
41481 }
41482}
41483
41484/// Convert the signed 32-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
41485///
41486/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvti32_sd&expand=1642)
41487#[inline]
41488#[target_feature(enable = "avx512f")]
41489#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41490#[cfg_attr(test, assert_instr(vcvtsi2sd))]
41491pub fn _mm_cvti32_sd(a: __m128d, b: i32) -> __m128d {
41492 unsafe {
41493 let b: f64 = b as f64;
41494 simd_insert!(a, 0, b)
41495 }
41496}
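
// A minimal usage sketch (illustrative only): the plain integer-to-scalar conversions
// replace only the lowest lane and leave the remaining lanes of `a` untouched.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_cvti32_scalar() -> (f32, f64) {
    let ps = _mm_cvti32_ss(_mm_set1_ps(9.0), -3);
    let pd = _mm_cvti32_sd(_mm_set1_pd(9.0), -3);
    (_mm_cvtss_f32(ps), _mm_cvtsd_f64(pd)) // (-3.0, -3.0); the upper lanes keep 9.0
}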
41497
41498/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
41499/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41500///
41501/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtt_roundss_si32&expand=1936)
41502#[inline]
41503#[target_feature(enable = "avx512f")]
41504#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41505#[cfg_attr(test, assert_instr(vcvttss2si, SAE = 8))]
41506#[rustc_legacy_const_generics(1)]
41507pub fn _mm_cvtt_roundss_si32<const SAE: i32>(a: __m128) -> i32 {
41508 unsafe {
41509 static_assert_sae!(SAE);
41510 let a: f32x4 = a.as_f32x4();
41511 vcvttss2si(a, SAE)
41512 }
41513}
41514
41515/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
41516/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41517///
41518/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtt_roundss_i32&expand=1934)
41519#[inline]
41520#[target_feature(enable = "avx512f")]
41521#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41522#[cfg_attr(test, assert_instr(vcvttss2si, SAE = 8))]
41523#[rustc_legacy_const_generics(1)]
41524pub fn _mm_cvtt_roundss_i32<const SAE: i32>(a: __m128) -> i32 {
41525 unsafe {
41526 static_assert_sae!(SAE);
41527 let a: f32x4 = a.as_f32x4();
41528 vcvttss2si(a, SAE)
41529 }
41530}
41531
41532/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.\
41533/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41534///
41535/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtt_roundss_u32&expand=1938)
41536#[inline]
41537#[target_feature(enable = "avx512f")]
41538#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41539#[cfg_attr(test, assert_instr(vcvttss2usi, SAE = 8))]
41540#[rustc_legacy_const_generics(1)]
41541pub fn _mm_cvtt_roundss_u32<const SAE: i32>(a: __m128) -> u32 {
41542 unsafe {
41543 static_assert_sae!(SAE);
41544 let a: f32x4 = a.as_f32x4();
41545 vcvttss2usi(a, SAE)
41546 }
41547}
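
// A minimal usage sketch (illustrative only): the truncating (`tt`) variants always
// round toward zero; the SAE parameter only chooses whether exceptions are suppressed.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_cvtt_roundss() -> (i32, u32) {
    let a = _mm_set_ss(2.9);
    let i = _mm_cvtt_roundss_i32::<_MM_FROUND_NO_EXC>(a); // 2, fraction discarded
    let u = _mm_cvtt_roundss_u32::<_MM_FROUND_CUR_DIRECTION>(a); // also 2
    (i, u)
}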
41548
41549/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.
41550///
41551/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttss_i32&expand=2022)
41552#[inline]
41553#[target_feature(enable = "avx512f")]
41554#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41555#[cfg_attr(test, assert_instr(vcvttss2si))]
41556pub fn _mm_cvttss_i32(a: __m128) -> i32 {
41557 unsafe { vcvttss2si(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) }
41558}
41559
41560/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.
41561///
41562/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttss_u32&expand=2026)
41563#[inline]
41564#[target_feature(enable = "avx512f")]
41565#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41566#[cfg_attr(test, assert_instr(vcvttss2usi))]
41567pub fn _mm_cvttss_u32(a: __m128) -> u32 {
41568 unsafe { vcvttss2usi(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) }
41569}
41570
41571/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
41572/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41573///
41574/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtt_roundsd_si32&expand=1930)
41575#[inline]
41576#[target_feature(enable = "avx512f")]
41577#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41578#[cfg_attr(test, assert_instr(vcvttsd2si, SAE = 8))]
41579#[rustc_legacy_const_generics(1)]
41580pub fn _mm_cvtt_roundsd_si32<const SAE: i32>(a: __m128d) -> i32 {
41581 unsafe {
41582 static_assert_sae!(SAE);
41583 let a: f64x2 = a.as_f64x2();
41584 vcvttsd2si(a, SAE)
41585 }
41586}
41587
41588/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\
41589/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41590///
41591/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtt_roundsd_i32&expand=1928)
41592#[inline]
41593#[target_feature(enable = "avx512f")]
41594#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41595#[cfg_attr(test, assert_instr(vcvttsd2si, SAE = 8))]
41596#[rustc_legacy_const_generics(1)]
41597pub fn _mm_cvtt_roundsd_i32<const SAE: i32>(a: __m128d) -> i32 {
41598 unsafe {
41599 static_assert_sae!(SAE);
41600 let a: f64x2 = a.as_f64x2();
41601 vcvttsd2si(a, SAE)
41602 }
41603}
41604
41605/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.\
41606/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41607///
41608/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=mm_cvtt_roundsd_u32&expand=1932)
41609#[inline]
41610#[target_feature(enable = "avx512f")]
41611#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41612#[cfg_attr(test, assert_instr(vcvttsd2usi, SAE = 8))]
41613#[rustc_legacy_const_generics(1)]
41614pub fn _mm_cvtt_roundsd_u32<const SAE: i32>(a: __m128d) -> u32 {
41615 unsafe {
41616 static_assert_sae!(SAE);
41617 let a: f64x2 = a.as_f64x2();
41618 vcvttsd2usi(a, SAE)
41619 }
41620}
41621
41622/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.
41623///
41624/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttsd_i32&expand=2015)
41625#[inline]
41626#[target_feature(enable = "avx512f")]
41627#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41628#[cfg_attr(test, assert_instr(vcvttsd2si))]
41629pub fn _mm_cvttsd_i32(a: __m128d) -> i32 {
41630 unsafe { vcvttsd2si(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) }
41631}
41632
41633/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.
41634///
41635/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvttsd_u32&expand=2020)
41636#[inline]
41637#[target_feature(enable = "avx512f")]
41638#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41639#[cfg_attr(test, assert_instr(vcvttsd2usi))]
41640pub fn _mm_cvttsd_u32(a: __m128d) -> u32 {
41641 unsafe { vcvttsd2usi(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) }
41642}
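
// A minimal usage sketch (illustrative only): the non-`round` truncating forms take no
// control operand at all; truncation toward zero is independent of MXCSR.RC.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_cvtt_scalar() -> (i32, u32) {
    (_mm_cvttss_i32(_mm_set_ss(-2.9)), _mm_cvttsd_u32(_mm_set_sd(2.9))) // (-2, 2)
}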
41643
41644/// Convert the unsigned 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
41645///
41646/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtu32_ss&expand=2032)
41647#[inline]
41648#[target_feature(enable = "avx512f")]
41649#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41650#[cfg_attr(test, assert_instr(vcvtusi2ss))]
41651pub fn _mm_cvtu32_ss(a: __m128, b: u32) -> __m128 {
41652 unsafe {
41653 let b: f32 = b as f32;
41654 simd_insert!(a, 0, b)
41655 }
41656}
41657
41658/// Convert the unsigned 32-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
41659///
41660/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cvtu32_sd&expand=2031)
41661#[inline]
41662#[target_feature(enable = "avx512f")]
41663#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41664#[cfg_attr(test, assert_instr(vcvtusi2sd))]
41665pub fn _mm_cvtu32_sd(a: __m128d, b: u32) -> __m128d {
41666 unsafe {
41667 let b: f64 = b as f64;
41668 simd_insert!(a, 0, b)
41669 }
41670}
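
// A minimal usage sketch (illustrative only): the `u32` variants treat the integer as
// unsigned, so values above `i32::MAX` convert to large positive floats rather than
// being reinterpreted as negative.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_cvtu32_scalar() -> (f32, f64) {
    let ps = _mm_cvtu32_ss(_mm_setzero_ps(), 4_000_000_000);
    let pd = _mm_cvtu32_sd(_mm_setzero_pd(), 4_000_000_000);
    (_mm_cvtss_f32(ps), _mm_cvtsd_f64(pd)) // both approximately 4.0e9, not negative
}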
41671
41672/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and return the boolean result (0 or 1).\
41673/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41674///
41675/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comi_round_ss&expand=1175)
41676#[inline]
41677#[target_feature(enable = "avx512f")]
41678#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41679#[cfg_attr(test, assert_instr(vcmp, IMM5 = 5, SAE = 4))] //should be vcomiss
41680#[rustc_legacy_const_generics(2, 3)]
41681pub fn _mm_comi_round_ss<const IMM5: i32, const SAE: i32>(a: __m128, b: __m128) -> i32 {
41682 unsafe {
41683 static_assert_uimm_bits!(IMM5, 5);
41684 static_assert_mantissas_sae!(SAE);
41685 let a: f32x4 = a.as_f32x4();
41686 let b: f32x4 = b.as_f32x4();
41687 vcomiss(a, b, IMM5, SAE)
41688 }
41689}
41690
41691/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and return the boolean result (0 or 1).\
41692/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
41693///
41694/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_comi_round_sd&expand=1174)
41695#[inline]
41696#[target_feature(enable = "avx512f")]
41697#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41698#[cfg_attr(test, assert_instr(vcmp, IMM5 = 5, SAE = 4))] //should be vcomisd
41699#[rustc_legacy_const_generics(2, 3)]
41700pub fn _mm_comi_round_sd<const IMM5: i32, const SAE: i32>(a: __m128d, b: __m128d) -> i32 {
41701 unsafe {
41702 static_assert_uimm_bits!(IMM5, 5);
41703 static_assert_mantissas_sae!(SAE);
41704 let a: f64x2 = a.as_f64x2();
41705 let b: f64x2 = b.as_f64x2();
41706 vcomisd(a, b, IMM5, SAE)
41707 }
41708}
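
// A minimal usage sketch (illustrative only): IMM5 selects the comparison predicate
// (here `_CMP_LE_OQ`) and the result comes back as 0 or 1 rather than as a mask.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_comi_round_ss() -> i32 {
    let a = _mm_set_ss(1.0);
    let b = _mm_set_ss(2.0);
    _mm_comi_round_ss::<_CMP_LE_OQ, _MM_FROUND_NO_EXC>(a, b) // 1, since 1.0 <= 2.0
}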
41709
41710/// Equal
41711#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41712pub const _MM_CMPINT_EQ: _MM_CMPINT_ENUM = 0x00;
41713/// Less-than
41714#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41715pub const _MM_CMPINT_LT: _MM_CMPINT_ENUM = 0x01;
41716/// Less-than-or-equal
41717#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41718pub const _MM_CMPINT_LE: _MM_CMPINT_ENUM = 0x02;
41719/// False
41720#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41721pub const _MM_CMPINT_FALSE: _MM_CMPINT_ENUM = 0x03;
41722/// Not-equal
41723#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41724pub const _MM_CMPINT_NE: _MM_CMPINT_ENUM = 0x04;
41725/// Not less-than
41726#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41727pub const _MM_CMPINT_NLT: _MM_CMPINT_ENUM = 0x05;
41728/// Not less-than-or-equal
41729#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41730pub const _MM_CMPINT_NLE: _MM_CMPINT_ENUM = 0x06;
41731/// True
41732#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41733pub const _MM_CMPINT_TRUE: _MM_CMPINT_ENUM = 0x07;
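
// A minimal usage sketch (illustrative only): the `_MM_CMPINT_*` values are the IMM3
// predicates accepted by the integer compare-to-mask intrinsics in this module.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_cmpint_predicate() -> __mmask16 {
    let a = _mm512_set1_epi32(1);
    let b = _mm512_set1_epi32(2);
    _mm512_cmp_epi32_mask::<_MM_CMPINT_LT>(a, b) // 0xFFFF: every lane satisfies 1 < 2
}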
41734
41735/// interval [1, 2)
41736#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41737pub const _MM_MANT_NORM_1_2: _MM_MANTISSA_NORM_ENUM = 0x00;
41738/// interval [0.5, 2)
41739#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41740pub const _MM_MANT_NORM_P5_2: _MM_MANTISSA_NORM_ENUM = 0x01;
41741/// interval [0.5, 1)
41742#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41743pub const _MM_MANT_NORM_P5_1: _MM_MANTISSA_NORM_ENUM = 0x02;
41744/// interval [0.75, 1.5)
41745#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41746pub const _MM_MANT_NORM_P75_1P5: _MM_MANTISSA_NORM_ENUM = 0x03;
41747
41748/// sign = sign(SRC)
41749#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41750pub const _MM_MANT_SIGN_SRC: _MM_MANTISSA_SIGN_ENUM = 0x00;
41751/// sign = 0
41752#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41753pub const _MM_MANT_SIGN_ZERO: _MM_MANTISSA_SIGN_ENUM = 0x01;
41754/// DEST = NaN if sign(SRC) = 1
41755#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41756pub const _MM_MANT_SIGN_NAN: _MM_MANTISSA_SIGN_ENUM = 0x02;
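
// A minimal usage sketch (illustrative only): the normalization-interval and sign
// enums are consumed as the two const parameters of the `getmant` family.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
fn example_getmant_enums() -> __m512 {
    let a = _mm512_set1_ps(-12.0);
    // Mantissas normalized to [1, 2) with the sign forced to zero: 1.5 in every lane.
    _mm512_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_ZERO>(a)
}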
41757
41758#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41759pub const _MM_PERM_AAAA: _MM_PERM_ENUM = 0x00;
41760#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41761pub const _MM_PERM_AAAB: _MM_PERM_ENUM = 0x01;
41762#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41763pub const _MM_PERM_AAAC: _MM_PERM_ENUM = 0x02;
41764#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41765pub const _MM_PERM_AAAD: _MM_PERM_ENUM = 0x03;
41766#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41767pub const _MM_PERM_AABA: _MM_PERM_ENUM = 0x04;
41768#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41769pub const _MM_PERM_AABB: _MM_PERM_ENUM = 0x05;
41770#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41771pub const _MM_PERM_AABC: _MM_PERM_ENUM = 0x06;
41772#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41773pub const _MM_PERM_AABD: _MM_PERM_ENUM = 0x07;
41774#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41775pub const _MM_PERM_AACA: _MM_PERM_ENUM = 0x08;
41776#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41777pub const _MM_PERM_AACB: _MM_PERM_ENUM = 0x09;
41778#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41779pub const _MM_PERM_AACC: _MM_PERM_ENUM = 0x0A;
41780#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41781pub const _MM_PERM_AACD: _MM_PERM_ENUM = 0x0B;
41782#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41783pub const _MM_PERM_AADA: _MM_PERM_ENUM = 0x0C;
41784#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41785pub const _MM_PERM_AADB: _MM_PERM_ENUM = 0x0D;
41786#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41787pub const _MM_PERM_AADC: _MM_PERM_ENUM = 0x0E;
41788#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41789pub const _MM_PERM_AADD: _MM_PERM_ENUM = 0x0F;
41790#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41791pub const _MM_PERM_ABAA: _MM_PERM_ENUM = 0x10;
41792#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41793pub const _MM_PERM_ABAB: _MM_PERM_ENUM = 0x11;
41794#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41795pub const _MM_PERM_ABAC: _MM_PERM_ENUM = 0x12;
41796#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41797pub const _MM_PERM_ABAD: _MM_PERM_ENUM = 0x13;
41798#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41799pub const _MM_PERM_ABBA: _MM_PERM_ENUM = 0x14;
41800#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41801pub const _MM_PERM_ABBB: _MM_PERM_ENUM = 0x15;
41802#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41803pub const _MM_PERM_ABBC: _MM_PERM_ENUM = 0x16;
41804#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41805pub const _MM_PERM_ABBD: _MM_PERM_ENUM = 0x17;
41806#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41807pub const _MM_PERM_ABCA: _MM_PERM_ENUM = 0x18;
41808#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41809pub const _MM_PERM_ABCB: _MM_PERM_ENUM = 0x19;
41810#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41811pub const _MM_PERM_ABCC: _MM_PERM_ENUM = 0x1A;
41812#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41813pub const _MM_PERM_ABCD: _MM_PERM_ENUM = 0x1B;
41814#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41815pub const _MM_PERM_ABDA: _MM_PERM_ENUM = 0x1C;
41816#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41817pub const _MM_PERM_ABDB: _MM_PERM_ENUM = 0x1D;
41818#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41819pub const _MM_PERM_ABDC: _MM_PERM_ENUM = 0x1E;
41820#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41821pub const _MM_PERM_ABDD: _MM_PERM_ENUM = 0x1F;
41822#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41823pub const _MM_PERM_ACAA: _MM_PERM_ENUM = 0x20;
41824#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41825pub const _MM_PERM_ACAB: _MM_PERM_ENUM = 0x21;
41826#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41827pub const _MM_PERM_ACAC: _MM_PERM_ENUM = 0x22;
41828#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41829pub const _MM_PERM_ACAD: _MM_PERM_ENUM = 0x23;
41830#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41831pub const _MM_PERM_ACBA: _MM_PERM_ENUM = 0x24;
41832#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41833pub const _MM_PERM_ACBB: _MM_PERM_ENUM = 0x25;
41834#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41835pub const _MM_PERM_ACBC: _MM_PERM_ENUM = 0x26;
41836#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41837pub const _MM_PERM_ACBD: _MM_PERM_ENUM = 0x27;
41838#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41839pub const _MM_PERM_ACCA: _MM_PERM_ENUM = 0x28;
41840#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41841pub const _MM_PERM_ACCB: _MM_PERM_ENUM = 0x29;
41842#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41843pub const _MM_PERM_ACCC: _MM_PERM_ENUM = 0x2A;
41844#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41845pub const _MM_PERM_ACCD: _MM_PERM_ENUM = 0x2B;
41846#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41847pub const _MM_PERM_ACDA: _MM_PERM_ENUM = 0x2C;
41848#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41849pub const _MM_PERM_ACDB: _MM_PERM_ENUM = 0x2D;
41850#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41851pub const _MM_PERM_ACDC: _MM_PERM_ENUM = 0x2E;
41852#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41853pub const _MM_PERM_ACDD: _MM_PERM_ENUM = 0x2F;
41854#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41855pub const _MM_PERM_ADAA: _MM_PERM_ENUM = 0x30;
41856#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41857pub const _MM_PERM_ADAB: _MM_PERM_ENUM = 0x31;
41858#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41859pub const _MM_PERM_ADAC: _MM_PERM_ENUM = 0x32;
41860#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41861pub const _MM_PERM_ADAD: _MM_PERM_ENUM = 0x33;
41862#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41863pub const _MM_PERM_ADBA: _MM_PERM_ENUM = 0x34;
41864#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41865pub const _MM_PERM_ADBB: _MM_PERM_ENUM = 0x35;
41866#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41867pub const _MM_PERM_ADBC: _MM_PERM_ENUM = 0x36;
41868#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41869pub const _MM_PERM_ADBD: _MM_PERM_ENUM = 0x37;
41870#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41871pub const _MM_PERM_ADCA: _MM_PERM_ENUM = 0x38;
41872#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41873pub const _MM_PERM_ADCB: _MM_PERM_ENUM = 0x39;
41874#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41875pub const _MM_PERM_ADCC: _MM_PERM_ENUM = 0x3A;
41876#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41877pub const _MM_PERM_ADCD: _MM_PERM_ENUM = 0x3B;
41878#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41879pub const _MM_PERM_ADDA: _MM_PERM_ENUM = 0x3C;
41880#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41881pub const _MM_PERM_ADDB: _MM_PERM_ENUM = 0x3D;
41882#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41883pub const _MM_PERM_ADDC: _MM_PERM_ENUM = 0x3E;
41884#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41885pub const _MM_PERM_ADDD: _MM_PERM_ENUM = 0x3F;
41886#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41887pub const _MM_PERM_BAAA: _MM_PERM_ENUM = 0x40;
41888#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41889pub const _MM_PERM_BAAB: _MM_PERM_ENUM = 0x41;
41890#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41891pub const _MM_PERM_BAAC: _MM_PERM_ENUM = 0x42;
41892#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41893pub const _MM_PERM_BAAD: _MM_PERM_ENUM = 0x43;
41894#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41895pub const _MM_PERM_BABA: _MM_PERM_ENUM = 0x44;
41896#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41897pub const _MM_PERM_BABB: _MM_PERM_ENUM = 0x45;
41898#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41899pub const _MM_PERM_BABC: _MM_PERM_ENUM = 0x46;
41900#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41901pub const _MM_PERM_BABD: _MM_PERM_ENUM = 0x47;
41902#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41903pub const _MM_PERM_BACA: _MM_PERM_ENUM = 0x48;
41904#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41905pub const _MM_PERM_BACB: _MM_PERM_ENUM = 0x49;
41906#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41907pub const _MM_PERM_BACC: _MM_PERM_ENUM = 0x4A;
41908#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41909pub const _MM_PERM_BACD: _MM_PERM_ENUM = 0x4B;
41910#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41911pub const _MM_PERM_BADA: _MM_PERM_ENUM = 0x4C;
41912#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41913pub const _MM_PERM_BADB: _MM_PERM_ENUM = 0x4D;
41914#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41915pub const _MM_PERM_BADC: _MM_PERM_ENUM = 0x4E;
41916#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41917pub const _MM_PERM_BADD: _MM_PERM_ENUM = 0x4F;
41918#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41919pub const _MM_PERM_BBAA: _MM_PERM_ENUM = 0x50;
41920#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41921pub const _MM_PERM_BBAB: _MM_PERM_ENUM = 0x51;
41922#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41923pub const _MM_PERM_BBAC: _MM_PERM_ENUM = 0x52;
41924#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41925pub const _MM_PERM_BBAD: _MM_PERM_ENUM = 0x53;
41926#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41927pub const _MM_PERM_BBBA: _MM_PERM_ENUM = 0x54;
41928#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41929pub const _MM_PERM_BBBB: _MM_PERM_ENUM = 0x55;
41930#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41931pub const _MM_PERM_BBBC: _MM_PERM_ENUM = 0x56;
41932#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41933pub const _MM_PERM_BBBD: _MM_PERM_ENUM = 0x57;
41934#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41935pub const _MM_PERM_BBCA: _MM_PERM_ENUM = 0x58;
41936#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41937pub const _MM_PERM_BBCB: _MM_PERM_ENUM = 0x59;
41938#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41939pub const _MM_PERM_BBCC: _MM_PERM_ENUM = 0x5A;
41940#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41941pub const _MM_PERM_BBCD: _MM_PERM_ENUM = 0x5B;
41942#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41943pub const _MM_PERM_BBDA: _MM_PERM_ENUM = 0x5C;
41944#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41945pub const _MM_PERM_BBDB: _MM_PERM_ENUM = 0x5D;
41946#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41947pub const _MM_PERM_BBDC: _MM_PERM_ENUM = 0x5E;
41948#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41949pub const _MM_PERM_BBDD: _MM_PERM_ENUM = 0x5F;
41950#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41951pub const _MM_PERM_BCAA: _MM_PERM_ENUM = 0x60;
41952#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41953pub const _MM_PERM_BCAB: _MM_PERM_ENUM = 0x61;
41954#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41955pub const _MM_PERM_BCAC: _MM_PERM_ENUM = 0x62;
41956#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41957pub const _MM_PERM_BCAD: _MM_PERM_ENUM = 0x63;
41958#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41959pub const _MM_PERM_BCBA: _MM_PERM_ENUM = 0x64;
41960#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41961pub const _MM_PERM_BCBB: _MM_PERM_ENUM = 0x65;
41962#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41963pub const _MM_PERM_BCBC: _MM_PERM_ENUM = 0x66;
41964#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41965pub const _MM_PERM_BCBD: _MM_PERM_ENUM = 0x67;
41966#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41967pub const _MM_PERM_BCCA: _MM_PERM_ENUM = 0x68;
41968#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41969pub const _MM_PERM_BCCB: _MM_PERM_ENUM = 0x69;
41970#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41971pub const _MM_PERM_BCCC: _MM_PERM_ENUM = 0x6A;
41972#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41973pub const _MM_PERM_BCCD: _MM_PERM_ENUM = 0x6B;
41974#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41975pub const _MM_PERM_BCDA: _MM_PERM_ENUM = 0x6C;
41976#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41977pub const _MM_PERM_BCDB: _MM_PERM_ENUM = 0x6D;
41978#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41979pub const _MM_PERM_BCDC: _MM_PERM_ENUM = 0x6E;
41980#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41981pub const _MM_PERM_BCDD: _MM_PERM_ENUM = 0x6F;
41982#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41983pub const _MM_PERM_BDAA: _MM_PERM_ENUM = 0x70;
41984#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41985pub const _MM_PERM_BDAB: _MM_PERM_ENUM = 0x71;
41986#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41987pub const _MM_PERM_BDAC: _MM_PERM_ENUM = 0x72;
41988#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41989pub const _MM_PERM_BDAD: _MM_PERM_ENUM = 0x73;
41990#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41991pub const _MM_PERM_BDBA: _MM_PERM_ENUM = 0x74;
41992#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41993pub const _MM_PERM_BDBB: _MM_PERM_ENUM = 0x75;
41994#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41995pub const _MM_PERM_BDBC: _MM_PERM_ENUM = 0x76;
41996#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41997pub const _MM_PERM_BDBD: _MM_PERM_ENUM = 0x77;
41998#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
41999pub const _MM_PERM_BDCA: _MM_PERM_ENUM = 0x78;
42000#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42001pub const _MM_PERM_BDCB: _MM_PERM_ENUM = 0x79;
42002#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42003pub const _MM_PERM_BDCC: _MM_PERM_ENUM = 0x7A;
42004#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42005pub const _MM_PERM_BDCD: _MM_PERM_ENUM = 0x7B;
42006#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42007pub const _MM_PERM_BDDA: _MM_PERM_ENUM = 0x7C;
42008#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42009pub const _MM_PERM_BDDB: _MM_PERM_ENUM = 0x7D;
42010#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42011pub const _MM_PERM_BDDC: _MM_PERM_ENUM = 0x7E;
42012#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42013pub const _MM_PERM_BDDD: _MM_PERM_ENUM = 0x7F;
42014#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42015pub const _MM_PERM_CAAA: _MM_PERM_ENUM = 0x80;
42016#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42017pub const _MM_PERM_CAAB: _MM_PERM_ENUM = 0x81;
42018#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42019pub const _MM_PERM_CAAC: _MM_PERM_ENUM = 0x82;
42020#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42021pub const _MM_PERM_CAAD: _MM_PERM_ENUM = 0x83;
42022#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42023pub const _MM_PERM_CABA: _MM_PERM_ENUM = 0x84;
42024#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42025pub const _MM_PERM_CABB: _MM_PERM_ENUM = 0x85;
42026#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42027pub const _MM_PERM_CABC: _MM_PERM_ENUM = 0x86;
42028#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42029pub const _MM_PERM_CABD: _MM_PERM_ENUM = 0x87;
42030#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42031pub const _MM_PERM_CACA: _MM_PERM_ENUM = 0x88;
42032#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42033pub const _MM_PERM_CACB: _MM_PERM_ENUM = 0x89;
42034#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42035pub const _MM_PERM_CACC: _MM_PERM_ENUM = 0x8A;
42036#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42037pub const _MM_PERM_CACD: _MM_PERM_ENUM = 0x8B;
42038#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42039pub const _MM_PERM_CADA: _MM_PERM_ENUM = 0x8C;
42040#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42041pub const _MM_PERM_CADB: _MM_PERM_ENUM = 0x8D;
42042#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42043pub const _MM_PERM_CADC: _MM_PERM_ENUM = 0x8E;
42044#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42045pub const _MM_PERM_CADD: _MM_PERM_ENUM = 0x8F;
42046#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42047pub const _MM_PERM_CBAA: _MM_PERM_ENUM = 0x90;
42048#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42049pub const _MM_PERM_CBAB: _MM_PERM_ENUM = 0x91;
42050#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42051pub const _MM_PERM_CBAC: _MM_PERM_ENUM = 0x92;
42052#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42053pub const _MM_PERM_CBAD: _MM_PERM_ENUM = 0x93;
42054#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42055pub const _MM_PERM_CBBA: _MM_PERM_ENUM = 0x94;
42056#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42057pub const _MM_PERM_CBBB: _MM_PERM_ENUM = 0x95;
42058#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42059pub const _MM_PERM_CBBC: _MM_PERM_ENUM = 0x96;
42060#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42061pub const _MM_PERM_CBBD: _MM_PERM_ENUM = 0x97;
42062#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42063pub const _MM_PERM_CBCA: _MM_PERM_ENUM = 0x98;
42064#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42065pub const _MM_PERM_CBCB: _MM_PERM_ENUM = 0x99;
42066#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42067pub const _MM_PERM_CBCC: _MM_PERM_ENUM = 0x9A;
42068#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42069pub const _MM_PERM_CBCD: _MM_PERM_ENUM = 0x9B;
42070#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42071pub const _MM_PERM_CBDA: _MM_PERM_ENUM = 0x9C;
42072#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
42073pub const _MM_PERM_CBDB: _MM_PERM_ENUM = 0x9D;
42074#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CBDC: _MM_PERM_ENUM = 0x9E;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CBDD: _MM_PERM_ENUM = 0x9F;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CCAA: _MM_PERM_ENUM = 0xA0;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CCAB: _MM_PERM_ENUM = 0xA1;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CCAC: _MM_PERM_ENUM = 0xA2;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CCAD: _MM_PERM_ENUM = 0xA3;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CCBA: _MM_PERM_ENUM = 0xA4;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CCBB: _MM_PERM_ENUM = 0xA5;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CCBC: _MM_PERM_ENUM = 0xA6;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CCBD: _MM_PERM_ENUM = 0xA7;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CCCA: _MM_PERM_ENUM = 0xA8;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CCCB: _MM_PERM_ENUM = 0xA9;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CCCC: _MM_PERM_ENUM = 0xAA;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CCCD: _MM_PERM_ENUM = 0xAB;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CCDA: _MM_PERM_ENUM = 0xAC;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CCDB: _MM_PERM_ENUM = 0xAD;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CCDC: _MM_PERM_ENUM = 0xAE;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CCDD: _MM_PERM_ENUM = 0xAF;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CDAA: _MM_PERM_ENUM = 0xB0;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CDAB: _MM_PERM_ENUM = 0xB1;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CDAC: _MM_PERM_ENUM = 0xB2;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CDAD: _MM_PERM_ENUM = 0xB3;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CDBA: _MM_PERM_ENUM = 0xB4;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CDBB: _MM_PERM_ENUM = 0xB5;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CDBC: _MM_PERM_ENUM = 0xB6;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CDBD: _MM_PERM_ENUM = 0xB7;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CDCA: _MM_PERM_ENUM = 0xB8;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CDCB: _MM_PERM_ENUM = 0xB9;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CDCC: _MM_PERM_ENUM = 0xBA;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CDCD: _MM_PERM_ENUM = 0xBB;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CDDA: _MM_PERM_ENUM = 0xBC;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CDDB: _MM_PERM_ENUM = 0xBD;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CDDC: _MM_PERM_ENUM = 0xBE;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_CDDD: _MM_PERM_ENUM = 0xBF;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DAAA: _MM_PERM_ENUM = 0xC0;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DAAB: _MM_PERM_ENUM = 0xC1;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DAAC: _MM_PERM_ENUM = 0xC2;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DAAD: _MM_PERM_ENUM = 0xC3;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DABA: _MM_PERM_ENUM = 0xC4;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DABB: _MM_PERM_ENUM = 0xC5;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DABC: _MM_PERM_ENUM = 0xC6;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DABD: _MM_PERM_ENUM = 0xC7;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DACA: _MM_PERM_ENUM = 0xC8;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DACB: _MM_PERM_ENUM = 0xC9;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DACC: _MM_PERM_ENUM = 0xCA;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DACD: _MM_PERM_ENUM = 0xCB;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DADA: _MM_PERM_ENUM = 0xCC;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DADB: _MM_PERM_ENUM = 0xCD;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DADC: _MM_PERM_ENUM = 0xCE;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DADD: _MM_PERM_ENUM = 0xCF;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DBAA: _MM_PERM_ENUM = 0xD0;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DBAB: _MM_PERM_ENUM = 0xD1;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DBAC: _MM_PERM_ENUM = 0xD2;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DBAD: _MM_PERM_ENUM = 0xD3;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DBBA: _MM_PERM_ENUM = 0xD4;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DBBB: _MM_PERM_ENUM = 0xD5;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DBBC: _MM_PERM_ENUM = 0xD6;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DBBD: _MM_PERM_ENUM = 0xD7;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DBCA: _MM_PERM_ENUM = 0xD8;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DBCB: _MM_PERM_ENUM = 0xD9;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DBCC: _MM_PERM_ENUM = 0xDA;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DBCD: _MM_PERM_ENUM = 0xDB;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DBDA: _MM_PERM_ENUM = 0xDC;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DBDB: _MM_PERM_ENUM = 0xDD;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DBDC: _MM_PERM_ENUM = 0xDE;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DBDD: _MM_PERM_ENUM = 0xDF;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DCAA: _MM_PERM_ENUM = 0xE0;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DCAB: _MM_PERM_ENUM = 0xE1;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DCAC: _MM_PERM_ENUM = 0xE2;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DCAD: _MM_PERM_ENUM = 0xE3;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DCBA: _MM_PERM_ENUM = 0xE4;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DCBB: _MM_PERM_ENUM = 0xE5;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DCBC: _MM_PERM_ENUM = 0xE6;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DCBD: _MM_PERM_ENUM = 0xE7;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DCCA: _MM_PERM_ENUM = 0xE8;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DCCB: _MM_PERM_ENUM = 0xE9;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DCCC: _MM_PERM_ENUM = 0xEA;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DCCD: _MM_PERM_ENUM = 0xEB;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DCDA: _MM_PERM_ENUM = 0xEC;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DCDB: _MM_PERM_ENUM = 0xED;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DCDC: _MM_PERM_ENUM = 0xEE;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DCDD: _MM_PERM_ENUM = 0xEF;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DDAA: _MM_PERM_ENUM = 0xF0;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DDAB: _MM_PERM_ENUM = 0xF1;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DDAC: _MM_PERM_ENUM = 0xF2;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DDAD: _MM_PERM_ENUM = 0xF3;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DDBA: _MM_PERM_ENUM = 0xF4;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DDBB: _MM_PERM_ENUM = 0xF5;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DDBC: _MM_PERM_ENUM = 0xF6;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DDBD: _MM_PERM_ENUM = 0xF7;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DDCA: _MM_PERM_ENUM = 0xF8;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DDCB: _MM_PERM_ENUM = 0xF9;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DDCC: _MM_PERM_ENUM = 0xFA;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DDCD: _MM_PERM_ENUM = 0xFB;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DDDA: _MM_PERM_ENUM = 0xFC;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DDDB: _MM_PERM_ENUM = 0xFD;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DDDC: _MM_PERM_ENUM = 0xFE;
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub const _MM_PERM_DDDD: _MM_PERM_ENUM = 0xFF;
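// Each `_MM_PERM_*` constant packs four 2-bit dword selectors (A = 0b00 .. D = 0b11),
// most-significant selector first, so e.g. `_MM_PERM_DCBA` (0xE4) is the identity
// permutation. The values are meant to be passed as the `MASK` immediate of dword
// shuffles such as `_mm512_shuffle_epi32`.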

#[allow(improper_ctypes)]
unsafe extern "C" {
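    // Raw LLVM intrinsic bindings that back the AVX-512F intrinsics defined above.
    // Trailing `rounding`/`sae` parameters carry the `_MM_FROUND_*` embedded-rounding
    // and suppress-all-exceptions controls.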
    #[link_name = "llvm.x86.avx512.sqrt.ps.512"]
    unsafe fn vsqrtps(a: f32x16, rounding: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.sqrt.pd.512"]
    unsafe fn vsqrtpd(a: f64x8, rounding: i32) -> f64x8;

    #[link_name = "llvm.x86.avx512.vfmadd.ps.512"]
    unsafe fn vfmadd132psround(a: __m512, b: __m512, c: __m512, rounding: i32) -> __m512;
    #[link_name = "llvm.x86.avx512.vfmadd.pd.512"]
    unsafe fn vfmadd132pdround(a: __m512d, b: __m512d, c: __m512d, rounding: i32) -> __m512d;

    #[link_name = "llvm.x86.avx512.vfmaddsub.ps.512"]
    unsafe fn vfmaddsubpsround(a: __m512, b: __m512, c: __m512, rounding: i32) -> __m512; // from clang
    #[link_name = "llvm.x86.avx512.vfmaddsub.pd.512"]
    unsafe fn vfmaddsubpdround(a: __m512d, b: __m512d, c: __m512d, rounding: i32) -> __m512d; // from clang

    #[link_name = "llvm.x86.avx512.add.ps.512"]
    unsafe fn vaddps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.add.pd.512"]
    unsafe fn vaddpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.sub.ps.512"]
    unsafe fn vsubps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.sub.pd.512"]
    unsafe fn vsubpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.mul.ps.512"]
    unsafe fn vmulps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.mul.pd.512"]
    unsafe fn vmulpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.div.ps.512"]
    unsafe fn vdivps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.div.pd.512"]
    unsafe fn vdivpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;

    #[link_name = "llvm.x86.avx512.max.ps.512"]
    unsafe fn vmaxps(a: f32x16, b: f32x16, sae: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.max.pd.512"]
    unsafe fn vmaxpd(a: f64x8, b: f64x8, sae: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.min.ps.512"]
    unsafe fn vminps(a: f32x16, b: f32x16, sae: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.min.pd.512"]
    unsafe fn vminpd(a: f64x8, b: f64x8, sae: i32) -> f64x8;

    #[link_name = "llvm.x86.avx512.mask.getexp.ps.512"]
    unsafe fn vgetexpps(a: f32x16, src: f32x16, m: u16, sae: i32) -> f32x16;

    #[link_name = "llvm.x86.avx512.mask.getexp.ps.256"]
    unsafe fn vgetexpps256(a: f32x8, src: f32x8, m: u8) -> f32x8;
    #[link_name = "llvm.x86.avx512.mask.getexp.ps.128"]
    unsafe fn vgetexpps128(a: f32x4, src: f32x4, m: u8) -> f32x4;

    #[link_name = "llvm.x86.avx512.mask.getexp.pd.512"]
    unsafe fn vgetexppd(a: f64x8, src: f64x8, m: u8, sae: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.mask.getexp.pd.256"]
    unsafe fn vgetexppd256(a: f64x4, src: f64x4, m: u8) -> f64x4;
    #[link_name = "llvm.x86.avx512.mask.getexp.pd.128"]
    unsafe fn vgetexppd128(a: f64x2, src: f64x2, m: u8) -> f64x2;

    #[link_name = "llvm.x86.avx512.mask.rndscale.ps.512"]
    unsafe fn vrndscaleps(a: f32x16, imm8: i32, src: f32x16, mask: u16, sae: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.mask.rndscale.ps.256"]
    unsafe fn vrndscaleps256(a: f32x8, imm8: i32, src: f32x8, mask: u8) -> f32x8;
    #[link_name = "llvm.x86.avx512.mask.rndscale.ps.128"]
    unsafe fn vrndscaleps128(a: f32x4, imm8: i32, src: f32x4, mask: u8) -> f32x4;

    #[link_name = "llvm.x86.avx512.mask.rndscale.pd.512"]
    unsafe fn vrndscalepd(a: f64x8, imm8: i32, src: f64x8, mask: u8, sae: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.mask.rndscale.pd.256"]
    unsafe fn vrndscalepd256(a: f64x4, imm8: i32, src: f64x4, mask: u8) -> f64x4;
    #[link_name = "llvm.x86.avx512.mask.rndscale.pd.128"]
    unsafe fn vrndscalepd128(a: f64x2, imm8: i32, src: f64x2, mask: u8) -> f64x2;

    #[link_name = "llvm.x86.avx512.mask.scalef.ps.512"]
    unsafe fn vscalefps(a: f32x16, b: f32x16, src: f32x16, mask: u16, rounding: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.mask.scalef.ps.256"]
    unsafe fn vscalefps256(a: f32x8, b: f32x8, src: f32x8, mask: u8) -> f32x8;
    #[link_name = "llvm.x86.avx512.mask.scalef.ps.128"]
    unsafe fn vscalefps128(a: f32x4, b: f32x4, src: f32x4, mask: u8) -> f32x4;

    #[link_name = "llvm.x86.avx512.mask.scalef.pd.512"]
    unsafe fn vscalefpd(a: f64x8, b: f64x8, src: f64x8, mask: u8, rounding: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.mask.scalef.pd.256"]
    unsafe fn vscalefpd256(a: f64x4, b: f64x4, src: f64x4, mask: u8) -> f64x4;
    #[link_name = "llvm.x86.avx512.mask.scalef.pd.128"]
    unsafe fn vscalefpd128(a: f64x2, b: f64x2, src: f64x2, mask: u8) -> f64x2;

    #[link_name = "llvm.x86.avx512.mask.fixupimm.ps.512"]
    unsafe fn vfixupimmps(a: f32x16, b: f32x16, c: i32x16, imm8: i32, mask: u16, sae: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.mask.fixupimm.ps.256"]
    unsafe fn vfixupimmps256(a: f32x8, b: f32x8, c: i32x8, imm8: i32, mask: u8) -> f32x8;
    #[link_name = "llvm.x86.avx512.mask.fixupimm.ps.128"]
    unsafe fn vfixupimmps128(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8) -> f32x4;

    #[link_name = "llvm.x86.avx512.mask.fixupimm.pd.512"]
    unsafe fn vfixupimmpd(a: f64x8, b: f64x8, c: i64x8, imm8: i32, mask: u8, sae: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.mask.fixupimm.pd.256"]
    unsafe fn vfixupimmpd256(a: f64x4, b: f64x4, c: i64x4, imm8: i32, mask: u8) -> f64x4;
    #[link_name = "llvm.x86.avx512.mask.fixupimm.pd.128"]
    unsafe fn vfixupimmpd128(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8) -> f64x2;

    #[link_name = "llvm.x86.avx512.maskz.fixupimm.ps.512"]
    unsafe fn vfixupimmpsz(a: f32x16, b: f32x16, c: i32x16, imm8: i32, mask: u16, sae: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.maskz.fixupimm.ps.256"]
    unsafe fn vfixupimmpsz256(a: f32x8, b: f32x8, c: i32x8, imm8: i32, mask: u8) -> f32x8;
    #[link_name = "llvm.x86.avx512.maskz.fixupimm.ps.128"]
    unsafe fn vfixupimmpsz128(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8) -> f32x4;

    #[link_name = "llvm.x86.avx512.maskz.fixupimm.pd.512"]
    unsafe fn vfixupimmpdz(a: f64x8, b: f64x8, c: i64x8, imm8: i32, mask: u8, sae: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.maskz.fixupimm.pd.256"]
    unsafe fn vfixupimmpdz256(a: f64x4, b: f64x4, c: i64x4, imm8: i32, mask: u8) -> f64x4;
    #[link_name = "llvm.x86.avx512.maskz.fixupimm.pd.128"]
    unsafe fn vfixupimmpdz128(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8) -> f64x2;

    #[link_name = "llvm.x86.avx512.pternlog.d.512"]
    unsafe fn vpternlogd(a: i32x16, b: i32x16, c: i32x16, imm8: i32) -> i32x16;
    #[link_name = "llvm.x86.avx512.pternlog.d.256"]
    unsafe fn vpternlogd256(a: i32x8, b: i32x8, c: i32x8, imm8: i32) -> i32x8;
    #[link_name = "llvm.x86.avx512.pternlog.d.128"]
    unsafe fn vpternlogd128(a: i32x4, b: i32x4, c: i32x4, imm8: i32) -> i32x4;

    #[link_name = "llvm.x86.avx512.pternlog.q.512"]
    unsafe fn vpternlogq(a: i64x8, b: i64x8, c: i64x8, imm8: i32) -> i64x8;
    #[link_name = "llvm.x86.avx512.pternlog.q.256"]
    unsafe fn vpternlogq256(a: i64x4, b: i64x4, c: i64x4, imm8: i32) -> i64x4;
    #[link_name = "llvm.x86.avx512.pternlog.q.128"]
    unsafe fn vpternlogq128(a: i64x2, b: i64x2, c: i64x2, imm8: i32) -> i64x2;

    #[link_name = "llvm.x86.avx512.mask.getmant.ps.512"]
    unsafe fn vgetmantps(a: f32x16, mantissas: i32, src: f32x16, m: u16, sae: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.mask.getmant.ps.256"]
    unsafe fn vgetmantps256(a: f32x8, mantissas: i32, src: f32x8, m: u8) -> f32x8;
    #[link_name = "llvm.x86.avx512.mask.getmant.ps.128"]
    unsafe fn vgetmantps128(a: f32x4, mantissas: i32, src: f32x4, m: u8) -> f32x4;

    #[link_name = "llvm.x86.avx512.mask.getmant.pd.512"]
    unsafe fn vgetmantpd(a: f64x8, mantissas: i32, src: f64x8, m: u8, sae: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.mask.getmant.pd.256"]
    unsafe fn vgetmantpd256(a: f64x4, mantissas: i32, src: f64x4, m: u8) -> f64x4;
    #[link_name = "llvm.x86.avx512.mask.getmant.pd.128"]
    unsafe fn vgetmantpd128(a: f64x2, mantissas: i32, src: f64x2, m: u8) -> f64x2;

    #[link_name = "llvm.x86.avx512.rcp14.ps.512"]
    unsafe fn vrcp14ps(a: f32x16, src: f32x16, m: u16) -> f32x16;
    #[link_name = "llvm.x86.avx512.rcp14.ps.256"]
    unsafe fn vrcp14ps256(a: f32x8, src: f32x8, m: u8) -> f32x8;
    #[link_name = "llvm.x86.avx512.rcp14.ps.128"]
    unsafe fn vrcp14ps128(a: f32x4, src: f32x4, m: u8) -> f32x4;

    #[link_name = "llvm.x86.avx512.rcp14.pd.512"]
    unsafe fn vrcp14pd(a: f64x8, src: f64x8, m: u8) -> f64x8;
    #[link_name = "llvm.x86.avx512.rcp14.pd.256"]
    unsafe fn vrcp14pd256(a: f64x4, src: f64x4, m: u8) -> f64x4;
    #[link_name = "llvm.x86.avx512.rcp14.pd.128"]
    unsafe fn vrcp14pd128(a: f64x2, src: f64x2, m: u8) -> f64x2;

    #[link_name = "llvm.x86.avx512.rsqrt14.ps.512"]
    unsafe fn vrsqrt14ps(a: f32x16, src: f32x16, m: u16) -> f32x16;
    #[link_name = "llvm.x86.avx512.rsqrt14.ps.256"]
    unsafe fn vrsqrt14ps256(a: f32x8, src: f32x8, m: u8) -> f32x8;
    #[link_name = "llvm.x86.avx512.rsqrt14.ps.128"]
    unsafe fn vrsqrt14ps128(a: f32x4, src: f32x4, m: u8) -> f32x4;

    #[link_name = "llvm.x86.avx512.rsqrt14.pd.512"]
    unsafe fn vrsqrt14pd(a: f64x8, src: f64x8, m: u8) -> f64x8;
    #[link_name = "llvm.x86.avx512.rsqrt14.pd.256"]
    unsafe fn vrsqrt14pd256(a: f64x4, src: f64x4, m: u8) -> f64x4;
    #[link_name = "llvm.x86.avx512.rsqrt14.pd.128"]
    unsafe fn vrsqrt14pd128(a: f64x2, src: f64x2, m: u8) -> f64x2;

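    // Vector float <-> integer conversions. The `mask.*` forms take a passthrough
    // `src` for lanes whose mask bit is clear; `rounding`/`sae` select the rounding
    // behaviour of the operation.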
    #[link_name = "llvm.x86.avx512.mask.cvtps2dq.512"]
    unsafe fn vcvtps2dq(a: f32x16, src: i32x16, mask: u16, rounding: i32) -> i32x16;

    #[link_name = "llvm.x86.avx512.mask.cvtps2udq.512"]
    unsafe fn vcvtps2udq(a: f32x16, src: u32x16, mask: u16, rounding: i32) -> u32x16;
    #[link_name = "llvm.x86.avx512.mask.cvtps2udq.256"]
    unsafe fn vcvtps2udq256(a: f32x8, src: u32x8, mask: u8) -> u32x8;
    #[link_name = "llvm.x86.avx512.mask.cvtps2udq.128"]
    unsafe fn vcvtps2udq128(a: f32x4, src: u32x4, mask: u8) -> u32x4;

    #[link_name = "llvm.x86.avx512.mask.cvtps2pd.512"]
    unsafe fn vcvtps2pd(a: f32x8, src: f64x8, mask: u8, sae: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.mask.cvtpd2ps.512"]
    unsafe fn vcvtpd2ps(a: f64x8, src: f32x8, mask: u8, rounding: i32) -> f32x8;

    #[link_name = "llvm.x86.avx512.mask.cvtpd2dq.512"]
    unsafe fn vcvtpd2dq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> i32x8;

    #[link_name = "llvm.x86.avx512.mask.cvtpd2udq.512"]
    unsafe fn vcvtpd2udq(a: f64x8, src: u32x8, mask: u8, rounding: i32) -> u32x8;
    #[link_name = "llvm.x86.avx512.mask.cvtpd2udq.256"]
    unsafe fn vcvtpd2udq256(a: f64x4, src: u32x4, mask: u8) -> u32x4;
    #[link_name = "llvm.x86.avx512.mask.cvtpd2udq.128"]
    unsafe fn vcvtpd2udq128(a: f64x2, src: u32x4, mask: u8) -> u32x4;

    #[link_name = "llvm.x86.avx512.sitofp.round.v16f32.v16i32"]
    unsafe fn vcvtdq2ps(a: i32x16, rounding: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.uitofp.round.v16f32.v16i32"]
    unsafe fn vcvtudq2ps(a: u32x16, rounding: i32) -> f32x16;

    #[link_name = "llvm.x86.avx512.mask.vcvtps2ph.512"]
    unsafe fn vcvtps2ph(a: f32x16, sae: i32, src: i16x16, mask: u16) -> i16x16;
    #[link_name = "llvm.x86.avx512.mask.vcvtps2ph.256"]
    unsafe fn vcvtps2ph256(a: f32x8, sae: i32, src: i16x8, mask: u8) -> i16x8;
    #[link_name = "llvm.x86.avx512.mask.vcvtps2ph.128"]
    unsafe fn vcvtps2ph128(a: f32x4, sae: i32, src: i16x8, mask: u8) -> i16x8;

    #[link_name = "llvm.x86.avx512.mask.vcvtph2ps.512"]
    unsafe fn vcvtph2ps(a: i16x16, src: f32x16, mask: u16, sae: i32) -> f32x16;

    #[link_name = "llvm.x86.avx512.mask.cvttps2dq.512"]
    unsafe fn vcvttps2dq(a: f32x16, src: i32x16, mask: u16, rounding: i32) -> i32x16;
    #[link_name = "llvm.x86.avx512.mask.cvttps2dq.256"]
    unsafe fn vcvttps2dq256(a: f32x8, src: i32x8, mask: u8) -> i32x8;
    #[link_name = "llvm.x86.avx512.mask.cvttps2dq.128"]
    unsafe fn vcvttps2dq128(a: f32x4, src: i32x4, mask: u8) -> i32x4;

    #[link_name = "llvm.x86.avx512.mask.cvttps2udq.512"]
    unsafe fn vcvttps2udq(a: f32x16, src: u32x16, mask: u16, rounding: i32) -> u32x16;
    #[link_name = "llvm.x86.avx512.mask.cvttps2udq.256"]
    unsafe fn vcvttps2udq256(a: f32x8, src: u32x8, mask: u8) -> u32x8;
    #[link_name = "llvm.x86.avx512.mask.cvttps2udq.128"]
    unsafe fn vcvttps2udq128(a: f32x4, src: u32x4, mask: u8) -> u32x4;

    #[link_name = "llvm.x86.avx512.mask.cvttpd2dq.512"]
    unsafe fn vcvttpd2dq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> i32x8;
    #[link_name = "llvm.x86.avx512.mask.cvttpd2dq.256"]
    unsafe fn vcvttpd2dq256(a: f64x4, src: i32x4, mask: u8) -> i32x4;
    #[link_name = "llvm.x86.avx512.mask.cvttpd2dq.128"]
    unsafe fn vcvttpd2dq128(a: f64x2, src: i32x4, mask: u8) -> i32x4;

    #[link_name = "llvm.x86.avx512.mask.cvttpd2udq.512"]
    unsafe fn vcvttpd2udq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> u32x8;
    #[link_name = "llvm.x86.avx512.mask.cvttpd2udq.256"]
    unsafe fn vcvttpd2udq256(a: f64x4, src: i32x4, mask: u8) -> u32x4;
    #[link_name = "llvm.x86.avx512.mask.cvttpd2udq.128"]
    unsafe fn vcvttpd2udq128(a: f64x2, src: i32x4, mask: u8) -> u32x4;

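    // Integer down-conversions (`vpmov*`): plain `pmov` truncates, `pmovs` saturates
    // as signed and `pmovus` saturates as unsigned; the `*mem` variants store the
    // narrowed elements directly to memory.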
    #[link_name = "llvm.x86.avx512.mask.pmov.dw.128"]
    unsafe fn vpmovdw128(a: i32x4, src: i16x8, mask: u8) -> i16x8;
    #[link_name = "llvm.x86.avx512.mask.pmov.db.256"]
    unsafe fn vpmovdb256(a: i32x8, src: i8x16, mask: u8) -> i8x16;
    #[link_name = "llvm.x86.avx512.mask.pmov.db.128"]
    unsafe fn vpmovdb128(a: i32x4, src: i8x16, mask: u8) -> i8x16;

    #[link_name = "llvm.x86.avx512.mask.pmov.qw.256"]
    unsafe fn vpmovqw256(a: i64x4, src: i16x8, mask: u8) -> i16x8;
    #[link_name = "llvm.x86.avx512.mask.pmov.qw.128"]
    unsafe fn vpmovqw128(a: i64x2, src: i16x8, mask: u8) -> i16x8;
    #[link_name = "llvm.x86.avx512.mask.pmov.qb.256"]
    unsafe fn vpmovqb256(a: i64x4, src: i8x16, mask: u8) -> i8x16;
    #[link_name = "llvm.x86.avx512.mask.pmov.qb.128"]
    unsafe fn vpmovqb128(a: i64x2, src: i8x16, mask: u8) -> i8x16;
    #[link_name = "llvm.x86.avx512.mask.pmov.qd.128"]
    unsafe fn vpmovqd128(a: i64x2, src: i32x4, mask: u8) -> i32x4;

    #[link_name = "llvm.x86.avx512.mask.pmov.dw.mem.512"]
    unsafe fn vpmovdwmem(mem_addr: *mut i8, a: i32x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.pmov.dw.mem.256"]
    unsafe fn vpmovdwmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmov.dw.mem.128"]
    unsafe fn vpmovdwmem128(mem_addr: *mut i8, a: i32x4, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.mem.512"]
    unsafe fn vpmovsdwmem(mem_addr: *mut i8, a: i32x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.mem.256"]
    unsafe fn vpmovsdwmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.mem.128"]
    unsafe fn vpmovsdwmem128(mem_addr: *mut i8, a: i32x4, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.mem.512"]
    unsafe fn vpmovusdwmem(mem_addr: *mut i8, a: i32x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.mem.256"]
    unsafe fn vpmovusdwmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.mem.128"]
    unsafe fn vpmovusdwmem128(mem_addr: *mut i8, a: i32x4, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmov.db.mem.512"]
    unsafe fn vpmovdbmem(mem_addr: *mut i8, a: i32x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.pmov.db.mem.256"]
    unsafe fn vpmovdbmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmov.db.mem.128"]
    unsafe fn vpmovdbmem128(mem_addr: *mut i8, a: i32x4, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmovs.db.mem.512"]
    unsafe fn vpmovsdbmem(mem_addr: *mut i8, a: i32x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.pmovs.db.mem.256"]
    unsafe fn vpmovsdbmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovs.db.mem.128"]
    unsafe fn vpmovsdbmem128(mem_addr: *mut i8, a: i32x4, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmovus.db.mem.512"]
    unsafe fn vpmovusdbmem(mem_addr: *mut i8, a: i32x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.pmovus.db.mem.256"]
    unsafe fn vpmovusdbmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovus.db.mem.128"]
    unsafe fn vpmovusdbmem128(mem_addr: *mut i8, a: i32x4, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmov.qw.mem.512"]
    unsafe fn vpmovqwmem(mem_addr: *mut i8, a: i64x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmov.qw.mem.256"]
    unsafe fn vpmovqwmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmov.qw.mem.128"]
    unsafe fn vpmovqwmem128(mem_addr: *mut i8, a: i64x2, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.mem.512"]
    unsafe fn vpmovsqwmem(mem_addr: *mut i8, a: i64x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.mem.256"]
    unsafe fn vpmovsqwmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.mem.128"]
    unsafe fn vpmovsqwmem128(mem_addr: *mut i8, a: i64x2, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.mem.512"]
    unsafe fn vpmovusqwmem(mem_addr: *mut i8, a: i64x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.mem.256"]
    unsafe fn vpmovusqwmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.mem.128"]
    unsafe fn vpmovusqwmem128(mem_addr: *mut i8, a: i64x2, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmov.qb.mem.512"]
    unsafe fn vpmovqbmem(mem_addr: *mut i8, a: i64x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmov.qb.mem.256"]
    unsafe fn vpmovqbmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmov.qb.mem.128"]
    unsafe fn vpmovqbmem128(mem_addr: *mut i8, a: i64x2, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.mem.512"]
    unsafe fn vpmovsqbmem(mem_addr: *mut i8, a: i64x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.mem.256"]
    unsafe fn vpmovsqbmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.mem.128"]
    unsafe fn vpmovsqbmem128(mem_addr: *mut i8, a: i64x2, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.mem.512"]
    unsafe fn vpmovusqbmem(mem_addr: *mut i8, a: i64x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.mem.256"]
    unsafe fn vpmovusqbmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.mem.128"]
    unsafe fn vpmovusqbmem128(mem_addr: *mut i8, a: i64x2, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmov.qd.mem.512"]
    unsafe fn vpmovqdmem(mem_addr: *mut i8, a: i64x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmov.qd.mem.256"]
    unsafe fn vpmovqdmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmov.qd.mem.128"]
    unsafe fn vpmovqdmem128(mem_addr: *mut i8, a: i64x2, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.mem.512"]
    unsafe fn vpmovsqdmem(mem_addr: *mut i8, a: i64x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.mem.256"]
    unsafe fn vpmovsqdmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.mem.128"]
    unsafe fn vpmovsqdmem128(mem_addr: *mut i8, a: i64x2, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.mem.512"]
    unsafe fn vpmovusqdmem(mem_addr: *mut i8, a: i64x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.mem.256"]
    unsafe fn vpmovusqdmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.mem.128"]
    unsafe fn vpmovusqdmem128(mem_addr: *mut i8, a: i64x2, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.pmov.qb.512"]
    unsafe fn vpmovqb(a: i64x8, src: i8x16, mask: u8) -> i8x16;

    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.512"]
    unsafe fn vpmovsdw(a: i32x16, src: i16x16, mask: u16) -> i16x16;
    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.256"]
    unsafe fn vpmovsdw256(a: i32x8, src: i16x8, mask: u8) -> i16x8;
    #[link_name = "llvm.x86.avx512.mask.pmovs.dw.128"]
    unsafe fn vpmovsdw128(a: i32x4, src: i16x8, mask: u8) -> i16x8;

    #[link_name = "llvm.x86.avx512.mask.pmovs.db.512"]
    unsafe fn vpmovsdb(a: i32x16, src: i8x16, mask: u16) -> i8x16;
    #[link_name = "llvm.x86.avx512.mask.pmovs.db.256"]
    unsafe fn vpmovsdb256(a: i32x8, src: i8x16, mask: u8) -> i8x16;
    #[link_name = "llvm.x86.avx512.mask.pmovs.db.128"]
    unsafe fn vpmovsdb128(a: i32x4, src: i8x16, mask: u8) -> i8x16;

    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.512"]
    unsafe fn vpmovsqd(a: i64x8, src: i32x8, mask: u8) -> i32x8;
    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.256"]
    unsafe fn vpmovsqd256(a: i64x4, src: i32x4, mask: u8) -> i32x4;
    #[link_name = "llvm.x86.avx512.mask.pmovs.qd.128"]
    unsafe fn vpmovsqd128(a: i64x2, src: i32x4, mask: u8) -> i32x4;

    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.512"]
    unsafe fn vpmovsqw(a: i64x8, src: i16x8, mask: u8) -> i16x8;
    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.256"]
    unsafe fn vpmovsqw256(a: i64x4, src: i16x8, mask: u8) -> i16x8;
    #[link_name = "llvm.x86.avx512.mask.pmovs.qw.128"]
    unsafe fn vpmovsqw128(a: i64x2, src: i16x8, mask: u8) -> i16x8;

    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.512"]
    unsafe fn vpmovsqb(a: i64x8, src: i8x16, mask: u8) -> i8x16;
    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.256"]
    unsafe fn vpmovsqb256(a: i64x4, src: i8x16, mask: u8) -> i8x16;
    #[link_name = "llvm.x86.avx512.mask.pmovs.qb.128"]
    unsafe fn vpmovsqb128(a: i64x2, src: i8x16, mask: u8) -> i8x16;

    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.512"]
    unsafe fn vpmovusdw(a: u32x16, src: u16x16, mask: u16) -> u16x16;
    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.256"]
    unsafe fn vpmovusdw256(a: u32x8, src: u16x8, mask: u8) -> u16x8;
    #[link_name = "llvm.x86.avx512.mask.pmovus.dw.128"]
    unsafe fn vpmovusdw128(a: u32x4, src: u16x8, mask: u8) -> u16x8;

    #[link_name = "llvm.x86.avx512.mask.pmovus.db.512"]
    unsafe fn vpmovusdb(a: u32x16, src: u8x16, mask: u16) -> u8x16;
    #[link_name = "llvm.x86.avx512.mask.pmovus.db.256"]
    unsafe fn vpmovusdb256(a: u32x8, src: u8x16, mask: u8) -> u8x16;
    #[link_name = "llvm.x86.avx512.mask.pmovus.db.128"]
    unsafe fn vpmovusdb128(a: u32x4, src: u8x16, mask: u8) -> u8x16;

    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.512"]
    unsafe fn vpmovusqd(a: u64x8, src: u32x8, mask: u8) -> u32x8;
    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.256"]
    unsafe fn vpmovusqd256(a: u64x4, src: u32x4, mask: u8) -> u32x4;
    #[link_name = "llvm.x86.avx512.mask.pmovus.qd.128"]
    unsafe fn vpmovusqd128(a: u64x2, src: u32x4, mask: u8) -> u32x4;

    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.512"]
    unsafe fn vpmovusqw(a: u64x8, src: u16x8, mask: u8) -> u16x8;
    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.256"]
    unsafe fn vpmovusqw256(a: u64x4, src: u16x8, mask: u8) -> u16x8;
    #[link_name = "llvm.x86.avx512.mask.pmovus.qw.128"]
    unsafe fn vpmovusqw128(a: u64x2, src: u16x8, mask: u8) -> u16x8;

    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.512"]
    unsafe fn vpmovusqb(a: u64x8, src: u8x16, mask: u8) -> u8x16;
    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.256"]
    unsafe fn vpmovusqb256(a: u64x4, src: u8x16, mask: u8) -> u8x16;
    #[link_name = "llvm.x86.avx512.mask.pmovus.qb.128"]
    unsafe fn vpmovusqb128(a: u64x2, src: u8x16, mask: u8) -> u8x16;

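    // Gathers and scatters: each active lane (mask bit set) accesses memory at
    // `slice + offset * scale`; inactive lanes are not accessed.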
    #[link_name = "llvm.x86.avx512.gather.dpd.512"]
    unsafe fn vgatherdpd(src: f64x8, slice: *const i8, offsets: i32x8, mask: i8, scale: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.gather.dps.512"]
    unsafe fn vgatherdps(src: f32x16, slice: *const i8, offsets: i32x16, mask: i16, scale: i32) -> f32x16;
    #[link_name = "llvm.x86.avx512.gather.qpd.512"]
    unsafe fn vgatherqpd(src: f64x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> f64x8;
    #[link_name = "llvm.x86.avx512.gather.qps.512"]
    unsafe fn vgatherqps(src: f32x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> f32x8;
    #[link_name = "llvm.x86.avx512.gather.dpq.512"]
    unsafe fn vpgatherdq(src: i64x8, slice: *const i8, offsets: i32x8, mask: i8, scale: i32) -> i64x8;
    #[link_name = "llvm.x86.avx512.gather.dpi.512"]
    unsafe fn vpgatherdd(src: i32x16, slice: *const i8, offsets: i32x16, mask: i16, scale: i32) -> i32x16;
    #[link_name = "llvm.x86.avx512.gather.qpq.512"]
    unsafe fn vpgatherqq(src: i64x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> i64x8;
    #[link_name = "llvm.x86.avx512.gather.qpi.512"]
    unsafe fn vpgatherqd(src: i32x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> i32x8;

    #[link_name = "llvm.x86.avx512.scatter.dpd.512"]
    unsafe fn vscatterdpd(slice: *mut i8, mask: i8, offsets: i32x8, src: f64x8, scale: i32);
    #[link_name = "llvm.x86.avx512.scatter.dps.512"]
    unsafe fn vscatterdps(slice: *mut i8, mask: i16, offsets: i32x16, src: f32x16, scale: i32);
    #[link_name = "llvm.x86.avx512.scatter.qpd.512"]
    unsafe fn vscatterqpd(slice: *mut i8, mask: i8, offsets: i64x8, src: f64x8, scale: i32);
    #[link_name = "llvm.x86.avx512.scatter.qps.512"]
    unsafe fn vscatterqps(slice: *mut i8, mask: i8, offsets: i64x8, src: f32x8, scale: i32);
    #[link_name = "llvm.x86.avx512.scatter.dpq.512"]
    unsafe fn vpscatterdq(slice: *mut i8, mask: i8, offsets: i32x8, src: i64x8, scale: i32);

    #[link_name = "llvm.x86.avx512.scatter.dpi.512"]
    unsafe fn vpscatterdd(slice: *mut i8, mask: i16, offsets: i32x16, src: i32x16, scale: i32);
    #[link_name = "llvm.x86.avx512.scatter.qpq.512"]
    unsafe fn vpscatterqq(slice: *mut i8, mask: i8, offsets: i64x8, src: i64x8, scale: i32);
    #[link_name = "llvm.x86.avx512.scatter.qpi.512"]
    unsafe fn vpscatterqd(slice: *mut i8, mask: i8, offsets: i64x8, src: i32x8, scale: i32);

    #[link_name = "llvm.x86.avx512.scattersiv4.si"]
    unsafe fn vpscatterdd_128(slice: *mut i8, k: u8, offsets: i32x4, src: i32x4, scale: i32);
    #[link_name = "llvm.x86.avx512.scattersiv2.di"]
    unsafe fn vpscatterdq_128(slice: *mut i8, k: u8, offsets: i32x4, src: i64x2, scale: i32);
    #[link_name = "llvm.x86.avx512.scattersiv2.df"]
    unsafe fn vscatterdpd_128(slice: *mut i8, k: u8, offsets: i32x4, src: f64x2, scale: i32);
    #[link_name = "llvm.x86.avx512.scattersiv4.sf"]
    unsafe fn vscatterdps_128(slice: *mut i8, k: u8, offsets: i32x4, src: f32x4, scale: i32);
    #[link_name = "llvm.x86.avx512.scatterdiv4.si"]
    unsafe fn vpscatterqd_128(slice: *mut i8, k: u8, offsets: i64x2, src: i32x4, scale: i32);
    #[link_name = "llvm.x86.avx512.scatterdiv2.di"]
    unsafe fn vpscatterqq_128(slice: *mut i8, k: u8, offsets: i64x2, src: i64x2, scale: i32);
    #[link_name = "llvm.x86.avx512.scatterdiv2.df"]
    unsafe fn vscatterqpd_128(slice: *mut i8, k: u8, offsets: i64x2, src: f64x2, scale: i32);
    #[link_name = "llvm.x86.avx512.scatterdiv4.sf"]
    unsafe fn vscatterqps_128(slice: *mut i8, k: u8, offsets: i64x2, src: f32x4, scale: i32);

    #[link_name = "llvm.x86.avx512.scattersiv8.si"]
    unsafe fn vpscatterdd_256(slice: *mut i8, k: u8, offsets: i32x8, src: i32x8, scale: i32);
    #[link_name = "llvm.x86.avx512.scattersiv4.di"]
    unsafe fn vpscatterdq_256(slice: *mut i8, k: u8, offsets: i32x4, src: i64x4, scale: i32);
    #[link_name = "llvm.x86.avx512.scattersiv4.df"]
    unsafe fn vscatterdpd_256(slice: *mut i8, k: u8, offsets: i32x4, src: f64x4, scale: i32);
    #[link_name = "llvm.x86.avx512.scattersiv8.sf"]
    unsafe fn vscatterdps_256(slice: *mut i8, k: u8, offsets: i32x8, src: f32x8, scale: i32);
    #[link_name = "llvm.x86.avx512.scatterdiv8.si"]
    unsafe fn vpscatterqd_256(slice: *mut i8, k: u8, offsets: i64x4, src: i32x4, scale: i32);
    #[link_name = "llvm.x86.avx512.scatterdiv4.di"]
    unsafe fn vpscatterqq_256(slice: *mut i8, k: u8, offsets: i64x4, src: i64x4, scale: i32);
    #[link_name = "llvm.x86.avx512.scatterdiv4.df"]
    unsafe fn vscatterqpd_256(slice: *mut i8, k: u8, offsets: i64x4, src: f64x4, scale: i32);
    #[link_name = "llvm.x86.avx512.scatterdiv8.sf"]
    unsafe fn vscatterqps_256(slice: *mut i8, k: u8, offsets: i64x4, src: f32x4, scale: i32);

    #[link_name = "llvm.x86.avx512.gather3siv4.si"]
    unsafe fn vpgatherdd_128(src: i32x4, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> i32x4;
    #[link_name = "llvm.x86.avx512.gather3siv2.di"]
    unsafe fn vpgatherdq_128(src: i64x2, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> i64x2;
    #[link_name = "llvm.x86.avx512.gather3siv2.df"]
    unsafe fn vgatherdpd_128(src: f64x2, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> f64x2;
    #[link_name = "llvm.x86.avx512.gather3siv4.sf"]
    unsafe fn vgatherdps_128(src: f32x4, slice: *const u8, offsets: i32x4, k: u8, scale: i32) -> f32x4;
    #[link_name = "llvm.x86.avx512.gather3div4.si"]
    unsafe fn vpgatherqd_128(src: i32x4, slice: *const u8, offsets: i64x2, k: u8, scale: i32) -> i32x4;
    #[link_name = "llvm.x86.avx512.gather3div2.di"]
    unsafe fn vpgatherqq_128(src: i64x2, slice: *const i8, offsets: i64x2, k: u8, scale: i32) -> i64x2;
    #[link_name = "llvm.x86.avx512.gather3div2.df"]
    unsafe fn vgatherqpd_128(src: f64x2, slice: *const i8, offsets: i64x2, k: u8, scale: i32) -> f64x2;
    #[link_name = "llvm.x86.avx512.gather3div4.sf"]
    unsafe fn vgatherqps_128(src: f32x4, slice: *const i8, offsets: i64x2, k: u8, scale: i32) -> f32x4;

    #[link_name = "llvm.x86.avx512.gather3siv8.si"]
    unsafe fn vpgatherdd_256(src: i32x8, slice: *const i8, offsets: i32x8, k: u8, scale: i32) -> i32x8;
    #[link_name = "llvm.x86.avx512.gather3siv4.di"]
    unsafe fn vpgatherdq_256(src: i64x4, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> i64x4;
    #[link_name = "llvm.x86.avx512.gather3siv4.df"]
    unsafe fn vgatherdpd_256(src: f64x4, slice: *const i8, offsets: i32x4, k: u8, scale: i32) -> f64x4;
    #[link_name = "llvm.x86.avx512.gather3siv8.sf"]
    unsafe fn vgatherdps_256(src: f32x8, slice: *const i8, offsets: i32x8, k: u8, scale: i32) -> f32x8;
    #[link_name = "llvm.x86.avx512.gather3div8.si"]
    unsafe fn vpgatherqd_256(src: i32x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> i32x4;
    #[link_name = "llvm.x86.avx512.gather3div4.di"]
    unsafe fn vpgatherqq_256(src: i64x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> i64x4;
    #[link_name = "llvm.x86.avx512.gather3div4.df"]
    unsafe fn vgatherqpd_256(src: f64x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> f64x4;
    #[link_name = "llvm.x86.avx512.gather3div8.sf"]
    unsafe fn vgatherqps_256(src: f32x4, slice: *const i8, offsets: i64x4, k: u8, scale: i32) -> f32x4;

    #[link_name = "llvm.x86.avx512.mask.cmp.ss"]
    unsafe fn vcmpss(a: __m128, b: __m128, op: i32, m: i8, sae: i32) -> i8;
    #[link_name = "llvm.x86.avx512.mask.cmp.sd"]
    unsafe fn vcmpsd(a: __m128d, b: __m128d, op: i32, m: i8, sae: i32) -> i8;

    #[link_name = "llvm.x86.avx512.mask.cmp.ps.512"]
    unsafe fn vcmpps(a: f32x16, b: f32x16, op: i32, m: i16, sae: i32) -> i16;
    #[link_name = "llvm.x86.avx512.mask.cmp.ps.256"]
    unsafe fn vcmpps256(a: f32x8, b: f32x8, op: i32, m: i8) -> i8;
    #[link_name = "llvm.x86.avx512.mask.cmp.ps.128"]
    unsafe fn vcmpps128(a: f32x4, b: f32x4, op: i32, m: i8) -> i8;

    #[link_name = "llvm.x86.avx512.mask.cmp.pd.512"]
    unsafe fn vcmppd(a: f64x8, b: f64x8, op: i32, m: i8, sae: i32) -> i8;
    #[link_name = "llvm.x86.avx512.mask.cmp.pd.256"]
    unsafe fn vcmppd256(a: f64x4, b: f64x4, op: i32, m: i8) -> i8;
    #[link_name = "llvm.x86.avx512.mask.cmp.pd.128"]
    unsafe fn vcmppd128(a: f64x2, b: f64x2, op: i32, m: i8) -> i8;

    #[link_name = "llvm.x86.avx512.mask.prol.d.512"]
    unsafe fn vprold(a: i32x16, i8: i32) -> i32x16;
    #[link_name = "llvm.x86.avx512.mask.prol.d.256"]
    unsafe fn vprold256(a: i32x8, i8: i32) -> i32x8;
    #[link_name = "llvm.x86.avx512.mask.prol.d.128"]
    unsafe fn vprold128(a: i32x4, i8: i32) -> i32x4;

    #[link_name = "llvm.x86.avx512.mask.pror.d.512"]
    unsafe fn vprord(a: i32x16, i8: i32) -> i32x16;
    #[link_name = "llvm.x86.avx512.mask.pror.d.256"]
    unsafe fn vprord256(a: i32x8, i8: i32) -> i32x8;
    #[link_name = "llvm.x86.avx512.mask.pror.d.128"]
    unsafe fn vprord128(a: i32x4, i8: i32) -> i32x4;

    #[link_name = "llvm.x86.avx512.mask.prol.q.512"]
    unsafe fn vprolq(a: i64x8, i8: i32) -> i64x8;
    #[link_name = "llvm.x86.avx512.mask.prol.q.256"]
    unsafe fn vprolq256(a: i64x4, i8: i32) -> i64x4;
    #[link_name = "llvm.x86.avx512.mask.prol.q.128"]
    unsafe fn vprolq128(a: i64x2, i8: i32) -> i64x2;

    #[link_name = "llvm.x86.avx512.mask.pror.q.512"]
    unsafe fn vprorq(a: i64x8, i8: i32) -> i64x8;
    #[link_name = "llvm.x86.avx512.mask.pror.q.256"]
    unsafe fn vprorq256(a: i64x4, i8: i32) -> i64x4;
    #[link_name = "llvm.x86.avx512.mask.pror.q.128"]
    unsafe fn vprorq128(a: i64x2, i8: i32) -> i64x2;

    #[link_name = "llvm.x86.avx512.mask.prolv.d.512"]
    unsafe fn vprolvd(a: i32x16, b: i32x16) -> i32x16;
    #[link_name = "llvm.x86.avx512.mask.prolv.d.256"]
    unsafe fn vprolvd256(a: i32x8, b: i32x8) -> i32x8;
    #[link_name = "llvm.x86.avx512.mask.prolv.d.128"]
    unsafe fn vprolvd128(a: i32x4, b: i32x4) -> i32x4;

    #[link_name = "llvm.x86.avx512.mask.prorv.d.512"]
    unsafe fn vprorvd(a: i32x16, b: i32x16) -> i32x16;
    #[link_name = "llvm.x86.avx512.mask.prorv.d.256"]
    unsafe fn vprorvd256(a: i32x8, b: i32x8) -> i32x8;
    #[link_name = "llvm.x86.avx512.mask.prorv.d.128"]
    unsafe fn vprorvd128(a: i32x4, b: i32x4) -> i32x4;

    #[link_name = "llvm.x86.avx512.mask.prolv.q.512"]
    unsafe fn vprolvq(a: i64x8, b: i64x8) -> i64x8;
    #[link_name = "llvm.x86.avx512.mask.prolv.q.256"]
    unsafe fn vprolvq256(a: i64x4, b: i64x4) -> i64x4;
    #[link_name = "llvm.x86.avx512.mask.prolv.q.128"]
    unsafe fn vprolvq128(a: i64x2, b: i64x2) -> i64x2;

    #[link_name = "llvm.x86.avx512.mask.prorv.q.512"]
    unsafe fn vprorvq(a: i64x8, b: i64x8) -> i64x8;
    #[link_name = "llvm.x86.avx512.mask.prorv.q.256"]
    unsafe fn vprorvq256(a: i64x4, b: i64x4) -> i64x4;
    #[link_name = "llvm.x86.avx512.mask.prorv.q.128"]
    unsafe fn vprorvq128(a: i64x2, b: i64x2) -> i64x2;

    #[link_name = "llvm.x86.avx512.psllv.d.512"]
    unsafe fn vpsllvd(a: i32x16, b: i32x16) -> i32x16;
    #[link_name = "llvm.x86.avx512.psrlv.d.512"]
    unsafe fn vpsrlvd(a: i32x16, b: i32x16) -> i32x16;
    #[link_name = "llvm.x86.avx512.psllv.q.512"]
    unsafe fn vpsllvq(a: i64x8, b: i64x8) -> i64x8;
    #[link_name = "llvm.x86.avx512.psrlv.q.512"]
    unsafe fn vpsrlvq(a: i64x8, b: i64x8) -> i64x8;

    #[link_name = "llvm.x86.avx512.psll.d.512"]
    unsafe fn vpslld(a: i32x16, count: i32x4) -> i32x16;
    #[link_name = "llvm.x86.avx512.psrl.d.512"]
    unsafe fn vpsrld(a: i32x16, count: i32x4) -> i32x16;
    #[link_name = "llvm.x86.avx512.psll.q.512"]
    unsafe fn vpsllq(a: i64x8, count: i64x2) -> i64x8;
    #[link_name = "llvm.x86.avx512.psrl.q.512"]
    unsafe fn vpsrlq(a: i64x8, count: i64x2) -> i64x8;

    #[link_name = "llvm.x86.avx512.psra.d.512"]
    unsafe fn vpsrad(a: i32x16, count: i32x4) -> i32x16;

    #[link_name = "llvm.x86.avx512.psra.q.512"]
    unsafe fn vpsraq(a: i64x8, count: i64x2) -> i64x8;
    #[link_name = "llvm.x86.avx512.psra.q.256"]
    unsafe fn vpsraq256(a: i64x4, count: i64x2) -> i64x4;
    #[link_name = "llvm.x86.avx512.psra.q.128"]
    unsafe fn vpsraq128(a: i64x2, count: i64x2) -> i64x2;

    #[link_name = "llvm.x86.avx512.psrav.d.512"]
    unsafe fn vpsravd(a: i32x16, count: i32x16) -> i32x16;

    #[link_name = "llvm.x86.avx512.psrav.q.512"]
    unsafe fn vpsravq(a: i64x8, count: i64x8) -> i64x8;
    #[link_name = "llvm.x86.avx512.psrav.q.256"]
    unsafe fn vpsravq256(a: i64x4, count: i64x4) -> i64x4;
    #[link_name = "llvm.x86.avx512.psrav.q.128"]
    unsafe fn vpsravq128(a: i64x2, count: i64x2) -> i64x2;

    #[link_name = "llvm.x86.avx512.vpermilvar.ps.512"]
    unsafe fn vpermilps(a: f32x16, b: i32x16) -> f32x16;
    #[link_name = "llvm.x86.avx512.vpermilvar.pd.512"]
    unsafe fn vpermilpd(a: f64x8, b: i64x8) -> f64x8;

    #[link_name = "llvm.x86.avx512.permvar.si.512"]
    unsafe fn vpermd(a: i32x16, idx: i32x16) -> i32x16;

    #[link_name = "llvm.x86.avx512.permvar.di.512"]
    unsafe fn vpermq(a: i64x8, idx: i64x8) -> i64x8;
    #[link_name = "llvm.x86.avx512.permvar.di.256"]
    unsafe fn vpermq256(a: i64x4, idx: i64x4) -> i64x4;

    #[link_name = "llvm.x86.avx512.permvar.sf.512"]
    unsafe fn vpermps(a: f32x16, idx: i32x16) -> f32x16;

    #[link_name = "llvm.x86.avx512.permvar.df.512"]
    unsafe fn vpermpd(a: f64x8, idx: i64x8) -> f64x8;
    #[link_name = "llvm.x86.avx512.permvar.df.256"]
    unsafe fn vpermpd256(a: f64x4, idx: i64x4) -> f64x4;

    #[link_name = "llvm.x86.avx512.vpermi2var.d.512"]
    unsafe fn vpermi2d(a: i32x16, idx: i32x16, b: i32x16) -> i32x16;
    #[link_name = "llvm.x86.avx512.vpermi2var.d.256"]
    unsafe fn vpermi2d256(a: i32x8, idx: i32x8, b: i32x8) -> i32x8;
    #[link_name = "llvm.x86.avx512.vpermi2var.d.128"]
    unsafe fn vpermi2d128(a: i32x4, idx: i32x4, b: i32x4) -> i32x4;

    #[link_name = "llvm.x86.avx512.vpermi2var.q.512"]
    unsafe fn vpermi2q(a: i64x8, idx: i64x8, b: i64x8) -> i64x8;
    #[link_name = "llvm.x86.avx512.vpermi2var.q.256"]
    unsafe fn vpermi2q256(a: i64x4, idx: i64x4, b: i64x4) -> i64x4;
    #[link_name = "llvm.x86.avx512.vpermi2var.q.128"]
    unsafe fn vpermi2q128(a: i64x2, idx: i64x2, b: i64x2) -> i64x2;

    #[link_name = "llvm.x86.avx512.vpermi2var.ps.512"]
    unsafe fn vpermi2ps(a: f32x16, idx: i32x16, b: f32x16) -> f32x16;
    #[link_name = "llvm.x86.avx512.vpermi2var.ps.256"]
    unsafe fn vpermi2ps256(a: f32x8, idx: i32x8, b: f32x8) -> f32x8;
    #[link_name = "llvm.x86.avx512.vpermi2var.ps.128"]
    unsafe fn vpermi2ps128(a: f32x4, idx: i32x4, b: f32x4) -> f32x4;

    #[link_name = "llvm.x86.avx512.vpermi2var.pd.512"]
    unsafe fn vpermi2pd(a: f64x8, idx: i64x8, b: f64x8) -> f64x8;
    #[link_name = "llvm.x86.avx512.vpermi2var.pd.256"]
    unsafe fn vpermi2pd256(a: f64x4, idx: i64x4, b: f64x4) -> f64x4;
    #[link_name = "llvm.x86.avx512.vpermi2var.pd.128"]
    unsafe fn vpermi2pd128(a: f64x2, idx: i64x2, b: f64x2) -> f64x2;

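    // Compress packs the mask-selected elements contiguously into the low lanes
    // (the `compress.store` forms write them straight to memory); expand performs
    // the inverse, placing consecutive source elements into the mask-selected lanes.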
    #[link_name = "llvm.x86.avx512.mask.compress.d.512"]
    unsafe fn vpcompressd(a: i32x16, src: i32x16, mask: u16) -> i32x16;
    #[link_name = "llvm.x86.avx512.mask.compress.d.256"]
    unsafe fn vpcompressd256(a: i32x8, src: i32x8, mask: u8) -> i32x8;
    #[link_name = "llvm.x86.avx512.mask.compress.d.128"]
    unsafe fn vpcompressd128(a: i32x4, src: i32x4, mask: u8) -> i32x4;

    #[link_name = "llvm.x86.avx512.mask.compress.q.512"]
    unsafe fn vpcompressq(a: i64x8, src: i64x8, mask: u8) -> i64x8;
    #[link_name = "llvm.x86.avx512.mask.compress.q.256"]
    unsafe fn vpcompressq256(a: i64x4, src: i64x4, mask: u8) -> i64x4;
    #[link_name = "llvm.x86.avx512.mask.compress.q.128"]
    unsafe fn vpcompressq128(a: i64x2, src: i64x2, mask: u8) -> i64x2;

    #[link_name = "llvm.x86.avx512.mask.compress.ps.512"]
    unsafe fn vcompressps(a: f32x16, src: f32x16, mask: u16) -> f32x16;
    #[link_name = "llvm.x86.avx512.mask.compress.ps.256"]
    unsafe fn vcompressps256(a: f32x8, src: f32x8, mask: u8) -> f32x8;
    #[link_name = "llvm.x86.avx512.mask.compress.ps.128"]
    unsafe fn vcompressps128(a: f32x4, src: f32x4, mask: u8) -> f32x4;

    #[link_name = "llvm.x86.avx512.mask.compress.pd.512"]
    unsafe fn vcompresspd(a: f64x8, src: f64x8, mask: u8) -> f64x8;
    #[link_name = "llvm.x86.avx512.mask.compress.pd.256"]
    unsafe fn vcompresspd256(a: f64x4, src: f64x4, mask: u8) -> f64x4;
    #[link_name = "llvm.x86.avx512.mask.compress.pd.128"]
    unsafe fn vcompresspd128(a: f64x2, src: f64x2, mask: u8) -> f64x2;

    #[link_name = "llvm.x86.avx512.mask.compress.store.d.512"]
    unsafe fn vcompressstored(mem: *mut i8, data: i32x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.compress.store.d.256"]
    unsafe fn vcompressstored256(mem: *mut i8, data: i32x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.compress.store.d.128"]
    unsafe fn vcompressstored128(mem: *mut i8, data: i32x4, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.compress.store.q.512"]
    unsafe fn vcompressstoreq(mem: *mut i8, data: i64x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.compress.store.q.256"]
    unsafe fn vcompressstoreq256(mem: *mut i8, data: i64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.compress.store.q.128"]
    unsafe fn vcompressstoreq128(mem: *mut i8, data: i64x2, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.compress.store.ps.512"]
    unsafe fn vcompressstoreps(mem: *mut i8, data: f32x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.compress.store.ps.256"]
    unsafe fn vcompressstoreps256(mem: *mut i8, data: f32x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.compress.store.ps.128"]
    unsafe fn vcompressstoreps128(mem: *mut i8, data: f32x4, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.compress.store.pd.512"]
    unsafe fn vcompressstorepd(mem: *mut i8, data: f64x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.compress.store.pd.256"]
    unsafe fn vcompressstorepd256(mem: *mut i8, data: f64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.compress.store.pd.128"]
    unsafe fn vcompressstorepd128(mem: *mut i8, data: f64x2, mask: u8);

    #[link_name = "llvm.x86.avx512.mask.expand.d.512"]
    unsafe fn vpexpandd(a: i32x16, src: i32x16, mask: u16) -> i32x16;
    #[link_name = "llvm.x86.avx512.mask.expand.d.256"]
    unsafe fn vpexpandd256(a: i32x8, src: i32x8, mask: u8) -> i32x8;
    #[link_name = "llvm.x86.avx512.mask.expand.d.128"]
    unsafe fn vpexpandd128(a: i32x4, src: i32x4, mask: u8) -> i32x4;

    #[link_name = "llvm.x86.avx512.mask.expand.q.512"]
    unsafe fn vpexpandq(a: i64x8, src: i64x8, mask: u8) -> i64x8;
    #[link_name = "llvm.x86.avx512.mask.expand.q.256"]
    unsafe fn vpexpandq256(a: i64x4, src: i64x4, mask: u8) -> i64x4;
    #[link_name = "llvm.x86.avx512.mask.expand.q.128"]
    unsafe fn vpexpandq128(a: i64x2, src: i64x2, mask: u8) -> i64x2;

    #[link_name = "llvm.x86.avx512.mask.expand.ps.512"]
    unsafe fn vexpandps(a: f32x16, src: f32x16, mask: u16) -> f32x16;
    #[link_name = "llvm.x86.avx512.mask.expand.ps.256"]
    unsafe fn vexpandps256(a: f32x8, src: f32x8, mask: u8) -> f32x8;
    #[link_name = "llvm.x86.avx512.mask.expand.ps.128"]
    unsafe fn vexpandps128(a: f32x4, src: f32x4, mask: u8) -> f32x4;

    #[link_name = "llvm.x86.avx512.mask.expand.pd.512"]
    unsafe fn vexpandpd(a: f64x8, src: f64x8, mask: u8) -> f64x8;
    #[link_name = "llvm.x86.avx512.mask.expand.pd.256"]
    unsafe fn vexpandpd256(a: f64x4, src: f64x4, mask: u8) -> f64x4;
    #[link_name = "llvm.x86.avx512.mask.expand.pd.128"]
    unsafe fn vexpandpd128(a: f64x2, src: f64x2, mask: u8) -> f64x2;

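    // Scalar (`ss`/`sd`) operations with write-mask and rounding/SAE control;
    // only element 0 is computed.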
43054 #[link_name = "llvm.x86.avx512.mask.add.ss.round"]
43055 unsafefn vaddss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
43056 #[link_name = "llvm.x86.avx512.mask.add.sd.round"]
43057 unsafefn vaddsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
43058 #[link_name = "llvm.x86.avx512.mask.sub.ss.round"]
43059 unsafefn vsubss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
43060 #[link_name = "llvm.x86.avx512.mask.sub.sd.round"]
43061 unsafefn vsubsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
43062 #[link_name = "llvm.x86.avx512.mask.mul.ss.round"]
43063 unsafefn vmulss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
43064 #[link_name = "llvm.x86.avx512.mask.mul.sd.round"]
43065 unsafefn vmulsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
43066 #[link_name = "llvm.x86.avx512.mask.div.ss.round"]
43067 unsafefn vdivss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
43068 #[link_name = "llvm.x86.avx512.mask.div.sd.round"]
43069 unsafefn vdivsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
43070 #[link_name = "llvm.x86.avx512.mask.max.ss.round"]
43071 unsafefn vmaxss(a: f32x4, b: f32x4, src: f32x4, mask: u8, sae: i32) -> f32x4;
43072 #[link_name = "llvm.x86.avx512.mask.max.sd.round"]
43073 unsafefn vmaxsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, sae: i32) -> f64x2;
43074 #[link_name = "llvm.x86.avx512.mask.min.ss.round"]
43075 unsafefn vminss(a: f32x4, b: f32x4, src: f32x4, mask: u8, sae: i32) -> f32x4;
43076 #[link_name = "llvm.x86.avx512.mask.min.sd.round"]
43077 unsafefn vminsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, sae: i32) -> f64x2;
43078 #[link_name = "llvm.x86.avx512.mask.sqrt.ss"]
43079 unsafefn vsqrtss(a: __m128, b: __m128, src: __m128, mask: u8, rounding: i32) -> __m128;
43080 #[link_name = "llvm.x86.avx512.mask.sqrt.sd"]
43081 unsafefn vsqrtsd(a: __m128d, b: __m128d, src: __m128d, mask: u8, rounding: i32) -> __m128d;
43082 #[link_name = "llvm.x86.avx512.mask.getexp.ss"]
43083 unsafefn vgetexpss(a: f32x4, b: f32x4, src: f32x4, mask: u8, sae: i32) -> f32x4;
43084 #[link_name = "llvm.x86.avx512.mask.getexp.sd"]
43085 unsafefn vgetexpsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, sae: i32) -> f64x2;
43086 #[link_name = "llvm.x86.avx512.mask.getmant.ss"]
43087 unsafefn vgetmantss(a: f32x4, b: f32x4, mantissas: i32, src: f32x4, m: u8, sae: i32) -> f32x4;
43088 #[link_name = "llvm.x86.avx512.mask.getmant.sd"]
43089 unsafefn vgetmantsd(a: f64x2, b: f64x2, mantissas: i32, src: f64x2, m: u8, sae: i32) -> f64x2;
43090
43091 #[link_name = "llvm.x86.avx512.rsqrt14.ss"]
43092 unsafefn vrsqrt14ss(a: f32x4, b: f32x4, src: f32x4, mask: u8) -> f32x4;
43093 #[link_name = "llvm.x86.avx512.rsqrt14.sd"]
43094 unsafefn vrsqrt14sd(a: f64x2, b: f64x2, src: f64x2, mask: u8) -> f64x2;
43095 #[link_name = "llvm.x86.avx512.rcp14.ss"]
43096 unsafefn vrcp14ss(a: f32x4, b: f32x4, src: f32x4, mask: u8) -> f32x4;
43097 #[link_name = "llvm.x86.avx512.rcp14.sd"]
43098 unsafefn vrcp14sd(a: f64x2, b: f64x2, src: f64x2, mask: u8) -> f64x2;
43099
43100 #[link_name = "llvm.x86.avx512.mask.rndscale.ss"]
43101 unsafefn vrndscaless(a: f32x4, b: f32x4, src: f32x4, mask: u8, imm8: i32, sae: i32) -> f32x4;
43102 #[link_name = "llvm.x86.avx512.mask.rndscale.sd"]
43103 unsafefn vrndscalesd(a: f64x2, b: f64x2, src: f64x2, mask: u8, imm8: i32, sae: i32) -> f64x2;
43104 #[link_name = "llvm.x86.avx512.mask.scalef.ss"]
43105 unsafefn vscalefss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
43106 #[link_name = "llvm.x86.avx512.mask.scalef.sd"]
43107 unsafefn vscalefsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
43108
    #[link_name = "llvm.x86.avx512.vfmadd.f32"]
    unsafe fn vfmaddssround(a: f32, b: f32, c: f32, rounding: i32) -> f32;
    #[link_name = "llvm.x86.avx512.vfmadd.f64"]
    unsafe fn vfmaddsdround(a: f64, b: f64, c: f64, rounding: i32) -> f64;

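    // Scalar VFIXUPIMM bindings: the integer vector `c` supplies the fixup lookup
    // table consulted per element.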
    #[link_name = "llvm.x86.avx512.mask.fixupimm.ss"]
    unsafe fn vfixupimmss(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8, sae: i32) -> f32x4;
    #[link_name = "llvm.x86.avx512.mask.fixupimm.sd"]
    unsafe fn vfixupimmsd(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8, sae: i32) -> f64x2;
    #[link_name = "llvm.x86.avx512.maskz.fixupimm.ss"]
    unsafe fn vfixupimmssz(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8, sae: i32) -> f32x4;
    #[link_name = "llvm.x86.avx512.maskz.fixupimm.sd"]
    unsafe fn vfixupimmsdz(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8, sae: i32) -> f64x2;

    #[link_name = "llvm.x86.avx512.mask.cvtss2sd.round"]
    unsafe fn vcvtss2sd(a: f64x2, b: f32x4, src: f64x2, mask: u8, sae: i32) -> f64x2;
    #[link_name = "llvm.x86.avx512.mask.cvtsd2ss.round"]
    unsafe fn vcvtsd2ss(a: f32x4, b: f64x2, src: f32x4, mask: u8, rounding: i32) -> f32x4;

    #[link_name = "llvm.x86.avx512.vcvtss2si32"]
    unsafe fn vcvtss2si(a: f32x4, rounding: i32) -> i32;
    #[link_name = "llvm.x86.avx512.vcvtss2usi32"]
    unsafe fn vcvtss2usi(a: f32x4, rounding: i32) -> u32;

    #[link_name = "llvm.x86.avx512.vcvtsd2si32"]
    unsafe fn vcvtsd2si(a: f64x2, rounding: i32) -> i32;
    #[link_name = "llvm.x86.avx512.vcvtsd2usi32"]
    unsafe fn vcvtsd2usi(a: f64x2, rounding: i32) -> u32;

    #[link_name = "llvm.x86.avx512.cvtsi2ss32"]
    unsafe fn vcvtsi2ss(a: f32x4, b: i32, rounding: i32) -> f32x4;

    #[link_name = "llvm.x86.avx512.cvtusi2ss"]
    unsafe fn vcvtusi2ss(a: f32x4, b: u32, rounding: i32) -> f32x4;

    #[link_name = "llvm.x86.avx512.cvttss2si"]
    unsafe fn vcvttss2si(a: f32x4, rounding: i32) -> i32;
    #[link_name = "llvm.x86.avx512.cvttss2usi"]
    unsafe fn vcvttss2usi(a: f32x4, rounding: i32) -> u32;

    #[link_name = "llvm.x86.avx512.cvttsd2si"]
    unsafe fn vcvttsd2si(a: f64x2, rounding: i32) -> i32;
    #[link_name = "llvm.x86.avx512.cvttsd2usi"]
    unsafe fn vcvttsd2usi(a: f64x2, rounding: i32) -> u32;

    #[link_name = "llvm.x86.avx512.vcomi.ss"]
    unsafe fn vcomiss(a: f32x4, b: f32x4, imm8: i32, sae: i32) -> i32;
    #[link_name = "llvm.x86.avx512.vcomi.sd"]
    unsafe fn vcomisd(a: f64x2, b: f64x2, imm8: i32, sae: i32) -> i32;

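    // Masked unaligned loads (vmovdqu32/64, vmovups/upd): lanes whose mask bit is
    // clear keep the corresponding value from `a`.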
    #[link_name = "llvm.x86.avx512.mask.loadu.d.128"]
    unsafe fn loaddqu32_128(mem_addr: *const i32, a: i32x4, mask: u8) -> i32x4;
    #[link_name = "llvm.x86.avx512.mask.loadu.q.128"]
    unsafe fn loaddqu64_128(mem_addr: *const i64, a: i64x2, mask: u8) -> i64x2;
    #[link_name = "llvm.x86.avx512.mask.loadu.ps.128"]
    unsafe fn loadups_128(mem_addr: *const f32, a: f32x4, mask: u8) -> f32x4;
    #[link_name = "llvm.x86.avx512.mask.loadu.pd.128"]
    unsafe fn loadupd_128(mem_addr: *const f64, a: f64x2, mask: u8) -> f64x2;
    #[link_name = "llvm.x86.avx512.mask.loadu.d.256"]
    unsafe fn loaddqu32_256(mem_addr: *const i32, a: i32x8, mask: u8) -> i32x8;
    #[link_name = "llvm.x86.avx512.mask.loadu.q.256"]
    unsafe fn loaddqu64_256(mem_addr: *const i64, a: i64x4, mask: u8) -> i64x4;
    #[link_name = "llvm.x86.avx512.mask.loadu.ps.256"]
    unsafe fn loadups_256(mem_addr: *const f32, a: f32x8, mask: u8) -> f32x8;
    #[link_name = "llvm.x86.avx512.mask.loadu.pd.256"]
    unsafe fn loadupd_256(mem_addr: *const f64, a: f64x4, mask: u8) -> f64x4;
    #[link_name = "llvm.x86.avx512.mask.loadu.d.512"]
    unsafe fn loaddqu32_512(mem_addr: *const i32, a: i32x16, mask: u16) -> i32x16;
    #[link_name = "llvm.x86.avx512.mask.loadu.q.512"]
    unsafe fn loaddqu64_512(mem_addr: *const i64, a: i64x8, mask: u8) -> i64x8;
    #[link_name = "llvm.x86.avx512.mask.loadu.ps.512"]
    unsafe fn loadups_512(mem_addr: *const f32, a: f32x16, mask: u16) -> f32x16;
    #[link_name = "llvm.x86.avx512.mask.loadu.pd.512"]
    unsafe fn loadupd_512(mem_addr: *const f64, a: f64x8, mask: u8) -> f64x8;

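    // Masked aligned loads (vmovdqa32/64, vmovaps/apd): `mem_addr` must meet the
    // full vector alignment requirement.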
    #[link_name = "llvm.x86.avx512.mask.load.d.128"]
    unsafe fn loaddqa32_128(mem_addr: *const i32, a: i32x4, mask: u8) -> i32x4;
    #[link_name = "llvm.x86.avx512.mask.load.q.128"]
    unsafe fn loaddqa64_128(mem_addr: *const i64, a: i64x2, mask: u8) -> i64x2;
    #[link_name = "llvm.x86.avx512.mask.load.ps.128"]
    unsafe fn loadaps_128(mem_addr: *const f32, a: f32x4, mask: u8) -> f32x4;
    #[link_name = "llvm.x86.avx512.mask.load.pd.128"]
    unsafe fn loadapd_128(mem_addr: *const f64, a: f64x2, mask: u8) -> f64x2;
    #[link_name = "llvm.x86.avx512.mask.load.d.256"]
    unsafe fn loaddqa32_256(mem_addr: *const i32, a: i32x8, mask: u8) -> i32x8;
    #[link_name = "llvm.x86.avx512.mask.load.q.256"]
    unsafe fn loaddqa64_256(mem_addr: *const i64, a: i64x4, mask: u8) -> i64x4;
    #[link_name = "llvm.x86.avx512.mask.load.ps.256"]
    unsafe fn loadaps_256(mem_addr: *const f32, a: f32x8, mask: u8) -> f32x8;
    #[link_name = "llvm.x86.avx512.mask.load.pd.256"]
    unsafe fn loadapd_256(mem_addr: *const f64, a: f64x4, mask: u8) -> f64x4;
    #[link_name = "llvm.x86.avx512.mask.load.d.512"]
    unsafe fn loaddqa32_512(mem_addr: *const i32, a: i32x16, mask: u16) -> i32x16;
    #[link_name = "llvm.x86.avx512.mask.load.q.512"]
    unsafe fn loaddqa64_512(mem_addr: *const i64, a: i64x8, mask: u8) -> i64x8;
    #[link_name = "llvm.x86.avx512.mask.load.ps.512"]
    unsafe fn loadaps_512(mem_addr: *const f32, a: f32x16, mask: u16) -> f32x16;
    #[link_name = "llvm.x86.avx512.mask.load.pd.512"]
    unsafe fn loadapd_512(mem_addr: *const f64, a: f64x8, mask: u8) -> f64x8;

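    // Masked unaligned stores: only lanes whose mask bit is set are written back
    // to memory.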
    #[link_name = "llvm.x86.avx512.mask.storeu.d.128"]
    unsafe fn storedqu32_128(mem_addr: *mut i32, a: i32x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.storeu.q.128"]
    unsafe fn storedqu64_128(mem_addr: *mut i64, a: i64x2, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.storeu.ps.128"]
    unsafe fn storeups_128(mem_addr: *mut f32, a: f32x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.storeu.pd.128"]
    unsafe fn storeupd_128(mem_addr: *mut f64, a: f64x2, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.storeu.d.256"]
    unsafe fn storedqu32_256(mem_addr: *mut i32, a: i32x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.storeu.q.256"]
    unsafe fn storedqu64_256(mem_addr: *mut i64, a: i64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.storeu.ps.256"]
    unsafe fn storeups_256(mem_addr: *mut f32, a: f32x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.storeu.pd.256"]
    unsafe fn storeupd_256(mem_addr: *mut f64, a: f64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.storeu.d.512"]
    unsafe fn storedqu32_512(mem_addr: *mut i32, a: i32x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.storeu.q.512"]
    unsafe fn storedqu64_512(mem_addr: *mut i64, a: i64x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.storeu.ps.512"]
    unsafe fn storeups_512(mem_addr: *mut f32, a: f32x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.storeu.pd.512"]
    unsafe fn storeupd_512(mem_addr: *mut f64, a: f64x8, mask: u8);

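    // Masked aligned stores; `mem_addr` must meet the full vector alignment
    // requirement.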
    #[link_name = "llvm.x86.avx512.mask.store.d.128"]
    unsafe fn storedqa32_128(mem_addr: *mut i32, a: i32x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.store.q.128"]
    unsafe fn storedqa64_128(mem_addr: *mut i64, a: i64x2, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.store.ps.128"]
    unsafe fn storeaps_128(mem_addr: *mut f32, a: f32x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.store.pd.128"]
    unsafe fn storeapd_128(mem_addr: *mut f64, a: f64x2, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.store.d.256"]
    unsafe fn storedqa32_256(mem_addr: *mut i32, a: i32x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.store.q.256"]
    unsafe fn storedqa64_256(mem_addr: *mut i64, a: i64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.store.ps.256"]
    unsafe fn storeaps_256(mem_addr: *mut f32, a: f32x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.store.pd.256"]
    unsafe fn storeapd_256(mem_addr: *mut f64, a: f64x4, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.store.d.512"]
    unsafe fn storedqa32_512(mem_addr: *mut i32, a: i32x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.store.q.512"]
    unsafe fn storedqa64_512(mem_addr: *mut i64, a: i64x8, mask: u8);
    #[link_name = "llvm.x86.avx512.mask.store.ps.512"]
    unsafe fn storeaps_512(mem_addr: *mut f32, a: f32x16, mask: u16);
    #[link_name = "llvm.x86.avx512.mask.store.pd.512"]
    unsafe fn storeapd_512(mem_addr: *mut f64, a: f64x8, mask: u8);

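    // Masked expand loads: contiguous elements are read from memory and placed into
    // the destination lanes selected by the mask; unselected lanes keep the
    // corresponding value from `a`.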
    #[link_name = "llvm.x86.avx512.mask.expand.load.d.128"]
    unsafe fn expandloadd_128(mem_addr: *const i32, a: i32x4, mask: u8) -> i32x4;
    #[link_name = "llvm.x86.avx512.mask.expand.load.q.128"]
    unsafe fn expandloadq_128(mem_addr: *const i64, a: i64x2, mask: u8) -> i64x2;
    #[link_name = "llvm.x86.avx512.mask.expand.load.ps.128"]
    unsafe fn expandloadps_128(mem_addr: *const f32, a: f32x4, mask: u8) -> f32x4;
    #[link_name = "llvm.x86.avx512.mask.expand.load.pd.128"]
    unsafe fn expandloadpd_128(mem_addr: *const f64, a: f64x2, mask: u8) -> f64x2;
    #[link_name = "llvm.x86.avx512.mask.expand.load.d.256"]
    unsafe fn expandloadd_256(mem_addr: *const i32, a: i32x8, mask: u8) -> i32x8;
    #[link_name = "llvm.x86.avx512.mask.expand.load.q.256"]
    unsafe fn expandloadq_256(mem_addr: *const i64, a: i64x4, mask: u8) -> i64x4;
    #[link_name = "llvm.x86.avx512.mask.expand.load.ps.256"]
    unsafe fn expandloadps_256(mem_addr: *const f32, a: f32x8, mask: u8) -> f32x8;
    #[link_name = "llvm.x86.avx512.mask.expand.load.pd.256"]
    unsafe fn expandloadpd_256(mem_addr: *const f64, a: f64x4, mask: u8) -> f64x4;
    #[link_name = "llvm.x86.avx512.mask.expand.load.d.512"]
    unsafe fn expandloadd_512(mem_addr: *const i32, a: i32x16, mask: u16) -> i32x16;
    #[link_name = "llvm.x86.avx512.mask.expand.load.q.512"]
    unsafe fn expandloadq_512(mem_addr: *const i64, a: i64x8, mask: u8) -> i64x8;
    #[link_name = "llvm.x86.avx512.mask.expand.load.ps.512"]
    unsafe fn expandloadps_512(mem_addr: *const f32, a: f32x16, mask: u16) -> f32x16;
    #[link_name = "llvm.x86.avx512.mask.expand.load.pd.512"]
    unsafe fn expandloadpd_512(mem_addr: *const f64, a: f64x8, mask: u8) -> f64x8;

}

#[cfg(test)]
mod tests {

    use stdarch_test::simd_test;

    use crate::core_arch::x86::*;
    use crate::hint::black_box;
    use crate::mem::{self};

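    // Mask convention used throughout these tests: bit i of the mask corresponds to
    // element i, so with the `setr` constructors (memory order) a mask such as
    // 0b00000000_11111111 selects the first eight elements. A mask of 0 yields `src`
    // unchanged for writemask variants and all zeros for zeromask (`maskz`) variants.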
43295 #[simd_test(enable = "avx512f")]
43296 unsafe fn test_mm512_abs_epi32() {
43297 #[rustfmt::skip]
43298 let a = _mm512_setr_epi32(
43299 0, 1, -1, i32::MAX,
43300 i32::MIN, 100, -100, -32,
43301 0, 1, -1, i32::MAX,
43302 i32::MIN, 100, -100, -32,
43303 );
43304 let r = _mm512_abs_epi32(a);
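        // `i32::MIN` has no positive counterpart: the absolute value wraps, and
        // `i32::MAX.wrapping_add(1)` is exactly `i32::MIN`.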
43305 #[rustfmt::skip]
43306 let e = _mm512_setr_epi32(
43307 0, 1, 1, i32::MAX,
43308 i32::MAX.wrapping_add(1), 100, 100, 32,
43309 0, 1, 1, i32::MAX,
43310 i32::MAX.wrapping_add(1), 100, 100, 32,
43311 );
43312 assert_eq_m512i(r, e);
43313 }
43314
43315 #[simd_test(enable = "avx512f")]
43316 unsafe fn test_mm512_mask_abs_epi32() {
43317 #[rustfmt::skip]
43318 let a = _mm512_setr_epi32(
43319 0, 1, -1, i32::MAX,
43320 i32::MIN, 100, -100, -32,
43321 0, 1, -1, i32::MAX,
43322 i32::MIN, 100, -100, -32,
43323 );
43324 let r = _mm512_mask_abs_epi32(a, 0, a);
43325 assert_eq_m512i(r, a);
43326 let r = _mm512_mask_abs_epi32(a, 0b00000000_11111111, a);
43327 #[rustfmt::skip]
43328 let e = _mm512_setr_epi32(
43329 0, 1, 1, i32::MAX,
43330 i32::MAX.wrapping_add(1), 100, 100, 32,
43331 0, 1, -1, i32::MAX,
43332 i32::MIN, 100, -100, -32,
43333 );
43334 assert_eq_m512i(r, e);
43335 }
43336
43337 #[simd_test(enable = "avx512f")]
43338 unsafe fn test_mm512_maskz_abs_epi32() {
43339 #[rustfmt::skip]
43340 let a = _mm512_setr_epi32(
43341 0, 1, -1, i32::MAX,
43342 i32::MIN, 100, -100, -32,
43343 0, 1, -1, i32::MAX,
43344 i32::MIN, 100, -100, -32,
43345 );
43346 let r = _mm512_maskz_abs_epi32(0, a);
43347 assert_eq_m512i(r, _mm512_setzero_si512());
43348 let r = _mm512_maskz_abs_epi32(0b00000000_11111111, a);
43349 #[rustfmt::skip]
43350 let e = _mm512_setr_epi32(
43351 0, 1, 1, i32::MAX,
43352 i32::MAX.wrapping_add(1), 100, 100, 32,
43353 0, 0, 0, 0,
43354 0, 0, 0, 0,
43355 );
43356 assert_eq_m512i(r, e);
43357 }
43358
43359 #[simd_test(enable = "avx512f,avx512vl")]
43360 unsafe fn test_mm256_mask_abs_epi32() {
43361 #[rustfmt::skip]
43362 let a = _mm256_setr_epi32(
43363 0, 1, -1, i32::MAX,
43364 i32::MIN, 100, -100, -32,
43365 );
43366 let r = _mm256_mask_abs_epi32(a, 0, a);
43367 assert_eq_m256i(r, a);
43368 let r = _mm256_mask_abs_epi32(a, 0b00001111, a);
43369 #[rustfmt::skip]
43370 let e = _mm256_setr_epi32(
43371 0, 1, 1, i32::MAX,
43372 i32::MAX.wrapping_add(1), 100, -100, -32,
43373 );
43374 assert_eq_m256i(r, e);
43375 }
43376
43377 #[simd_test(enable = "avx512f,avx512vl")]
43378 unsafe fn test_mm256_maskz_abs_epi32() {
43379 #[rustfmt::skip]
43380 let a = _mm256_setr_epi32(
43381 0, 1, -1, i32::MAX,
43382 i32::MIN, 100, -100, -32,
43383 );
43384 let r = _mm256_maskz_abs_epi32(0, a);
43385 assert_eq_m256i(r, _mm256_setzero_si256());
43386 let r = _mm256_maskz_abs_epi32(0b00001111, a);
43387 #[rustfmt::skip]
43388 let e = _mm256_setr_epi32(
43389 0, 1, 1, i32::MAX,
43390 0, 0, 0, 0,
43391 );
43392 assert_eq_m256i(r, e);
43393 }
43394
43395 #[simd_test(enable = "avx512f,avx512vl")]
43396 unsafe fn test_mm_mask_abs_epi32() {
43397 let a = _mm_setr_epi32(i32::MIN, 100, -100, -32);
43398 let r = _mm_mask_abs_epi32(a, 0, a);
43399 assert_eq_m128i(r, a);
43400 let r = _mm_mask_abs_epi32(a, 0b00001111, a);
43401 let e = _mm_setr_epi32(i32::MAX.wrapping_add(1), 100, 100, 32);
43402 assert_eq_m128i(r, e);
43403 }
43404
43405 #[simd_test(enable = "avx512f,avx512vl")]
43406 unsafe fn test_mm_maskz_abs_epi32() {
43407 let a = _mm_setr_epi32(i32::MIN, 100, -100, -32);
43408 let r = _mm_maskz_abs_epi32(0, a);
43409 assert_eq_m128i(r, _mm_setzero_si128());
43410 let r = _mm_maskz_abs_epi32(0b00001111, a);
43411 let e = _mm_setr_epi32(i32::MAX.wrapping_add(1), 100, 100, 32);
43412 assert_eq_m128i(r, e);
43413 }
43414
43415 #[simd_test(enable = "avx512f")]
43416 unsafe fn test_mm512_abs_ps() {
43417 #[rustfmt::skip]
43418 let a = _mm512_setr_ps(
43419 0., 1., -1., f32::MAX,
43420 f32::MIN, 100., -100., -32.,
43421 0., 1., -1., f32::MAX,
43422 f32::MIN, 100., -100., -32.,
43423 );
43424 let r = _mm512_abs_ps(a);
43425 #[rustfmt::skip]
43426 let e = _mm512_setr_ps(
43427 0., 1., 1., f32::MAX,
43428 f32::MAX, 100., 100., 32.,
43429 0., 1., 1., f32::MAX,
43430 f32::MAX, 100., 100., 32.,
43431 );
43432 assert_eq_m512(r, e);
43433 }
43434
43435 #[simd_test(enable = "avx512f")]
43436 unsafe fn test_mm512_mask_abs_ps() {
43437 #[rustfmt::skip]
43438 let a = _mm512_setr_ps(
43439 0., 1., -1., f32::MAX,
43440 f32::MIN, 100., -100., -32.,
43441 0., 1., -1., f32::MAX,
43442 f32::MIN, 100., -100., -32.,
43443 );
43444 let r = _mm512_mask_abs_ps(a, 0, a);
43445 assert_eq_m512(r, a);
43446 let r = _mm512_mask_abs_ps(a, 0b00000000_11111111, a);
43447 #[rustfmt::skip]
43448 let e = _mm512_setr_ps(
43449 0., 1., 1., f32::MAX,
43450 f32::MAX, 100., 100., 32.,
43451 0., 1., -1., f32::MAX,
43452 f32::MIN, 100., -100., -32.,
43453 );
43454 assert_eq_m512(r, e);
43455 }
43456
43457 #[simd_test(enable = "avx512f")]
43458 unsafe fn test_mm512_mask_mov_epi32() {
43459 let src = _mm512_set1_epi32(1);
43460 let a = _mm512_set1_epi32(2);
43461 let r = _mm512_mask_mov_epi32(src, 0, a);
43462 assert_eq_m512i(r, src);
43463 let r = _mm512_mask_mov_epi32(src, 0b11111111_11111111, a);
43464 assert_eq_m512i(r, a);
43465 }
43466
43467 #[simd_test(enable = "avx512f")]
43468 unsafe fn test_mm512_maskz_mov_epi32() {
43469 let a = _mm512_set1_epi32(2);
43470 let r = _mm512_maskz_mov_epi32(0, a);
43471 assert_eq_m512i(r, _mm512_setzero_si512());
43472 let r = _mm512_maskz_mov_epi32(0b11111111_11111111, a);
43473 assert_eq_m512i(r, a);
43474 }
43475
43476 #[simd_test(enable = "avx512f,avx512vl")]
43477 unsafe fn test_mm256_mask_mov_epi32() {
43478 let src = _mm256_set1_epi32(1);
43479 let a = _mm256_set1_epi32(2);
43480 let r = _mm256_mask_mov_epi32(src, 0, a);
43481 assert_eq_m256i(r, src);
43482 let r = _mm256_mask_mov_epi32(src, 0b11111111, a);
43483 assert_eq_m256i(r, a);
43484 }
43485
43486 #[simd_test(enable = "avx512f,avx512vl")]
43487 unsafe fn test_mm256_maskz_mov_epi32() {
43488 let a = _mm256_set1_epi32(2);
43489 let r = _mm256_maskz_mov_epi32(0, a);
43490 assert_eq_m256i(r, _mm256_setzero_si256());
43491 let r = _mm256_maskz_mov_epi32(0b11111111, a);
43492 assert_eq_m256i(r, a);
43493 }
43494
43495 #[simd_test(enable = "avx512f,avx512vl")]
43496 unsafe fn test_mm_mask_mov_epi32() {
43497 let src = _mm_set1_epi32(1);
43498 let a = _mm_set1_epi32(2);
43499 let r = _mm_mask_mov_epi32(src, 0, a);
43500 assert_eq_m128i(r, src);
43501 let r = _mm_mask_mov_epi32(src, 0b00001111, a);
43502 assert_eq_m128i(r, a);
43503 }
43504
43505 #[simd_test(enable = "avx512f,avx512vl")]
43506 unsafe fn test_mm_maskz_mov_epi32() {
43507 let a = _mm_set1_epi32(2);
43508 let r = _mm_maskz_mov_epi32(0, a);
43509 assert_eq_m128i(r, _mm_setzero_si128());
43510 let r = _mm_maskz_mov_epi32(0b00001111, a);
43511 assert_eq_m128i(r, a);
43512 }
43513
43514 #[simd_test(enable = "avx512f")]
43515 unsafe fn test_mm512_mask_mov_ps() {
43516 let src = _mm512_set1_ps(1.);
43517 let a = _mm512_set1_ps(2.);
43518 let r = _mm512_mask_mov_ps(src, 0, a);
43519 assert_eq_m512(r, src);
43520 let r = _mm512_mask_mov_ps(src, 0b11111111_11111111, a);
43521 assert_eq_m512(r, a);
43522 }
43523
43524 #[simd_test(enable = "avx512f")]
43525 unsafe fn test_mm512_maskz_mov_ps() {
43526 let a = _mm512_set1_ps(2.);
43527 let r = _mm512_maskz_mov_ps(0, a);
43528 assert_eq_m512(r, _mm512_setzero_ps());
43529 let r = _mm512_maskz_mov_ps(0b11111111_11111111, a);
43530 assert_eq_m512(r, a);
43531 }
43532
43533 #[simd_test(enable = "avx512f,avx512vl")]
43534 unsafe fn test_mm256_mask_mov_ps() {
43535 let src = _mm256_set1_ps(1.);
43536 let a = _mm256_set1_ps(2.);
43537 let r = _mm256_mask_mov_ps(src, 0, a);
43538 assert_eq_m256(r, src);
43539 let r = _mm256_mask_mov_ps(src, 0b11111111, a);
43540 assert_eq_m256(r, a);
43541 }
43542
43543 #[simd_test(enable = "avx512f,avx512vl")]
43544 unsafe fn test_mm256_maskz_mov_ps() {
43545 let a = _mm256_set1_ps(2.);
43546 let r = _mm256_maskz_mov_ps(0, a);
43547 assert_eq_m256(r, _mm256_setzero_ps());
43548 let r = _mm256_maskz_mov_ps(0b11111111, a);
43549 assert_eq_m256(r, a);
43550 }
43551
43552 #[simd_test(enable = "avx512f,avx512vl")]
43553 unsafe fn test_mm_mask_mov_ps() {
43554 let src = _mm_set1_ps(1.);
43555 let a = _mm_set1_ps(2.);
43556 let r = _mm_mask_mov_ps(src, 0, a);
43557 assert_eq_m128(r, src);
43558 let r = _mm_mask_mov_ps(src, 0b00001111, a);
43559 assert_eq_m128(r, a);
43560 }
43561
43562 #[simd_test(enable = "avx512f,avx512vl")]
43563 unsafe fn test_mm_maskz_mov_ps() {
43564 let a = _mm_set1_ps(2.);
43565 let r = _mm_maskz_mov_ps(0, a);
43566 assert_eq_m128(r, _mm_setzero_ps());
43567 let r = _mm_maskz_mov_ps(0b00001111, a);
43568 assert_eq_m128(r, a);
43569 }
43570
43571 #[simd_test(enable = "avx512f")]
43572 unsafe fn test_mm512_add_epi32() {
43573 #[rustfmt::skip]
43574 let a = _mm512_setr_epi32(
43575 0, 1, -1, i32::MAX,
43576 i32::MIN, 100, -100, -32,
43577 0, 1, -1, i32::MAX,
43578 i32::MIN, 100, -100, -32,
43579 );
43580 let b = _mm512_set1_epi32(1);
43581 let r = _mm512_add_epi32(a, b);
43582 #[rustfmt::skip]
43583 let e = _mm512_setr_epi32(
43584 1, 2, 0, i32::MIN,
43585 i32::MIN + 1, 101, -99, -31,
43586 1, 2, 0, i32::MIN,
43587 i32::MIN + 1, 101, -99, -31,
43588 );
43589 assert_eq_m512i(r, e);
43590 }
43591
43592 #[simd_test(enable = "avx512f")]
43593 unsafe fn test_mm512_mask_add_epi32() {
43594 #[rustfmt::skip]
43595 let a = _mm512_setr_epi32(
43596 0, 1, -1, i32::MAX,
43597 i32::MIN, 100, -100, -32,
43598 0, 1, -1, i32::MAX,
43599 i32::MIN, 100, -100, -32,
43600 );
43601 let b = _mm512_set1_epi32(1);
43602 let r = _mm512_mask_add_epi32(a, 0, a, b);
43603 assert_eq_m512i(r, a);
43604 let r = _mm512_mask_add_epi32(a, 0b00000000_11111111, a, b);
43605 #[rustfmt::skip]
43606 let e = _mm512_setr_epi32(
43607 1, 2, 0, i32::MIN,
43608 i32::MIN + 1, 101, -99, -31,
43609 0, 1, -1, i32::MAX,
43610 i32::MIN, 100, -100, -32,
43611 );
43612 assert_eq_m512i(r, e);
43613 }
43614
43615 #[simd_test(enable = "avx512f")]
43616 unsafe fn test_mm512_maskz_add_epi32() {
43617 #[rustfmt::skip]
43618 let a = _mm512_setr_epi32(
43619 0, 1, -1, i32::MAX,
43620 i32::MIN, 100, -100, -32,
43621 0, 1, -1, i32::MAX,
43622 i32::MIN, 100, -100, -32,
43623 );
43624 let b = _mm512_set1_epi32(1);
43625 let r = _mm512_maskz_add_epi32(0, a, b);
43626 assert_eq_m512i(r, _mm512_setzero_si512());
43627 let r = _mm512_maskz_add_epi32(0b00000000_11111111, a, b);
43628 #[rustfmt::skip]
43629 let e = _mm512_setr_epi32(
43630 1, 2, 0, i32::MIN,
43631 i32::MIN + 1, 101, -99, -31,
43632 0, 0, 0, 0,
43633 0, 0, 0, 0,
43634 );
43635 assert_eq_m512i(r, e);
43636 }
43637
43638 #[simd_test(enable = "avx512f,avx512vl")]
43639 unsafe fn test_mm256_mask_add_epi32() {
43640 let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
43641 let b = _mm256_set1_epi32(1);
43642 let r = _mm256_mask_add_epi32(a, 0, a, b);
43643 assert_eq_m256i(r, a);
43644 let r = _mm256_mask_add_epi32(a, 0b11111111, a, b);
43645 let e = _mm256_set_epi32(1, 2, 0, i32::MIN, i32::MIN + 1, 101, -99, -31);
43646 assert_eq_m256i(r, e);
43647 }
43648
43649 #[simd_test(enable = "avx512f,avx512vl")]
43650 unsafe fn test_mm256_maskz_add_epi32() {
43651 let a = _mm256_setr_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
43652 let b = _mm256_set1_epi32(1);
43653 let r = _mm256_maskz_add_epi32(0, a, b);
43654 assert_eq_m256i(r, _mm256_setzero_si256());
43655 let r = _mm256_maskz_add_epi32(0b11111111, a, b);
43656 let e = _mm256_setr_epi32(1, 2, 0, i32::MIN, i32::MIN + 1, 101, -99, -31);
43657 assert_eq_m256i(r, e);
43658 }
43659
43660 #[simd_test(enable = "avx512f,avx512vl")]
43661 unsafe fn test_mm_mask_add_epi32() {
43662 let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
43663 let b = _mm_set1_epi32(1);
43664 let r = _mm_mask_add_epi32(a, 0, a, b);
43665 assert_eq_m128i(r, a);
43666 let r = _mm_mask_add_epi32(a, 0b00001111, a, b);
43667 let e = _mm_set_epi32(2, 0, i32::MIN, i32::MIN + 1);
43668 assert_eq_m128i(r, e);
43669 }
43670
43671 #[simd_test(enable = "avx512f,avx512vl")]
43672 unsafe fn test_mm_maskz_add_epi32() {
43673 let a = _mm_setr_epi32(1, -1, i32::MAX, i32::MIN);
43674 let b = _mm_set1_epi32(1);
43675 let r = _mm_maskz_add_epi32(0, a, b);
43676 assert_eq_m128i(r, _mm_setzero_si128());
43677 let r = _mm_maskz_add_epi32(0b00001111, a, b);
43678 let e = _mm_setr_epi32(2, 0, i32::MIN, i32::MIN + 1);
43679 assert_eq_m128i(r, e);
43680 }
43681
43682 #[simd_test(enable = "avx512f")]
43683 unsafe fn test_mm512_add_ps() {
43684 #[rustfmt::skip]
43685 let a = _mm512_setr_ps(
43686 0., 1., -1., f32::MAX,
43687 f32::MIN, 100., -100., -32.,
43688 0., 1., -1., f32::MAX,
43689 f32::MIN, 100., -100., -32.,
43690 );
43691 let b = _mm512_set1_ps(1.);
43692 let r = _mm512_add_ps(a, b);
43693 #[rustfmt::skip]
43694 let e = _mm512_setr_ps(
43695 1., 2., 0., f32::MAX,
43696 f32::MIN + 1., 101., -99., -31.,
43697 1., 2., 0., f32::MAX,
43698 f32::MIN + 1., 101., -99., -31.,
43699 );
43700 assert_eq_m512(r, e);
43701 }
43702
43703 #[simd_test(enable = "avx512f")]
43704 unsafe fn test_mm512_mask_add_ps() {
43705 #[rustfmt::skip]
43706 let a = _mm512_setr_ps(
43707 0., 1., -1., f32::MAX,
43708 f32::MIN, 100., -100., -32.,
43709 0., 1., -1., f32::MAX,
43710 f32::MIN, 100., -100., -32.,
43711 );
43712 let b = _mm512_set1_ps(1.);
43713 let r = _mm512_mask_add_ps(a, 0, a, b);
43714 assert_eq_m512(r, a);
43715 let r = _mm512_mask_add_ps(a, 0b00000000_11111111, a, b);
43716 #[rustfmt::skip]
43717 let e = _mm512_setr_ps(
43718 1., 2., 0., f32::MAX,
43719 f32::MIN + 1., 101., -99., -31.,
43720 0., 1., -1., f32::MAX,
43721 f32::MIN, 100., -100., -32.,
43722 );
43723 assert_eq_m512(r, e);
43724 }
43725
43726 #[simd_test(enable = "avx512f")]
43727 unsafe fn test_mm512_maskz_add_ps() {
43728 #[rustfmt::skip]
43729 let a = _mm512_setr_ps(
43730 0., 1., -1., f32::MAX,
43731 f32::MIN, 100., -100., -32.,
43732 0., 1., -1., f32::MAX,
43733 f32::MIN, 100., -100., -32.,
43734 );
43735 let b = _mm512_set1_ps(1.);
43736 let r = _mm512_maskz_add_ps(0, a, b);
43737 assert_eq_m512(r, _mm512_setzero_ps());
43738 let r = _mm512_maskz_add_ps(0b00000000_11111111, a, b);
43739 #[rustfmt::skip]
43740 let e = _mm512_setr_ps(
43741 1., 2., 0., f32::MAX,
43742 f32::MIN + 1., 101., -99., -31.,
43743 0., 0., 0., 0.,
43744 0., 0., 0., 0.,
43745 );
43746 assert_eq_m512(r, e);
43747 }
43748
43749 #[simd_test(enable = "avx512f,avx512vl")]
43750 unsafe fn test_mm256_mask_add_ps() {
43751 let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
43752 let b = _mm256_set1_ps(1.);
43753 let r = _mm256_mask_add_ps(a, 0, a, b);
43754 assert_eq_m256(r, a);
43755 let r = _mm256_mask_add_ps(a, 0b11111111, a, b);
43756 let e = _mm256_set_ps(1., 2., 0., f32::MAX, f32::MIN + 1., 101., -99., -31.);
43757 assert_eq_m256(r, e);
43758 }
43759
43760 #[simd_test(enable = "avx512f,avx512vl")]
43761 unsafe fn test_mm256_maskz_add_ps() {
43762 let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
43763 let b = _mm256_set1_ps(1.);
43764 let r = _mm256_maskz_add_ps(0, a, b);
43765 assert_eq_m256(r, _mm256_setzero_ps());
43766 let r = _mm256_maskz_add_ps(0b11111111, a, b);
43767 let e = _mm256_set_ps(1., 2., 0., f32::MAX, f32::MIN + 1., 101., -99., -31.);
43768 assert_eq_m256(r, e);
43769 }
43770
43771 #[simd_test(enable = "avx512f,avx512vl")]
43772 unsafe fn test_mm_mask_add_ps() {
43773 let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
43774 let b = _mm_set1_ps(1.);
43775 let r = _mm_mask_add_ps(a, 0, a, b);
43776 assert_eq_m128(r, a);
43777 let r = _mm_mask_add_ps(a, 0b00001111, a, b);
43778 let e = _mm_set_ps(2., 0., f32::MAX, f32::MIN + 1.);
43779 assert_eq_m128(r, e);
43780 }
43781
43782 #[simd_test(enable = "avx512f,avx512vl")]
43783 unsafe fn test_mm_maskz_add_ps() {
43784 let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
43785 let b = _mm_set1_ps(1.);
43786 let r = _mm_maskz_add_ps(0, a, b);
43787 assert_eq_m128(r, _mm_setzero_ps());
43788 let r = _mm_maskz_add_ps(0b00001111, a, b);
43789 let e = _mm_set_ps(2., 0., f32::MAX, f32::MIN + 1.);
43790 assert_eq_m128(r, e);
43791 }
43792
43793 #[simd_test(enable = "avx512f")]
43794 unsafe fn test_mm512_sub_epi32() {
43795 #[rustfmt::skip]
43796 let a = _mm512_setr_epi32(
43797 0, 1, -1, i32::MAX,
43798 i32::MIN, 100, -100, -32,
43799 0, 1, -1, i32::MAX,
43800 i32::MIN, 100, -100, -32,
43801 );
43802 let b = _mm512_set1_epi32(1);
43803 let r = _mm512_sub_epi32(a, b);
43804 #[rustfmt::skip]
43805 let e = _mm512_setr_epi32(
43806 -1, 0, -2, i32::MAX - 1,
43807 i32::MAX, 99, -101, -33,
43808 -1, 0, -2, i32::MAX - 1,
43809 i32::MAX, 99, -101, -33,
43810 );
43811 assert_eq_m512i(r, e);
43812 }
43813
43814 #[simd_test(enable = "avx512f")]
43815 unsafe fn test_mm512_mask_sub_epi32() {
43816 #[rustfmt::skip]
43817 let a = _mm512_setr_epi32(
43818 0, 1, -1, i32::MAX,
43819 i32::MIN, 100, -100, -32,
43820 0, 1, -1, i32::MAX,
43821 i32::MIN, 100, -100, -32,
43822 );
43823 let b = _mm512_set1_epi32(1);
43824 let r = _mm512_mask_sub_epi32(a, 0, a, b);
43825 assert_eq_m512i(r, a);
43826 let r = _mm512_mask_sub_epi32(a, 0b00000000_11111111, a, b);
43827 #[rustfmt::skip]
43828 let e = _mm512_setr_epi32(
43829 -1, 0, -2, i32::MAX - 1,
43830 i32::MAX, 99, -101, -33,
43831 0, 1, -1, i32::MAX,
43832 i32::MIN, 100, -100, -32,
43833 );
43834 assert_eq_m512i(r, e);
43835 }
43836
43837 #[simd_test(enable = "avx512f")]
43838 unsafe fn test_mm512_maskz_sub_epi32() {
43839 #[rustfmt::skip]
43840 let a = _mm512_setr_epi32(
43841 0, 1, -1, i32::MAX,
43842 i32::MIN, 100, -100, -32,
43843 0, 1, -1, i32::MAX,
43844 i32::MIN, 100, -100, -32,
43845 );
43846 let b = _mm512_set1_epi32(1);
43847 let r = _mm512_maskz_sub_epi32(0, a, b);
43848 assert_eq_m512i(r, _mm512_setzero_si512());
43849 let r = _mm512_maskz_sub_epi32(0b00000000_11111111, a, b);
43850 #[rustfmt::skip]
43851 let e = _mm512_setr_epi32(
43852 -1, 0, -2, i32::MAX - 1,
43853 i32::MAX, 99, -101, -33,
43854 0, 0, 0, 0,
43855 0, 0, 0, 0,
43856 );
43857 assert_eq_m512i(r, e);
43858 }
43859
43860 #[simd_test(enable = "avx512f,avx512vl")]
43861 unsafe fn test_mm256_mask_sub_epi32() {
43862 let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
43863 let b = _mm256_set1_epi32(1);
43864 let r = _mm256_mask_sub_epi32(a, 0, a, b);
43865 assert_eq_m256i(r, a);
43866 let r = _mm256_mask_sub_epi32(a, 0b11111111, a, b);
43867 let e = _mm256_set_epi32(-1, 0, -2, i32::MAX - 1, i32::MAX, 99, -101, -33);
43868 assert_eq_m256i(r, e);
43869 }
43870
43871 #[simd_test(enable = "avx512f,avx512vl")]
43872 unsafe fn test_mm256_maskz_sub_epi32() {
43873 let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
43874 let b = _mm256_set1_epi32(1);
43875 let r = _mm256_maskz_sub_epi32(0, a, b);
43876 assert_eq_m256i(r, _mm256_setzero_si256());
43877 let r = _mm256_maskz_sub_epi32(0b11111111, a, b);
43878 let e = _mm256_set_epi32(-1, 0, -2, i32::MAX - 1, i32::MAX, 99, -101, -33);
43879 assert_eq_m256i(r, e);
43880 }
43881
43882 #[simd_test(enable = "avx512f,avx512vl")]
43883 unsafe fn test_mm_mask_sub_epi32() {
43884 let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
43885 let b = _mm_set1_epi32(1);
43886 let r = _mm_mask_sub_epi32(a, 0, a, b);
43887 assert_eq_m128i(r, a);
43888 let r = _mm_mask_sub_epi32(a, 0b00001111, a, b);
43889 let e = _mm_set_epi32(0, -2, i32::MAX - 1, i32::MAX);
43890 assert_eq_m128i(r, e);
43891 }
43892
43893 #[simd_test(enable = "avx512f,avx512vl")]
43894 unsafe fn test_mm_maskz_sub_epi32() {
43895 let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
43896 let b = _mm_set1_epi32(1);
43897 let r = _mm_maskz_sub_epi32(0, a, b);
43898 assert_eq_m128i(r, _mm_setzero_si128());
43899 let r = _mm_maskz_sub_epi32(0b00001111, a, b);
43900 let e = _mm_set_epi32(0, -2, i32::MAX - 1, i32::MAX);
43901 assert_eq_m128i(r, e);
43902 }
43903
43904 #[simd_test(enable = "avx512f")]
43905 unsafe fn test_mm512_sub_ps() {
43906 #[rustfmt::skip]
43907 let a = _mm512_setr_ps(
43908 0., 1., -1., f32::MAX,
43909 f32::MIN, 100., -100., -32.,
43910 0., 1., -1., f32::MAX,
43911 f32::MIN, 100., -100., -32.,
43912 );
43913 let b = _mm512_set1_ps(1.);
43914 let r = _mm512_sub_ps(a, b);
43915 #[rustfmt::skip]
43916 let e = _mm512_setr_ps(
43917 -1., 0., -2., f32::MAX - 1.,
43918 f32::MIN, 99., -101., -33.,
43919 -1., 0., -2., f32::MAX - 1.,
43920 f32::MIN, 99., -101., -33.,
43921 );
43922 assert_eq_m512(r, e);
43923 }
43924
43925 #[simd_test(enable = "avx512f")]
43926 unsafe fn test_mm512_mask_sub_ps() {
43927 #[rustfmt::skip]
43928 let a = _mm512_setr_ps(
43929 0., 1., -1., f32::MAX,
43930 f32::MIN, 100., -100., -32.,
43931 0., 1., -1., f32::MAX,
43932 f32::MIN, 100., -100., -32.,
43933 );
43934 let b = _mm512_set1_ps(1.);
43935 let r = _mm512_mask_sub_ps(a, 0, a, b);
43936 assert_eq_m512(r, a);
43937 let r = _mm512_mask_sub_ps(a, 0b00000000_11111111, a, b);
43938 #[rustfmt::skip]
43939 let e = _mm512_setr_ps(
43940 -1., 0., -2., f32::MAX - 1.,
43941 f32::MIN, 99., -101., -33.,
43942 0., 1., -1., f32::MAX,
43943 f32::MIN, 100., -100., -32.,
43944 );
43945 assert_eq_m512(r, e);
43946 }
43947
43948 #[simd_test(enable = "avx512f")]
43949 unsafe fn test_mm512_maskz_sub_ps() {
43950 #[rustfmt::skip]
43951 let a = _mm512_setr_ps(
43952 0., 1., -1., f32::MAX,
43953 f32::MIN, 100., -100., -32.,
43954 0., 1., -1., f32::MAX,
43955 f32::MIN, 100., -100., -32.,
43956 );
43957 let b = _mm512_set1_ps(1.);
43958 let r = _mm512_maskz_sub_ps(0, a, b);
43959 assert_eq_m512(r, _mm512_setzero_ps());
43960 let r = _mm512_maskz_sub_ps(0b00000000_11111111, a, b);
43961 #[rustfmt::skip]
43962 let e = _mm512_setr_ps(
43963 -1., 0., -2., f32::MAX - 1.,
43964 f32::MIN, 99., -101., -33.,
43965 0., 0., 0., 0.,
43966 0., 0., 0., 0.,
43967 );
43968 assert_eq_m512(r, e);
43969 }
43970
43971 #[simd_test(enable = "avx512f,avx512vl")]
43972 unsafe fn test_mm256_mask_sub_ps() {
43973 let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
43974 let b = _mm256_set1_ps(1.);
43975 let r = _mm256_mask_sub_ps(a, 0, a, b);
43976 assert_eq_m256(r, a);
43977 let r = _mm256_mask_sub_ps(a, 0b11111111, a, b);
43978 let e = _mm256_set_ps(-1., 0., -2., f32::MAX - 1., f32::MIN, 99., -101., -33.);
43979 assert_eq_m256(r, e);
43980 }
43981
43982 #[simd_test(enable = "avx512f,avx512vl")]
43983 unsafe fn test_mm256_maskz_sub_ps() {
43984 let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
43985 let b = _mm256_set1_ps(1.);
43986 let r = _mm256_maskz_sub_ps(0, a, b);
43987 assert_eq_m256(r, _mm256_setzero_ps());
43988 let r = _mm256_maskz_sub_ps(0b11111111, a, b);
43989 let e = _mm256_set_ps(-1., 0., -2., f32::MAX - 1., f32::MIN, 99., -101., -33.);
43990 assert_eq_m256(r, e);
43991 }
43992
43993 #[simd_test(enable = "avx512f,avx512vl")]
43994 unsafe fn test_mm_mask_sub_ps() {
43995 let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
43996 let b = _mm_set1_ps(1.);
43997 let r = _mm_mask_sub_ps(a, 0, a, b);
43998 assert_eq_m128(r, a);
43999 let r = _mm_mask_sub_ps(a, 0b00001111, a, b);
44000 let e = _mm_set_ps(0., -2., f32::MAX - 1., f32::MIN);
44001 assert_eq_m128(r, e);
44002 }
44003
44004 #[simd_test(enable = "avx512f,avx512vl")]
44005 unsafe fn test_mm_maskz_sub_ps() {
44006 let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
44007 let b = _mm_set1_ps(1.);
44008 let r = _mm_maskz_sub_ps(0, a, b);
44009 assert_eq_m128(r, _mm_setzero_ps());
44010 let r = _mm_maskz_sub_ps(0b00001111, a, b);
44011 let e = _mm_set_ps(0., -2., f32::MAX - 1., f32::MIN);
44012 assert_eq_m128(r, e);
44013 }
44014
44015 #[simd_test(enable = "avx512f")]
44016 unsafe fn test_mm512_mullo_epi32() {
44017 #[rustfmt::skip]
44018 let a = _mm512_setr_epi32(
44019 0, 1, -1, i32::MAX,
44020 i32::MIN, 100, -100, -32,
44021 0, 1, -1, i32::MAX,
44022 i32::MIN, 100, -100, -32,
44023 );
44024 let b = _mm512_set1_epi32(2);
44025 let r = _mm512_mullo_epi32(a, b);
44026 let e = _mm512_setr_epi32(
44027 0, 2, -2, -2, 0, 200, -200, -64, 0, 2, -2, -2, 0, 200, -200, -64,
44028 );
44029 assert_eq_m512i(r, e);
44030 }
44031
44032 #[simd_test(enable = "avx512f")]
44033 unsafe fn test_mm512_mask_mullo_epi32() {
44034 #[rustfmt::skip]
44035 let a = _mm512_setr_epi32(
44036 0, 1, -1, i32::MAX,
44037 i32::MIN, 100, -100, -32,
44038 0, 1, -1, i32::MAX,
44039 i32::MIN, 100, -100, -32,
44040 );
44041 let b = _mm512_set1_epi32(2);
44042 let r = _mm512_mask_mullo_epi32(a, 0, a, b);
44043 assert_eq_m512i(r, a);
44044 let r = _mm512_mask_mullo_epi32(a, 0b00000000_11111111, a, b);
44045 #[rustfmt::skip]
44046 let e = _mm512_setr_epi32(
44047 0, 2, -2, -2,
44048 0, 200, -200, -64,
44049 0, 1, -1, i32::MAX,
44050 i32::MIN, 100, -100, -32,
44051 );
44052 assert_eq_m512i(r, e);
44053 }
44054
44055 #[simd_test(enable = "avx512f")]
44056 unsafe fn test_mm512_maskz_mullo_epi32() {
44057 #[rustfmt::skip]
44058 let a = _mm512_setr_epi32(
44059 0, 1, -1, i32::MAX,
44060 i32::MIN, 100, -100, -32,
44061 0, 1, -1, i32::MAX,
44062 i32::MIN, 100, -100, -32,
44063 );
44064 let b = _mm512_set1_epi32(2);
44065 let r = _mm512_maskz_mullo_epi32(0, a, b);
44066 assert_eq_m512i(r, _mm512_setzero_si512());
44067 let r = _mm512_maskz_mullo_epi32(0b00000000_11111111, a, b);
44068 let e = _mm512_setr_epi32(0, 2, -2, -2, 0, 200, -200, -64, 0, 0, 0, 0, 0, 0, 0, 0);
44069 assert_eq_m512i(r, e);
44070 }
44071
44072 #[simd_test(enable = "avx512f,avx512vl")]
44073 unsafe fn test_mm256_mask_mullo_epi32() {
44074 let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
44075 let b = _mm256_set1_epi32(2);
44076 let r = _mm256_mask_mullo_epi32(a, 0, a, b);
44077 assert_eq_m256i(r, a);
44078 let r = _mm256_mask_mullo_epi32(a, 0b11111111, a, b);
44079 let e = _mm256_set_epi32(0, 2, -2, -2, 0, 200, -200, -64);
44080 assert_eq_m256i(r, e);
44081 }
44082
44083 #[simd_test(enable = "avx512f,avx512vl")]
44084 unsafe fn test_mm256_maskz_mullo_epi32() {
44085 let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
44086 let b = _mm256_set1_epi32(2);
44087 let r = _mm256_maskz_mullo_epi32(0, a, b);
44088 assert_eq_m256i(r, _mm256_setzero_si256());
44089 let r = _mm256_maskz_mullo_epi32(0b11111111, a, b);
44090 let e = _mm256_set_epi32(0, 2, -2, -2, 0, 200, -200, -64);
44091 assert_eq_m256i(r, e);
44092 }
44093
44094 #[simd_test(enable = "avx512f,avx512vl")]
44095 unsafe fn test_mm_mask_mullo_epi32() {
44096 let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
44097 let b = _mm_set1_epi32(2);
44098 let r = _mm_mask_mullo_epi32(a, 0, a, b);
44099 assert_eq_m128i(r, a);
44100 let r = _mm_mask_mullo_epi32(a, 0b00001111, a, b);
44101 let e = _mm_set_epi32(2, -2, -2, 0);
44102 assert_eq_m128i(r, e);
44103 }
44104
44105 #[simd_test(enable = "avx512f,avx512vl")]
44106 unsafe fn test_mm_maskz_mullo_epi32() {
44107 let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
44108 let b = _mm_set1_epi32(2);
44109 let r = _mm_maskz_mullo_epi32(0, a, b);
44110 assert_eq_m128i(r, _mm_setzero_si128());
44111 let r = _mm_maskz_mullo_epi32(0b00001111, a, b);
44112 let e = _mm_set_epi32(2, -2, -2, 0);
44113 assert_eq_m128i(r, e);
44114 }
44115
44116 #[simd_test(enable = "avx512f")]
44117 unsafe fn test_mm512_mul_ps() {
44118 #[rustfmt::skip]
44119 let a = _mm512_setr_ps(
44120 0., 1., -1., f32::MAX,
44121 f32::MIN, 100., -100., -32.,
44122 0., 1., -1., f32::MAX,
44123 f32::MIN, 100., -100., -32.,
44124 );
44125 let b = _mm512_set1_ps(2.);
44126 let r = _mm512_mul_ps(a, b);
44127 #[rustfmt::skip]
        let e = _mm512_setr_ps(
            0., 2., -2., f32::INFINITY,
            f32::NEG_INFINITY, 200., -200., -64.,
            0., 2., -2., f32::INFINITY,
            f32::NEG_INFINITY, 200., -200., -64.,
        );
44135 assert_eq_m512(r, e);
44136 }
44137
44138 #[simd_test(enable = "avx512f")]
44139 unsafe fn test_mm512_mask_mul_ps() {
44140 #[rustfmt::skip]
44141 let a = _mm512_setr_ps(
44142 0., 1., -1., f32::MAX,
44143 f32::MIN, 100., -100., -32.,
44144 0., 1., -1., f32::MAX,
44145 f32::MIN, 100., -100., -32.,
44146 );
44147 let b = _mm512_set1_ps(2.);
44148 let r = _mm512_mask_mul_ps(a, 0, a, b);
44149 assert_eq_m512(r, a);
44150 let r = _mm512_mask_mul_ps(a, 0b00000000_11111111, a, b);
44151 #[rustfmt::skip]
44152 let e = _mm512_setr_ps(
44153 0., 2., -2., f32::INFINITY,
44154 f32::NEG_INFINITY, 200., -200., -64.,
44155 0., 1., -1., f32::MAX,
44156 f32::MIN, 100., -100., -32.,
44157 );
44158 assert_eq_m512(r, e);
44159 }
44160
44161 #[simd_test(enable = "avx512f")]
44162 unsafe fn test_mm512_maskz_mul_ps() {
44163 #[rustfmt::skip]
44164 let a = _mm512_setr_ps(
44165 0., 1., -1., f32::MAX,
44166 f32::MIN, 100., -100., -32.,
44167 0., 1., -1., f32::MAX,
44168 f32::MIN, 100., -100., -32.,
44169 );
44170 let b = _mm512_set1_ps(2.);
44171 let r = _mm512_maskz_mul_ps(0, a, b);
44172 assert_eq_m512(r, _mm512_setzero_ps());
44173 let r = _mm512_maskz_mul_ps(0b00000000_11111111, a, b);
44174 #[rustfmt::skip]
44175 let e = _mm512_setr_ps(
44176 0., 2., -2., f32::INFINITY,
44177 f32::NEG_INFINITY, 200., -200., -64.,
44178 0., 0., 0., 0.,
44179 0., 0., 0., 0.,
44180 );
44181 assert_eq_m512(r, e);
44182 }
44183
44184 #[simd_test(enable = "avx512f,avx512vl")]
44185 unsafe fn test_mm256_mask_mul_ps() {
44186 let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
44187 let b = _mm256_set1_ps(2.);
44188 let r = _mm256_mask_mul_ps(a, 0, a, b);
44189 assert_eq_m256(r, a);
44190 let r = _mm256_mask_mul_ps(a, 0b11111111, a, b);
44191 #[rustfmt::skip]
44192 let e = _mm256_set_ps(
44193 0., 2., -2., f32::INFINITY,
44194 f32::NEG_INFINITY, 200., -200., -64.,
44195 );
44196 assert_eq_m256(r, e);
44197 }
44198
44199 #[simd_test(enable = "avx512f,avx512vl")]
44200 unsafe fn test_mm256_maskz_mul_ps() {
44201 let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
44202 let b = _mm256_set1_ps(2.);
44203 let r = _mm256_maskz_mul_ps(0, a, b);
44204 assert_eq_m256(r, _mm256_setzero_ps());
44205 let r = _mm256_maskz_mul_ps(0b11111111, a, b);
44206 #[rustfmt::skip]
44207 let e = _mm256_set_ps(
44208 0., 2., -2., f32::INFINITY,
44209 f32::NEG_INFINITY, 200., -200., -64.,
44210 );
44211 assert_eq_m256(r, e);
44212 }
44213
44214 #[simd_test(enable = "avx512f,avx512vl")]
44215 unsafe fn test_mm_mask_mul_ps() {
44216 let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
44217 let b = _mm_set1_ps(2.);
44218 let r = _mm_mask_mul_ps(a, 0, a, b);
44219 assert_eq_m128(r, a);
44220 let r = _mm_mask_mul_ps(a, 0b00001111, a, b);
44221 let e = _mm_set_ps(2., -2., f32::INFINITY, f32::NEG_INFINITY);
44222 assert_eq_m128(r, e);
44223 }
44224
44225 #[simd_test(enable = "avx512f,avx512vl")]
44226 unsafe fn test_mm_maskz_mul_ps() {
44227 let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
44228 let b = _mm_set1_ps(2.);
44229 let r = _mm_maskz_mul_ps(0, a, b);
44230 assert_eq_m128(r, _mm_setzero_ps());
44231 let r = _mm_maskz_mul_ps(0b00001111, a, b);
44232 let e = _mm_set_ps(2., -2., f32::INFINITY, f32::NEG_INFINITY);
44233 assert_eq_m128(r, e);
44234 }
44235
44236 #[simd_test(enable = "avx512f")]
44237 unsafe fn test_mm512_div_ps() {
44238 let a = _mm512_setr_ps(
44239 0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32.,
44240 );
44241 let b = _mm512_setr_ps(
44242 2., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2.,
44243 );
44244 let r = _mm512_div_ps(a, b);
44245 #[rustfmt::skip]
44246 let e = _mm512_setr_ps(
44247 0., 0.5, -0.5, -1.,
44248 50., f32::INFINITY, -50., -16.,
44249 0., 0.5, -0.5, 500.,
44250 f32::NEG_INFINITY, 50., -50., -16.,
44251 );
        assert_eq_m512(r, e); // 100./0. = INFINITY and -131./0. = NEG_INFINITY
44253 }
44254
44255 #[simd_test(enable = "avx512f")]
44256 unsafe fn test_mm512_mask_div_ps() {
44257 let a = _mm512_setr_ps(
44258 0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32.,
44259 );
44260 let b = _mm512_setr_ps(
44261 2., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2.,
44262 );
44263 let r = _mm512_mask_div_ps(a, 0, a, b);
44264 assert_eq_m512(r, a);
44265 let r = _mm512_mask_div_ps(a, 0b00000000_11111111, a, b);
44266 #[rustfmt::skip]
44267 let e = _mm512_setr_ps(
44268 0., 0.5, -0.5, -1.,
44269 50., f32::INFINITY, -50., -16.,
44270 0., 1., -1., 1000.,
44271 -131., 100., -100., -32.,
44272 );
44273 assert_eq_m512(r, e);
44274 }
44275
44276 #[simd_test(enable = "avx512f")]
44277 unsafe fn test_mm512_maskz_div_ps() {
44278 let a = _mm512_setr_ps(
44279 0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32.,
44280 );
44281 let b = _mm512_setr_ps(
44282 2., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2.,
44283 );
44284 let r = _mm512_maskz_div_ps(0, a, b);
44285 assert_eq_m512(r, _mm512_setzero_ps());
44286 let r = _mm512_maskz_div_ps(0b00000000_11111111, a, b);
44287 #[rustfmt::skip]
44288 let e = _mm512_setr_ps(
44289 0., 0.5, -0.5, -1.,
44290 50., f32::INFINITY, -50., -16.,
44291 0., 0., 0., 0.,
44292 0., 0., 0., 0.,
44293 );
44294 assert_eq_m512(r, e);
44295 }
44296
44297 #[simd_test(enable = "avx512f,avx512vl")]
44298 unsafe fn test_mm256_mask_div_ps() {
44299 let a = _mm256_set_ps(0., 1., -1., -2., 100., 100., -100., -32.);
44300 let b = _mm256_set_ps(2., 2., 2., 2., 2., 0., 2., 2.);
44301 let r = _mm256_mask_div_ps(a, 0, a, b);
44302 assert_eq_m256(r, a);
44303 let r = _mm256_mask_div_ps(a, 0b11111111, a, b);
44304 let e = _mm256_set_ps(0., 0.5, -0.5, -1., 50., f32::INFINITY, -50., -16.);
44305 assert_eq_m256(r, e);
44306 }
44307
44308 #[simd_test(enable = "avx512f,avx512vl")]
44309 unsafe fn test_mm256_maskz_div_ps() {
44310 let a = _mm256_set_ps(0., 1., -1., -2., 100., 100., -100., -32.);
44311 let b = _mm256_set_ps(2., 2., 2., 2., 2., 0., 2., 2.);
44312 let r = _mm256_maskz_div_ps(0, a, b);
44313 assert_eq_m256(r, _mm256_setzero_ps());
44314 let r = _mm256_maskz_div_ps(0b11111111, a, b);
44315 let e = _mm256_set_ps(0., 0.5, -0.5, -1., 50., f32::INFINITY, -50., -16.);
44316 assert_eq_m256(r, e);
44317 }
44318
44319 #[simd_test(enable = "avx512f,avx512vl")]
44320 unsafe fn test_mm_mask_div_ps() {
44321 let a = _mm_set_ps(100., 100., -100., -32.);
44322 let b = _mm_set_ps(2., 0., 2., 2.);
44323 let r = _mm_mask_div_ps(a, 0, a, b);
44324 assert_eq_m128(r, a);
44325 let r = _mm_mask_div_ps(a, 0b00001111, a, b);
44326 let e = _mm_set_ps(50., f32::INFINITY, -50., -16.);
44327 assert_eq_m128(r, e);
44328 }
44329
44330 #[simd_test(enable = "avx512f,avx512vl")]
44331 unsafe fn test_mm_maskz_div_ps() {
44332 let a = _mm_set_ps(100., 100., -100., -32.);
44333 let b = _mm_set_ps(2., 0., 2., 2.);
44334 let r = _mm_maskz_div_ps(0, a, b);
44335 assert_eq_m128(r, _mm_setzero_ps());
44336 let r = _mm_maskz_div_ps(0b00001111, a, b);
44337 let e = _mm_set_ps(50., f32::INFINITY, -50., -16.);
44338 assert_eq_m128(r, e);
44339 }
44340
44341 #[simd_test(enable = "avx512f")]
44342 unsafe fn test_mm512_max_epi32() {
44343 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44344 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44345 let r = _mm512_max_epi32(a, b);
44346 let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
44347 assert_eq_m512i(r, e);
44348 }
44349
44350 #[simd_test(enable = "avx512f")]
44351 unsafe fn test_mm512_mask_max_epi32() {
44352 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44353 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44354 let r = _mm512_mask_max_epi32(a, 0, a, b);
44355 assert_eq_m512i(r, a);
44356 let r = _mm512_mask_max_epi32(a, 0b00000000_11111111, a, b);
44357 let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
44358 assert_eq_m512i(r, e);
44359 }
44360
44361 #[simd_test(enable = "avx512f")]
44362 unsafe fn test_mm512_maskz_max_epi32() {
44363 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44364 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44365 let r = _mm512_maskz_max_epi32(0, a, b);
44366 assert_eq_m512i(r, _mm512_setzero_si512());
44367 let r = _mm512_maskz_max_epi32(0b00000000_11111111, a, b);
44368 let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 0, 0, 0, 0, 0, 0, 0, 0);
44369 assert_eq_m512i(r, e);
44370 }
44371
44372 #[simd_test(enable = "avx512f,avx512vl")]
44373 unsafe fn test_mm256_mask_max_epi32() {
44374 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44375 let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44376 let r = _mm256_mask_max_epi32(a, 0, a, b);
44377 assert_eq_m256i(r, a);
44378 let r = _mm256_mask_max_epi32(a, 0b11111111, a, b);
44379 let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7);
44380 assert_eq_m256i(r, e);
44381 }
44382
44383 #[simd_test(enable = "avx512f,avx512vl")]
44384 unsafe fn test_mm256_maskz_max_epi32() {
44385 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44386 let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44387 let r = _mm256_maskz_max_epi32(0, a, b);
44388 assert_eq_m256i(r, _mm256_setzero_si256());
44389 let r = _mm256_maskz_max_epi32(0b11111111, a, b);
44390 let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7);
44391 assert_eq_m256i(r, e);
44392 }
44393
44394 #[simd_test(enable = "avx512f,avx512vl")]
44395 unsafe fn test_mm_mask_max_epi32() {
44396 let a = _mm_set_epi32(0, 1, 2, 3);
44397 let b = _mm_set_epi32(3, 2, 1, 0);
44398 let r = _mm_mask_max_epi32(a, 0, a, b);
44399 assert_eq_m128i(r, a);
44400 let r = _mm_mask_max_epi32(a, 0b00001111, a, b);
44401 let e = _mm_set_epi32(3, 2, 2, 3);
44402 assert_eq_m128i(r, e);
44403 }
44404
44405 #[simd_test(enable = "avx512f,avx512vl")]
44406 unsafe fn test_mm_maskz_max_epi32() {
44407 let a = _mm_set_epi32(0, 1, 2, 3);
44408 let b = _mm_set_epi32(3, 2, 1, 0);
44409 let r = _mm_maskz_max_epi32(0, a, b);
44410 assert_eq_m128i(r, _mm_setzero_si128());
44411 let r = _mm_maskz_max_epi32(0b00001111, a, b);
44412 let e = _mm_set_epi32(3, 2, 2, 3);
44413 assert_eq_m128i(r, e);
44414 }
44415
44416 #[simd_test(enable = "avx512f")]
44417 unsafe fn test_mm512_max_ps() {
44418 let a = _mm512_setr_ps(
44419 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44420 );
44421 let b = _mm512_setr_ps(
44422 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
44423 );
44424 let r = _mm512_max_ps(a, b);
44425 let e = _mm512_setr_ps(
44426 15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
44427 );
44428 assert_eq_m512(r, e);
44429 }
44430
44431 #[simd_test(enable = "avx512f")]
44432 unsafe fn test_mm512_mask_max_ps() {
44433 let a = _mm512_setr_ps(
44434 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44435 );
44436 let b = _mm512_setr_ps(
44437 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
44438 );
44439 let r = _mm512_mask_max_ps(a, 0, a, b);
44440 assert_eq_m512(r, a);
44441 let r = _mm512_mask_max_ps(a, 0b00000000_11111111, a, b);
44442 let e = _mm512_setr_ps(
44443 15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
44444 );
44445 assert_eq_m512(r, e);
44446 }
44447
44448 #[simd_test(enable = "avx512f")]
44449 unsafe fn test_mm512_maskz_max_ps() {
44450 let a = _mm512_setr_ps(
44451 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44452 );
44453 let b = _mm512_setr_ps(
44454 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
44455 );
44456 let r = _mm512_maskz_max_ps(0, a, b);
44457 assert_eq_m512(r, _mm512_setzero_ps());
44458 let r = _mm512_maskz_max_ps(0b00000000_11111111, a, b);
44459 let e = _mm512_setr_ps(
44460 15., 14., 13., 12., 11., 10., 9., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
44461 );
44462 assert_eq_m512(r, e);
44463 }
44464
44465 #[simd_test(enable = "avx512f,avx512vl")]
44466 unsafe fn test_mm256_mask_max_ps() {
44467 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44468 let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
44469 let r = _mm256_mask_max_ps(a, 0, a, b);
44470 assert_eq_m256(r, a);
44471 let r = _mm256_mask_max_ps(a, 0b11111111, a, b);
44472 let e = _mm256_set_ps(7., 6., 5., 4., 4., 5., 6., 7.);
44473 assert_eq_m256(r, e);
44474 }
44475
44476 #[simd_test(enable = "avx512f,avx512vl")]
44477 unsafe fn test_mm256_maskz_max_ps() {
44478 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44479 let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
44480 let r = _mm256_maskz_max_ps(0, a, b);
44481 assert_eq_m256(r, _mm256_setzero_ps());
44482 let r = _mm256_maskz_max_ps(0b11111111, a, b);
44483 let e = _mm256_set_ps(7., 6., 5., 4., 4., 5., 6., 7.);
44484 assert_eq_m256(r, e);
44485 }
44486
44487 #[simd_test(enable = "avx512f,avx512vl")]
44488 unsafe fn test_mm_mask_max_ps() {
44489 let a = _mm_set_ps(0., 1., 2., 3.);
44490 let b = _mm_set_ps(3., 2., 1., 0.);
44491 let r = _mm_mask_max_ps(a, 0, a, b);
44492 assert_eq_m128(r, a);
44493 let r = _mm_mask_max_ps(a, 0b00001111, a, b);
44494 let e = _mm_set_ps(3., 2., 2., 3.);
44495 assert_eq_m128(r, e);
44496 }
44497
    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_max_ps() {
        let a = _mm_set_ps(0., 1., 2., 3.);
        let b = _mm_set_ps(3., 2., 1., 0.);
        let r = _mm_maskz_max_ps(0, a, b);
        assert_eq_m128(r, _mm_setzero_ps());
        let r = _mm_maskz_max_ps(0b00001111, a, b);
        let e = _mm_set_ps(3., 2., 2., 3.);
        assert_eq_m128(r, e);
    }
44508
44509 #[simd_test(enable = "avx512f")]
44510 unsafe fn test_mm512_max_epu32() {
44511 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44512 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44513 let r = _mm512_max_epu32(a, b);
44514 let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
44515 assert_eq_m512i(r, e);
44516 }
44517
44518 #[simd_test(enable = "avx512f")]
44519 unsafe fn test_mm512_mask_max_epu32() {
44520 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44521 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44522 let r = _mm512_mask_max_epu32(a, 0, a, b);
44523 assert_eq_m512i(r, a);
44524 let r = _mm512_mask_max_epu32(a, 0b00000000_11111111, a, b);
44525 let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
44526 assert_eq_m512i(r, e);
44527 }
44528
44529 #[simd_test(enable = "avx512f")]
44530 unsafe fn test_mm512_maskz_max_epu32() {
44531 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44532 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44533 let r = _mm512_maskz_max_epu32(0, a, b);
44534 assert_eq_m512i(r, _mm512_setzero_si512());
44535 let r = _mm512_maskz_max_epu32(0b00000000_11111111, a, b);
44536 let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 0, 0, 0, 0, 0, 0, 0, 0);
44537 assert_eq_m512i(r, e);
44538 }
44539
44540 #[simd_test(enable = "avx512f,avx512vl")]
44541 unsafe fn test_mm256_mask_max_epu32() {
44542 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44543 let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44544 let r = _mm256_mask_max_epu32(a, 0, a, b);
44545 assert_eq_m256i(r, a);
44546 let r = _mm256_mask_max_epu32(a, 0b11111111, a, b);
44547 let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7);
44548 assert_eq_m256i(r, e);
44549 }
44550
44551 #[simd_test(enable = "avx512f,avx512vl")]
44552 unsafe fn test_mm256_maskz_max_epu32() {
44553 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44554 let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44555 let r = _mm256_maskz_max_epu32(0, a, b);
44556 assert_eq_m256i(r, _mm256_setzero_si256());
44557 let r = _mm256_maskz_max_epu32(0b11111111, a, b);
44558 let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7);
44559 assert_eq_m256i(r, e);
44560 }
44561
44562 #[simd_test(enable = "avx512f,avx512vl")]
44563 unsafe fn test_mm_mask_max_epu32() {
44564 let a = _mm_set_epi32(0, 1, 2, 3);
44565 let b = _mm_set_epi32(3, 2, 1, 0);
44566 let r = _mm_mask_max_epu32(a, 0, a, b);
44567 assert_eq_m128i(r, a);
44568 let r = _mm_mask_max_epu32(a, 0b00001111, a, b);
44569 let e = _mm_set_epi32(3, 2, 2, 3);
44570 assert_eq_m128i(r, e);
44571 }
44572
44573 #[simd_test(enable = "avx512f,avx512vl")]
44574 unsafe fn test_mm_maskz_max_epu32() {
44575 let a = _mm_set_epi32(0, 1, 2, 3);
44576 let b = _mm_set_epi32(3, 2, 1, 0);
44577 let r = _mm_maskz_max_epu32(0, a, b);
44578 assert_eq_m128i(r, _mm_setzero_si128());
44579 let r = _mm_maskz_max_epu32(0b00001111, a, b);
44580 let e = _mm_set_epi32(3, 2, 2, 3);
44581 assert_eq_m128i(r, e);
44582 }
44583
44584 #[simd_test(enable = "avx512f")]
44585 unsafe fn test_mm512_min_epi32() {
44586 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44587 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44588 let r = _mm512_min_epi32(a, b);
44589 let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
44590 assert_eq_m512i(r, e);
44591 }
44592
44593 #[simd_test(enable = "avx512f")]
44594 unsafe fn test_mm512_mask_min_epi32() {
44595 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44596 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44597 let r = _mm512_mask_min_epi32(a, 0, a, b);
44598 assert_eq_m512i(r, a);
44599 let r = _mm512_mask_min_epi32(a, 0b00000000_11111111, a, b);
44600 let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44601 assert_eq_m512i(r, e);
44602 }
44603
44604 #[simd_test(enable = "avx512f")]
44605 unsafe fn test_mm512_maskz_min_epi32() {
44606 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44607 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44608 let r = _mm512_maskz_min_epi32(0, a, b);
44609 assert_eq_m512i(r, _mm512_setzero_si512());
44610 let r = _mm512_maskz_min_epi32(0b00000000_11111111, a, b);
44611 let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0);
44612 assert_eq_m512i(r, e);
44613 }
44614
44615 #[simd_test(enable = "avx512f,avx512vl")]
44616 unsafe fn test_mm256_mask_min_epi32() {
44617 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44618 let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44619 let r = _mm256_mask_min_epi32(a, 0, a, b);
44620 assert_eq_m256i(r, a);
44621 let r = _mm256_mask_min_epi32(a, 0b11111111, a, b);
44622 let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0);
44623 assert_eq_m256i(r, e);
44624 }
44625
44626 #[simd_test(enable = "avx512f,avx512vl")]
44627 unsafe fn test_mm256_maskz_min_epi32() {
44628 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44629 let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44630 let r = _mm256_maskz_min_epi32(0, a, b);
44631 assert_eq_m256i(r, _mm256_setzero_si256());
44632 let r = _mm256_maskz_min_epi32(0b11111111, a, b);
44633 let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0);
44634 assert_eq_m256i(r, e);
44635 }
44636
44637 #[simd_test(enable = "avx512f,avx512vl")]
44638 unsafe fn test_mm_mask_min_epi32() {
44639 let a = _mm_set_epi32(0, 1, 2, 3);
44640 let b = _mm_set_epi32(3, 2, 1, 0);
44641 let r = _mm_mask_min_epi32(a, 0, a, b);
44642 assert_eq_m128i(r, a);
44643 let r = _mm_mask_min_epi32(a, 0b00001111, a, b);
44644 let e = _mm_set_epi32(0, 1, 1, 0);
44645 assert_eq_m128i(r, e);
44646 }
44647
44648 #[simd_test(enable = "avx512f,avx512vl")]
44649 unsafe fn test_mm_maskz_min_epi32() {
44650 let a = _mm_set_epi32(0, 1, 2, 3);
44651 let b = _mm_set_epi32(3, 2, 1, 0);
44652 let r = _mm_maskz_min_epi32(0, a, b);
44653 assert_eq_m128i(r, _mm_setzero_si128());
44654 let r = _mm_maskz_min_epi32(0b00001111, a, b);
44655 let e = _mm_set_epi32(0, 1, 1, 0);
44656 assert_eq_m128i(r, e);
44657 }
44658
44659 #[simd_test(enable = "avx512f")]
44660 unsafe fn test_mm512_min_ps() {
44661 let a = _mm512_setr_ps(
44662 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44663 );
44664 let b = _mm512_setr_ps(
44665 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
44666 );
44667 let r = _mm512_min_ps(a, b);
44668 let e = _mm512_setr_ps(
44669 0., 1., 2., 3., 4., 5., 6., 7., 7., 6., 5., 4., 3., 2., 1., 0.,
44670 );
44671 assert_eq_m512(r, e);
44672 }
44673
44674 #[simd_test(enable = "avx512f")]
44675 unsafe fn test_mm512_mask_min_ps() {
44676 let a = _mm512_setr_ps(
44677 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44678 );
44679 let b = _mm512_setr_ps(
44680 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
44681 );
44682 let r = _mm512_mask_min_ps(a, 0, a, b);
44683 assert_eq_m512(r, a);
44684 let r = _mm512_mask_min_ps(a, 0b00000000_11111111, a, b);
44685 let e = _mm512_setr_ps(
44686 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44687 );
44688 assert_eq_m512(r, e);
44689 }
44690
44691 #[simd_test(enable = "avx512f")]
44692 unsafe fn test_mm512_maskz_min_ps() {
44693 let a = _mm512_setr_ps(
44694 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44695 );
44696 let b = _mm512_setr_ps(
44697 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
44698 );
44699 let r = _mm512_maskz_min_ps(0, a, b);
44700 assert_eq_m512(r, _mm512_setzero_ps());
44701 let r = _mm512_maskz_min_ps(0b00000000_11111111, a, b);
44702 let e = _mm512_setr_ps(
44703 0., 1., 2., 3., 4., 5., 6., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
44704 );
44705 assert_eq_m512(r, e);
44706 }
44707
44708 #[simd_test(enable = "avx512f,avx512vl")]
44709 unsafe fn test_mm256_mask_min_ps() {
44710 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44711 let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
44712 let r = _mm256_mask_min_ps(a, 0, a, b);
44713 assert_eq_m256(r, a);
44714 let r = _mm256_mask_min_ps(a, 0b11111111, a, b);
44715 let e = _mm256_set_ps(0., 1., 2., 3., 3., 2., 1., 0.);
44716 assert_eq_m256(r, e);
44717 }
44718
44719 #[simd_test(enable = "avx512f,avx512vl")]
44720 unsafe fn test_mm256_maskz_min_ps() {
44721 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44722 let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
44723 let r = _mm256_maskz_min_ps(0, a, b);
44724 assert_eq_m256(r, _mm256_setzero_ps());
44725 let r = _mm256_maskz_min_ps(0b11111111, a, b);
44726 let e = _mm256_set_ps(0., 1., 2., 3., 3., 2., 1., 0.);
44727 assert_eq_m256(r, e);
44728 }
44729
44730 #[simd_test(enable = "avx512f,avx512vl")]
44731 unsafe fn test_mm_mask_min_ps() {
44732 let a = _mm_set_ps(0., 1., 2., 3.);
44733 let b = _mm_set_ps(3., 2., 1., 0.);
44734 let r = _mm_mask_min_ps(a, 0, a, b);
44735 assert_eq_m128(r, a);
44736 let r = _mm_mask_min_ps(a, 0b00001111, a, b);
44737 let e = _mm_set_ps(0., 1., 1., 0.);
44738 assert_eq_m128(r, e);
44739 }
44740
44741 #[simd_test(enable = "avx512f,avx512vl")]
44742 unsafe fn test_mm_maskz_min_ps() {
44743 let a = _mm_set_ps(0., 1., 2., 3.);
44744 let b = _mm_set_ps(3., 2., 1., 0.);
44745 let r = _mm_maskz_min_ps(0, a, b);
44746 assert_eq_m128(r, _mm_setzero_ps());
44747 let r = _mm_maskz_min_ps(0b00001111, a, b);
44748 let e = _mm_set_ps(0., 1., 1., 0.);
44749 assert_eq_m128(r, e);
44750 }
44751
44752 #[simd_test(enable = "avx512f")]
44753 unsafe fn test_mm512_min_epu32() {
44754 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44755 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44756 let r = _mm512_min_epu32(a, b);
44757 let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
44758 assert_eq_m512i(r, e);
44759 }
44760
44761 #[simd_test(enable = "avx512f")]
44762 unsafe fn test_mm512_mask_min_epu32() {
44763 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44764 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44765 let r = _mm512_mask_min_epu32(a, 0, a, b);
44766 assert_eq_m512i(r, a);
44767 let r = _mm512_mask_min_epu32(a, 0b00000000_11111111, a, b);
44768 let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44769 assert_eq_m512i(r, e);
44770 }
44771
44772 #[simd_test(enable = "avx512f")]
44773 unsafe fn test_mm512_maskz_min_epu32() {
44774 let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
44775 let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
44776 let r = _mm512_maskz_min_epu32(0, a, b);
44777 assert_eq_m512i(r, _mm512_setzero_si512());
44778 let r = _mm512_maskz_min_epu32(0b00000000_11111111, a, b);
44779 let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0);
44780 assert_eq_m512i(r, e);
44781 }
44782
44783 #[simd_test(enable = "avx512f,avx512vl")]
44784 unsafe fn test_mm256_mask_min_epu32() {
44785 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44786 let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44787 let r = _mm256_mask_min_epu32(a, 0, a, b);
44788 assert_eq_m256i(r, a);
44789 let r = _mm256_mask_min_epu32(a, 0b11111111, a, b);
44790 let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0);
44791 assert_eq_m256i(r, e);
44792 }
44793
44794 #[simd_test(enable = "avx512f,avx512vl")]
44795 unsafe fn test_mm256_maskz_min_epu32() {
44796 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
44797 let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
44798 let r = _mm256_maskz_min_epu32(0, a, b);
44799 assert_eq_m256i(r, _mm256_setzero_si256());
44800 let r = _mm256_maskz_min_epu32(0b11111111, a, b);
44801 let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0);
44802 assert_eq_m256i(r, e);
44803 }
44804
44805 #[simd_test(enable = "avx512f,avx512vl")]
44806 unsafe fn test_mm_mask_min_epu32() {
44807 let a = _mm_set_epi32(0, 1, 2, 3);
44808 let b = _mm_set_epi32(3, 2, 1, 0);
44809 let r = _mm_mask_min_epu32(a, 0, a, b);
44810 assert_eq_m128i(r, a);
44811 let r = _mm_mask_min_epu32(a, 0b00001111, a, b);
44812 let e = _mm_set_epi32(0, 1, 1, 0);
44813 assert_eq_m128i(r, e);
44814 }
44815
44816 #[simd_test(enable = "avx512f,avx512vl")]
44817 unsafe fn test_mm_maskz_min_epu32() {
44818 let a = _mm_set_epi32(0, 1, 2, 3);
44819 let b = _mm_set_epi32(3, 2, 1, 0);
44820 let r = _mm_maskz_min_epu32(0, a, b);
44821 assert_eq_m128i(r, _mm_setzero_si128());
44822 let r = _mm_maskz_min_epu32(0b00001111, a, b);
44823 let e = _mm_set_epi32(0, 1, 1, 0);
44824 assert_eq_m128i(r, e);
44825 }
44826
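    // The sqrt tests use perfect squares (0, 1, 4, 9, ...) so `vsqrtps` produces
    // exact results and the expected vectors can be compared bit-for-bit.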
44827 #[simd_test(enable = "avx512f")]
44828 unsafe fn test_mm512_sqrt_ps() {
44829 let a = _mm512_setr_ps(
44830 0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225.,
44831 );
44832 let r = _mm512_sqrt_ps(a);
44833 let e = _mm512_setr_ps(
44834 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44835 );
44836 assert_eq_m512(r, e);
44837 }
44838
44839 #[simd_test(enable = "avx512f")]
44840 unsafe fn test_mm512_mask_sqrt_ps() {
44841 let a = _mm512_setr_ps(
44842 0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225.,
44843 );
44844 let r = _mm512_mask_sqrt_ps(a, 0, a);
44845 assert_eq_m512(r, a);
44846 let r = _mm512_mask_sqrt_ps(a, 0b00000000_11111111, a);
44847 let e = _mm512_setr_ps(
44848 0., 1., 2., 3., 4., 5., 6., 7., 64., 81., 100., 121., 144., 169., 196., 225.,
44849 );
44850 assert_eq_m512(r, e);
44851 }
44852
44853 #[simd_test(enable = "avx512f")]
44854 unsafe fn test_mm512_maskz_sqrt_ps() {
44855 let a = _mm512_setr_ps(
44856 0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225.,
44857 );
44858 let r = _mm512_maskz_sqrt_ps(0, a);
44859 assert_eq_m512(r, _mm512_setzero_ps());
44860 let r = _mm512_maskz_sqrt_ps(0b00000000_11111111, a);
44861 let e = _mm512_setr_ps(
44862 0., 1., 2., 3., 4., 5., 6., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
44863 );
44864 assert_eq_m512(r, e);
44865 }
44866
44867 #[simd_test(enable = "avx512f,avx512vl")]
44868 unsafe fn test_mm256_mask_sqrt_ps() {
44869 let a = _mm256_set_ps(0., 1., 4., 9., 16., 25., 36., 49.);
44870 let r = _mm256_mask_sqrt_ps(a, 0, a);
44871 assert_eq_m256(r, a);
44872 let r = _mm256_mask_sqrt_ps(a, 0b11111111, a);
44873 let e = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44874 assert_eq_m256(r, e);
44875 }
44876
44877 #[simd_test(enable = "avx512f,avx512vl")]
44878 unsafe fn test_mm256_maskz_sqrt_ps() {
44879 let a = _mm256_set_ps(0., 1., 4., 9., 16., 25., 36., 49.);
44880 let r = _mm256_maskz_sqrt_ps(0, a);
44881 assert_eq_m256(r, _mm256_setzero_ps());
44882 let r = _mm256_maskz_sqrt_ps(0b11111111, a);
44883 let e = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44884 assert_eq_m256(r, e);
44885 }
44886
44887 #[simd_test(enable = "avx512f,avx512vl")]
44888 unsafe fn test_mm_mask_sqrt_ps() {
44889 let a = _mm_set_ps(0., 1., 4., 9.);
44890 let r = _mm_mask_sqrt_ps(a, 0, a);
44891 assert_eq_m128(r, a);
44892 let r = _mm_mask_sqrt_ps(a, 0b00001111, a);
44893 let e = _mm_set_ps(0., 1., 2., 3.);
44894 assert_eq_m128(r, e);
44895 }
44896
44897 #[simd_test(enable = "avx512f,avx512vl")]
44898 unsafe fn test_mm_maskz_sqrt_ps() {
44899 let a = _mm_set_ps(0., 1., 4., 9.);
44900 let r = _mm_maskz_sqrt_ps(0, a);
44901 assert_eq_m128(r, _mm_setzero_ps());
44902 let r = _mm_maskz_sqrt_ps(0b00001111, a);
44903 let e = _mm_set_ps(0., 1., 2., 3.);
44904 assert_eq_m128(r, e);
44905 }
44906
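    // The FMA tests use a = c = 1, so each fused-multiply-add lane is simply
    // b[i] + 1 (computed with a single rounding). The `mask3` variants take the
    // mask last and fall back to `c` rather than `a` for unselected lanes, which
    // is why `test_mm512_mask3_fmadd_ps` uses a distinct c = 2 to make the
    // copied lanes visible.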
44907 #[simd_test(enable = "avx512f")]
44908 unsafe fn test_mm512_fmadd_ps() {
44909 let a = _mm512_set1_ps(1.);
44910 let b = _mm512_setr_ps(
44911 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44912 );
44913 let c = _mm512_set1_ps(1.);
44914 let r = _mm512_fmadd_ps(a, b, c);
44915 let e = _mm512_setr_ps(
44916 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
44917 );
44918 assert_eq_m512(r, e);
44919 }
44920
44921 #[simd_test(enable = "avx512f")]
44922 unsafe fn test_mm512_mask_fmadd_ps() {
44923 let a = _mm512_set1_ps(1.);
44924 let b = _mm512_setr_ps(
44925 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44926 );
44927 let c = _mm512_set1_ps(1.);
44928 let r = _mm512_mask_fmadd_ps(a, 0, b, c);
44929 assert_eq_m512(r, a);
44930 let r = _mm512_mask_fmadd_ps(a, 0b00000000_11111111, b, c);
44931 let e = _mm512_setr_ps(
44932 1., 2., 3., 4., 5., 6., 7., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
44933 );
44934 assert_eq_m512(r, e);
44935 }
44936
44937 #[simd_test(enable = "avx512f")]
44938 unsafe fn test_mm512_maskz_fmadd_ps() {
44939 let a = _mm512_set1_ps(1.);
44940 let b = _mm512_setr_ps(
44941 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44942 );
44943 let c = _mm512_set1_ps(1.);
44944 let r = _mm512_maskz_fmadd_ps(0, a, b, c);
44945 assert_eq_m512(r, _mm512_setzero_ps());
44946 let r = _mm512_maskz_fmadd_ps(0b00000000_11111111, a, b, c);
44947 let e = _mm512_setr_ps(
44948 1., 2., 3., 4., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
44949 );
44950 assert_eq_m512(r, e);
44951 }
44952
44953 #[simd_test(enable = "avx512f")]
44954 unsafe fn test_mm512_mask3_fmadd_ps() {
44955 let a = _mm512_set1_ps(1.);
44956 let b = _mm512_setr_ps(
44957 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
44958 );
44959 let c = _mm512_set1_ps(2.);
44960 let r = _mm512_mask3_fmadd_ps(a, b, c, 0);
44961 assert_eq_m512(r, c);
44962 let r = _mm512_mask3_fmadd_ps(a, b, c, 0b00000000_11111111);
44963 let e = _mm512_setr_ps(
44964 2., 3., 4., 5., 6., 7., 8., 9., 2., 2., 2., 2., 2., 2., 2., 2.,
44965 );
44966 assert_eq_m512(r, e);
44967 }
44968
44969 #[simd_test(enable = "avx512f,avx512vl")]
44970 unsafe fn test_mm256_mask_fmadd_ps() {
44971 let a = _mm256_set1_ps(1.);
44972 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44973 let c = _mm256_set1_ps(1.);
44974 let r = _mm256_mask_fmadd_ps(a, 0, b, c);
44975 assert_eq_m256(r, a);
44976 let r = _mm256_mask_fmadd_ps(a, 0b11111111, b, c);
44977 let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
44978 assert_eq_m256(r, e);
44979 }
44980
44981 #[simd_test(enable = "avx512f,avx512vl")]
44982 unsafe fn test_mm256_maskz_fmadd_ps() {
44983 let a = _mm256_set1_ps(1.);
44984 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44985 let c = _mm256_set1_ps(1.);
44986 let r = _mm256_maskz_fmadd_ps(0, a, b, c);
44987 assert_eq_m256(r, _mm256_setzero_ps());
44988 let r = _mm256_maskz_fmadd_ps(0b11111111, a, b, c);
44989 let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
44990 assert_eq_m256(r, e);
44991 }
44992
44993 #[simd_test(enable = "avx512f,avx512vl")]
44994 unsafe fn test_mm256_mask3_fmadd_ps() {
44995 let a = _mm256_set1_ps(1.);
44996 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
44997 let c = _mm256_set1_ps(1.);
44998 let r = _mm256_mask3_fmadd_ps(a, b, c, 0);
44999 assert_eq_m256(r, c);
45000 let r = _mm256_mask3_fmadd_ps(a, b, c, 0b11111111);
45001 let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
45002 assert_eq_m256(r, e);
45003 }
45004
45005 #[simd_test(enable = "avx512f,avx512vl")]
45006 unsafe fn test_mm_mask_fmadd_ps() {
45007 let a = _mm_set1_ps(1.);
45008 let b = _mm_set_ps(0., 1., 2., 3.);
45009 let c = _mm_set1_ps(1.);
45010 let r = _mm_mask_fmadd_ps(a, 0, b, c);
45011 assert_eq_m128(r, a);
45012 let r = _mm_mask_fmadd_ps(a, 0b00001111, b, c);
45013 let e = _mm_set_ps(1., 2., 3., 4.);
45014 assert_eq_m128(r, e);
45015 }
45016
45017 #[simd_test(enable = "avx512f,avx512vl")]
45018 unsafe fn test_mm_maskz_fmadd_ps() {
45019 let a = _mm_set1_ps(1.);
45020 let b = _mm_set_ps(0., 1., 2., 3.);
45021 let c = _mm_set1_ps(1.);
45022 let r = _mm_maskz_fmadd_ps(0, a, b, c);
45023 assert_eq_m128(r, _mm_setzero_ps());
45024 let r = _mm_maskz_fmadd_ps(0b00001111, a, b, c);
45025 let e = _mm_set_ps(1., 2., 3., 4.);
45026 assert_eq_m128(r, e);
45027 }
45028
45029 #[simd_test(enable = "avx512f,avx512vl")]
45030 unsafe fn test_mm_mask3_fmadd_ps() {
45031 let a = _mm_set1_ps(1.);
45032 let b = _mm_set_ps(0., 1., 2., 3.);
45033 let c = _mm_set1_ps(1.);
45034 let r = _mm_mask3_fmadd_ps(a, b, c, 0);
45035 assert_eq_m128(r, c);
45036 let r = _mm_mask3_fmadd_ps(a, b, c, 0b00001111);
45037 let e = _mm_set_ps(1., 2., 3., 4.);
45038 assert_eq_m128(r, e);
45039 }
45040
45041 #[simd_test(enable = "avx512f")]
45042 unsafe fn test_mm512_fmsub_ps() {
45043 let a = _mm512_setr_ps(
45044 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
45045 );
45046 let b = _mm512_setr_ps(
45047 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45048 );
45049 let c = _mm512_setr_ps(
45050 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
45051 );
45052 let r = _mm512_fmsub_ps(a, b, c);
45053 let e = _mm512_setr_ps(
45054 -1., 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14.,
45055 );
45056 assert_eq_m512(r, e);
45057 }
45058
45059 #[simd_test(enable = "avx512f")]
45060 unsafe fn test_mm512_mask_fmsub_ps() {
45061 let a = _mm512_set1_ps(1.);
45062 let b = _mm512_setr_ps(
45063 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45064 );
45065 let c = _mm512_set1_ps(1.);
45066 let r = _mm512_mask_fmsub_ps(a, 0, b, c);
45067 assert_eq_m512(r, a);
45068 let r = _mm512_mask_fmsub_ps(a, 0b00000000_11111111, b, c);
45069 let e = _mm512_setr_ps(
45070 -1., 0., 1., 2., 3., 4., 5., 6., 1., 1., 1., 1., 1., 1., 1., 1.,
45071 );
45072 assert_eq_m512(r, e);
45073 }
45074
45075 #[simd_test(enable = "avx512f")]
45076 unsafe fn test_mm512_maskz_fmsub_ps() {
45077 let a = _mm512_set1_ps(1.);
45078 let b = _mm512_setr_ps(
45079 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45080 );
45081 let c = _mm512_set1_ps(1.);
45082 let r = _mm512_maskz_fmsub_ps(0, a, b, c);
45083 assert_eq_m512(r, _mm512_setzero_ps());
45084 let r = _mm512_maskz_fmsub_ps(0b00000000_11111111, a, b, c);
45085 let e = _mm512_setr_ps(
45086 -1., 0., 1., 2., 3., 4., 5., 6., 0., 0., 0., 0., 0., 0., 0., 0.,
45087 );
45088 assert_eq_m512(r, e);
45089 }
45090
45091 #[simd_test(enable = "avx512f")]
45092 unsafe fn test_mm512_mask3_fmsub_ps() {
45093 let a = _mm512_set1_ps(1.);
45094 let b = _mm512_setr_ps(
45095 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45096 );
45097 let c = _mm512_setr_ps(
45098 1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
45099 );
45100 let r = _mm512_mask3_fmsub_ps(a, b, c, 0);
45101 assert_eq_m512(r, c);
45102 let r = _mm512_mask3_fmsub_ps(a, b, c, 0b00000000_11111111);
45103 let e = _mm512_setr_ps(
45104 -1., 0., 1., 2., 3., 4., 5., 6., 2., 2., 2., 2., 2., 2., 2., 2.,
45105 );
45106 assert_eq_m512(r, e);
45107 }
45108
45109 #[simd_test(enable = "avx512f,avx512vl")]
45110 unsafe fn test_mm256_mask_fmsub_ps() {
45111 let a = _mm256_set1_ps(1.);
45112 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45113 let c = _mm256_set1_ps(1.);
45114 let r = _mm256_mask_fmsub_ps(a, 0, b, c);
45115 assert_eq_m256(r, a);
45116 let r = _mm256_mask_fmsub_ps(a, 0b11111111, b, c);
45117 let e = _mm256_set_ps(-1., 0., 1., 2., 3., 4., 5., 6.);
45118 assert_eq_m256(r, e);
45119 }
45120
45121 #[simd_test(enable = "avx512f,avx512vl")]
45122 unsafe fn test_mm256_maskz_fmsub_ps() {
45123 let a = _mm256_set1_ps(1.);
45124 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45125 let c = _mm256_set1_ps(1.);
45126 let r = _mm256_maskz_fmsub_ps(0, a, b, c);
45127 assert_eq_m256(r, _mm256_setzero_ps());
45128 let r = _mm256_maskz_fmsub_ps(0b11111111, a, b, c);
45129 let e = _mm256_set_ps(-1., 0., 1., 2., 3., 4., 5., 6.);
45130 assert_eq_m256(r, e);
45131 }
45132
45133 #[simd_test(enable = "avx512f,avx512vl")]
45134 unsafe fn test_mm256_mask3_fmsub_ps() {
45135 let a = _mm256_set1_ps(1.);
45136 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45137 let c = _mm256_set1_ps(1.);
45138 let r = _mm256_mask3_fmsub_ps(a, b, c, 0);
45139 assert_eq_m256(r, c);
45140 let r = _mm256_mask3_fmsub_ps(a, b, c, 0b11111111);
45141 let e = _mm256_set_ps(-1., 0., 1., 2., 3., 4., 5., 6.);
45142 assert_eq_m256(r, e);
45143 }
45144
45145 #[simd_test(enable = "avx512f,avx512vl")]
45146 unsafe fn test_mm_mask_fmsub_ps() {
45147 let a = _mm_set1_ps(1.);
45148 let b = _mm_set_ps(0., 1., 2., 3.);
45149 let c = _mm_set1_ps(1.);
45150 let r = _mm_mask_fmsub_ps(a, 0, b, c);
45151 assert_eq_m128(r, a);
45152 let r = _mm_mask_fmsub_ps(a, 0b00001111, b, c);
45153 let e = _mm_set_ps(-1., 0., 1., 2.);
45154 assert_eq_m128(r, e);
45155 }
45156
45157 #[simd_test(enable = "avx512f,avx512vl")]
45158 unsafe fn test_mm_maskz_fmsub_ps() {
45159 let a = _mm_set1_ps(1.);
45160 let b = _mm_set_ps(0., 1., 2., 3.);
45161 let c = _mm_set1_ps(1.);
45162 let r = _mm_maskz_fmsub_ps(0, a, b, c);
45163 assert_eq_m128(r, _mm_setzero_ps());
45164 let r = _mm_maskz_fmsub_ps(0b00001111, a, b, c);
45165 let e = _mm_set_ps(-1., 0., 1., 2.);
45166 assert_eq_m128(r, e);
45167 }
45168
45169 #[simd_test(enable = "avx512f,avx512vl")]
45170 unsafe fn test_mm_mask3_fmsub_ps() {
45171 let a = _mm_set1_ps(1.);
45172 let b = _mm_set_ps(0., 1., 2., 3.);
45173 let c = _mm_set1_ps(1.);
45174 let r = _mm_mask3_fmsub_ps(a, b, c, 0);
45175 assert_eq_m128(r, c);
45176 let r = _mm_mask3_fmsub_ps(a, b, c, 0b00001111);
45177 let e = _mm_set_ps(-1., 0., 1., 2.);
45178 assert_eq_m128(r, e);
45179 }
45180
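    // `fmaddsub` alternates per lane: even-indexed lanes compute a*b - c and
    // odd-indexed lanes compute a*b + c. A rough scalar sketch (illustrative
    // only):
    //
    //     // lane i: if i % 2 == 0 { a[i] * b[i] - c[i] } else { a[i] * b[i] + c[i] }
    //
    // Keep in mind that `_mm512_setr_ps` lists lane 0 first while
    // `_mm256_set_ps`/`_mm_set_ps` list the highest lane first, so the 256- and
    // 128-bit expected vectors read in the opposite order.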
45181 #[simd_test(enable = "avx512f")]
45182 unsafe fn test_mm512_fmaddsub_ps() {
45183 let a = _mm512_set1_ps(1.);
45184 let b = _mm512_setr_ps(
45185 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45186 );
45187 let c = _mm512_set1_ps(1.);
45188 let r = _mm512_fmaddsub_ps(a, b, c);
45189 let e = _mm512_setr_ps(
45190 -1., 2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16.,
45191 );
45192 assert_eq_m512(r, e);
45193 }
45194
45195 #[simd_test(enable = "avx512f")]
45196 unsafe fn test_mm512_mask_fmaddsub_ps() {
45197 let a = _mm512_set1_ps(1.);
45198 let b = _mm512_setr_ps(
45199 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45200 );
45201 let c = _mm512_set1_ps(1.);
45202 let r = _mm512_mask_fmaddsub_ps(a, 0, b, c);
45203 assert_eq_m512(r, a);
45204 let r = _mm512_mask_fmaddsub_ps(a, 0b00000000_11111111, b, c);
45205 let e = _mm512_setr_ps(
45206 -1., 2., 1., 4., 3., 6., 5., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
45207 );
45208 assert_eq_m512(r, e);
45209 }
45210
45211 #[simd_test(enable = "avx512f")]
45212 unsafe fn test_mm512_maskz_fmaddsub_ps() {
45213 let a = _mm512_set1_ps(1.);
45214 let b = _mm512_setr_ps(
45215 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45216 );
45217 let c = _mm512_set1_ps(1.);
45218 let r = _mm512_maskz_fmaddsub_ps(0, a, b, c);
45219 assert_eq_m512(r, _mm512_setzero_ps());
45220 let r = _mm512_maskz_fmaddsub_ps(0b00000000_11111111, a, b, c);
45221 let e = _mm512_setr_ps(
45222 -1., 2., 1., 4., 3., 6., 5., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
45223 );
45224 assert_eq_m512(r, e);
45225 }
45226
45227 #[simd_test(enable = "avx512f")]
45228 unsafe fn test_mm512_mask3_fmaddsub_ps() {
45229 let a = _mm512_set1_ps(1.);
45230 let b = _mm512_setr_ps(
45231 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45232 );
45233 let c = _mm512_setr_ps(
45234 1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
45235 );
45236 let r = _mm512_mask3_fmaddsub_ps(a, b, c, 0);
45237 assert_eq_m512(r, c);
45238 let r = _mm512_mask3_fmaddsub_ps(a, b, c, 0b00000000_11111111);
45239 let e = _mm512_setr_ps(
45240 -1., 2., 1., 4., 3., 6., 5., 8., 2., 2., 2., 2., 2., 2., 2., 2.,
45241 );
45242 assert_eq_m512(r, e);
45243 }
45244
45245 #[simd_test(enable = "avx512f,avx512vl")]
45246 unsafe fn test_mm256_mask_fmaddsub_ps() {
45247 let a = _mm256_set1_ps(1.);
45248 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45249 let c = _mm256_set1_ps(1.);
45250 let r = _mm256_mask_fmaddsub_ps(a, 0, b, c);
45251 assert_eq_m256(r, a);
45252 let r = _mm256_mask_fmaddsub_ps(a, 0b11111111, b, c);
45253 let e = _mm256_set_ps(1., 0., 3., 2., 5., 4., 7., 6.);
45254 assert_eq_m256(r, e);
45255 }
45256
45257 #[simd_test(enable = "avx512f,avx512vl")]
45258 unsafe fn test_mm256_maskz_fmaddsub_ps() {
45259 let a = _mm256_set1_ps(1.);
45260 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45261 let c = _mm256_set1_ps(1.);
45262 let r = _mm256_maskz_fmaddsub_ps(0, a, b, c);
45263 assert_eq_m256(r, _mm256_setzero_ps());
45264 let r = _mm256_maskz_fmaddsub_ps(0b11111111, a, b, c);
45265 let e = _mm256_set_ps(1., 0., 3., 2., 5., 4., 7., 6.);
45266 assert_eq_m256(r, e);
45267 }
45268
45269 #[simd_test(enable = "avx512f,avx512vl")]
45270 unsafe fn test_mm256_mask3_fmaddsub_ps() {
45271 let a = _mm256_set1_ps(1.);
45272 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45273 let c = _mm256_set1_ps(1.);
45274 let r = _mm256_mask3_fmaddsub_ps(a, b, c, 0);
45275 assert_eq_m256(r, c);
45276 let r = _mm256_mask3_fmaddsub_ps(a, b, c, 0b11111111);
45277 let e = _mm256_set_ps(1., 0., 3., 2., 5., 4., 7., 6.);
45278 assert_eq_m256(r, e);
45279 }
45280
45281 #[simd_test(enable = "avx512f,avx512vl")]
45282 unsafe fn test_mm_mask_fmaddsub_ps() {
45283 let a = _mm_set1_ps(1.);
45284 let b = _mm_set_ps(0., 1., 2., 3.);
45285 let c = _mm_set1_ps(1.);
45286 let r = _mm_mask_fmaddsub_ps(a, 0, b, c);
45287 assert_eq_m128(r, a);
45288 let r = _mm_mask_fmaddsub_ps(a, 0b00001111, b, c);
45289 let e = _mm_set_ps(1., 0., 3., 2.);
45290 assert_eq_m128(r, e);
45291 }
45292
45293 #[simd_test(enable = "avx512f,avx512vl")]
45294 unsafe fn test_mm_maskz_fmaddsub_ps() {
45295 let a = _mm_set1_ps(1.);
45296 let b = _mm_set_ps(0., 1., 2., 3.);
45297 let c = _mm_set1_ps(1.);
45298 let r = _mm_maskz_fmaddsub_ps(0, a, b, c);
45299 assert_eq_m128(r, _mm_setzero_ps());
45300 let r = _mm_maskz_fmaddsub_ps(0b00001111, a, b, c);
45301 let e = _mm_set_ps(1., 0., 3., 2.);
45302 assert_eq_m128(r, e);
45303 }
45304
45305 #[simd_test(enable = "avx512f,avx512vl")]
45306 unsafe fn test_mm_mask3_fmaddsub_ps() {
45307 let a = _mm_set1_ps(1.);
45308 let b = _mm_set_ps(0., 1., 2., 3.);
45309 let c = _mm_set1_ps(1.);
45310 let r = _mm_mask3_fmaddsub_ps(a, b, c, 0);
45311 assert_eq_m128(r, c);
45312 let r = _mm_mask3_fmaddsub_ps(a, b, c, 0b00001111);
45313 let e = _mm_set_ps(1., 0., 3., 2.);
45314 assert_eq_m128(r, e);
45315 }
45316
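    // `fmsubadd` is the mirror image of `fmaddsub`: even-indexed lanes compute
    // a*b + c and odd-indexed lanes compute a*b - c, hence lane 0 of the 512-bit
    // result is 0*1 + 1 = 1 rather than -1.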
45317 #[simd_test(enable = "avx512f")]
45318 unsafe fn test_mm512_fmsubadd_ps() {
45319 let a = _mm512_setr_ps(
45320 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
45321 );
45322 let b = _mm512_setr_ps(
45323 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45324 );
45325 let c = _mm512_setr_ps(
45326 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
45327 );
45328 let r = _mm512_fmsubadd_ps(a, b, c);
45329 let e = _mm512_setr_ps(
45330 1., 0., 3., 2., 5., 4., 7., 6., 9., 8., 11., 10., 13., 12., 15., 14.,
45331 );
45332 assert_eq_m512(r, e);
45333 }
45334
45335 #[simd_test(enable = "avx512f")]
45336 unsafe fn test_mm512_mask_fmsubadd_ps() {
45337 let a = _mm512_set1_ps(1.);
45338 let b = _mm512_setr_ps(
45339 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45340 );
45341 let c = _mm512_set1_ps(1.);
45342 let r = _mm512_mask_fmsubadd_ps(a, 0, b, c);
45343 assert_eq_m512(r, a);
45344 let r = _mm512_mask_fmsubadd_ps(a, 0b00000000_11111111, b, c);
45345 let e = _mm512_setr_ps(
45346 1., 0., 3., 2., 5., 4., 7., 6., 1., 1., 1., 1., 1., 1., 1., 1.,
45347 );
45348 assert_eq_m512(r, e);
45349 }
45350
45351 #[simd_test(enable = "avx512f")]
45352 unsafe fn test_mm512_maskz_fmsubadd_ps() {
45353 let a = _mm512_set1_ps(1.);
45354 let b = _mm512_setr_ps(
45355 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45356 );
45357 let c = _mm512_set1_ps(1.);
45358 let r = _mm512_maskz_fmsubadd_ps(0, a, b, c);
45359 assert_eq_m512(r, _mm512_setzero_ps());
45360 let r = _mm512_maskz_fmsubadd_ps(0b00000000_11111111, a, b, c);
45361 let e = _mm512_setr_ps(
45362 1., 0., 3., 2., 5., 4., 7., 6., 0., 0., 0., 0., 0., 0., 0., 0.,
45363 );
45364 assert_eq_m512(r, e);
45365 }
45366
45367 #[simd_test(enable = "avx512f")]
45368 unsafe fn test_mm512_mask3_fmsubadd_ps() {
45369 let a = _mm512_set1_ps(1.);
45370 let b = _mm512_setr_ps(
45371 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45372 );
45373 let c = _mm512_setr_ps(
45374 1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
45375 );
45376 let r = _mm512_mask3_fmsubadd_ps(a, b, c, 0);
45377 assert_eq_m512(r, c);
45378 let r = _mm512_mask3_fmsubadd_ps(a, b, c, 0b00000000_11111111);
45379 let e = _mm512_setr_ps(
45380 1., 0., 3., 2., 5., 4., 7., 6., 2., 2., 2., 2., 2., 2., 2., 2.,
45381 );
45382 assert_eq_m512(r, e);
45383 }
45384
45385 #[simd_test(enable = "avx512f,avx512vl")]
45386 unsafe fn test_mm256_mask_fmsubadd_ps() {
45387 let a = _mm256_set1_ps(1.);
45388 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45389 let c = _mm256_set1_ps(1.);
45390 let r = _mm256_mask_fmsubadd_ps(a, 0, b, c);
45391 assert_eq_m256(r, a);
45392 let r = _mm256_mask_fmsubadd_ps(a, 0b11111111, b, c);
45393 let e = _mm256_set_ps(-1., 2., 1., 4., 3., 6., 5., 8.);
45394 assert_eq_m256(r, e);
45395 }
45396
45397 #[simd_test(enable = "avx512f,avx512vl")]
45398 unsafe fn test_mm256_maskz_fmsubadd_ps() {
45399 let a = _mm256_set1_ps(1.);
45400 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45401 let c = _mm256_set1_ps(1.);
45402 let r = _mm256_maskz_fmsubadd_ps(0, a, b, c);
45403 assert_eq_m256(r, _mm256_setzero_ps());
45404 let r = _mm256_maskz_fmsubadd_ps(0b11111111, a, b, c);
45405 let e = _mm256_set_ps(-1., 2., 1., 4., 3., 6., 5., 8.);
45406 assert_eq_m256(r, e);
45407 }
45408
45409 #[simd_test(enable = "avx512f,avx512vl")]
45410 unsafe fn test_mm256_mask3_fmsubadd_ps() {
45411 let a = _mm256_set1_ps(1.);
45412 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45413 let c = _mm256_set1_ps(1.);
45414 let r = _mm256_mask3_fmsubadd_ps(a, b, c, 0);
45415 assert_eq_m256(r, c);
45416 let r = _mm256_mask3_fmsubadd_ps(a, b, c, 0b11111111);
45417 let e = _mm256_set_ps(-1., 2., 1., 4., 3., 6., 5., 8.);
45418 assert_eq_m256(r, e);
45419 }
45420
45421 #[simd_test(enable = "avx512f,avx512vl")]
45422 unsafe fn test_mm_mask_fmsubadd_ps() {
45423 let a = _mm_set1_ps(1.);
45424 let b = _mm_set_ps(0., 1., 2., 3.);
45425 let c = _mm_set1_ps(1.);
45426 let r = _mm_mask_fmsubadd_ps(a, 0, b, c);
45427 assert_eq_m128(r, a);
45428 let r = _mm_mask_fmsubadd_ps(a, 0b00001111, b, c);
45429 let e = _mm_set_ps(-1., 2., 1., 4.);
45430 assert_eq_m128(r, e);
45431 }
45432
45433 #[simd_test(enable = "avx512f,avx512vl")]
45434 unsafe fn test_mm_maskz_fmsubadd_ps() {
45435 let a = _mm_set1_ps(1.);
45436 let b = _mm_set_ps(0., 1., 2., 3.);
45437 let c = _mm_set1_ps(1.);
45438 let r = _mm_maskz_fmsubadd_ps(0, a, b, c);
45439 assert_eq_m128(r, _mm_setzero_ps());
45440 let r = _mm_maskz_fmsubadd_ps(0b00001111, a, b, c);
45441 let e = _mm_set_ps(-1., 2., 1., 4.);
45442 assert_eq_m128(r, e);
45443 }
45444
45445 #[simd_test(enable = "avx512f,avx512vl")]
45446 unsafe fn test_mm_mask3_fmsubadd_ps() {
45447 let a = _mm_set1_ps(1.);
45448 let b = _mm_set_ps(0., 1., 2., 3.);
45449 let c = _mm_set1_ps(1.);
45450 let r = _mm_mask3_fmsubadd_ps(a, b, c, 0);
45451 assert_eq_m128(r, c);
45452 let r = _mm_mask3_fmsubadd_ps(a, b, c, 0b00001111);
45453 let e = _mm_set_ps(-1., 2., 1., 4.);
45454 assert_eq_m128(r, e);
45455 }
45456
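    // `fnmadd` computes -(a*b) + c; with a = c = 1 each lane is 1 - b[i], which
    // is where the descending 1, 0, -1, ... expected values come from.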
45457 #[simd_test(enable = "avx512f")]
45458 unsafe fn test_mm512_fnmadd_ps() {
45459 let a = _mm512_set1_ps(1.);
45460 let b = _mm512_setr_ps(
45461 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45462 );
45463 let c = _mm512_set1_ps(1.);
45464 let r = _mm512_fnmadd_ps(a, b, c);
45465 let e = _mm512_setr_ps(
45466 1., 0., -1., -2., -3., -4., -5., -6., -7., -8., -9., -10., -11., -12., -13., -14.,
45467 );
45468 assert_eq_m512(r, e);
45469 }
45470
45471 #[simd_test(enable = "avx512f")]
45472 unsafe fn test_mm512_mask_fnmadd_ps() {
45473 let a = _mm512_set1_ps(1.);
45474 let b = _mm512_setr_ps(
45475 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45476 );
45477 let c = _mm512_set1_ps(1.);
45478 let r = _mm512_mask_fnmadd_ps(a, 0, b, c);
45479 assert_eq_m512(r, a);
45480 let r = _mm512_mask_fnmadd_ps(a, 0b00000000_11111111, b, c);
45481 let e = _mm512_setr_ps(
45482 1., 0., -1., -2., -3., -4., -5., -6., 1., 1., 1., 1., 1., 1., 1., 1.,
45483 );
45484 assert_eq_m512(r, e);
45485 }
45486
45487 #[simd_test(enable = "avx512f")]
45488 unsafe fn test_mm512_maskz_fnmadd_ps() {
45489 let a = _mm512_set1_ps(1.);
45490 let b = _mm512_setr_ps(
45491 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45492 );
45493 let c = _mm512_set1_ps(1.);
45494 let r = _mm512_maskz_fnmadd_ps(0, a, b, c);
45495 assert_eq_m512(r, _mm512_setzero_ps());
45496 let r = _mm512_maskz_fnmadd_ps(0b00000000_11111111, a, b, c);
45497 let e = _mm512_setr_ps(
45498 1., 0., -1., -2., -3., -4., -5., -6., 0., 0., 0., 0., 0., 0., 0., 0.,
45499 );
45500 assert_eq_m512(r, e);
45501 }
45502
45503 #[simd_test(enable = "avx512f")]
45504 unsafe fn test_mm512_mask3_fnmadd_ps() {
45505 let a = _mm512_set1_ps(1.);
45506 let b = _mm512_setr_ps(
45507 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45508 );
45509 let c = _mm512_setr_ps(
45510 1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
45511 );
45512 let r = _mm512_mask3_fnmadd_ps(a, b, c, 0);
45513 assert_eq_m512(r, c);
45514 let r = _mm512_mask3_fnmadd_ps(a, b, c, 0b00000000_11111111);
45515 let e = _mm512_setr_ps(
45516 1., 0., -1., -2., -3., -4., -5., -6., 2., 2., 2., 2., 2., 2., 2., 2.,
45517 );
45518 assert_eq_m512(r, e);
45519 }
45520
45521 #[simd_test(enable = "avx512f,avx512vl")]
45522 unsafe fn test_mm256_mask_fnmadd_ps() {
45523 let a = _mm256_set1_ps(1.);
45524 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45525 let c = _mm256_set1_ps(1.);
45526 let r = _mm256_mask_fnmadd_ps(a, 0, b, c);
45527 assert_eq_m256(r, a);
45528 let r = _mm256_mask_fnmadd_ps(a, 0b11111111, b, c);
45529 let e = _mm256_set_ps(1., 0., -1., -2., -3., -4., -5., -6.);
45530 assert_eq_m256(r, e);
45531 }
45532
45533 #[simd_test(enable = "avx512f,avx512vl")]
45534 unsafe fn test_mm256_maskz_fnmadd_ps() {
45535 let a = _mm256_set1_ps(1.);
45536 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45537 let c = _mm256_set1_ps(1.);
45538 let r = _mm256_maskz_fnmadd_ps(0, a, b, c);
45539 assert_eq_m256(r, _mm256_setzero_ps());
45540 let r = _mm256_maskz_fnmadd_ps(0b11111111, a, b, c);
45541 let e = _mm256_set_ps(1., 0., -1., -2., -3., -4., -5., -6.);
45542 assert_eq_m256(r, e);
45543 }
45544
45545 #[simd_test(enable = "avx512f,avx512vl")]
45546 unsafe fn test_mm256_mask3_fnmadd_ps() {
45547 let a = _mm256_set1_ps(1.);
45548 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45549 let c = _mm256_set1_ps(1.);
45550 let r = _mm256_mask3_fnmadd_ps(a, b, c, 0);
45551 assert_eq_m256(r, c);
45552 let r = _mm256_mask3_fnmadd_ps(a, b, c, 0b11111111);
45553 let e = _mm256_set_ps(1., 0., -1., -2., -3., -4., -5., -6.);
45554 assert_eq_m256(r, e);
45555 }
45556
45557 #[simd_test(enable = "avx512f,avx512vl")]
45558 unsafe fn test_mm_mask_fnmadd_ps() {
45559 let a = _mm_set1_ps(1.);
45560 let b = _mm_set_ps(0., 1., 2., 3.);
45561 let c = _mm_set1_ps(1.);
45562 let r = _mm_mask_fnmadd_ps(a, 0, b, c);
45563 assert_eq_m128(r, a);
45564 let r = _mm_mask_fnmadd_ps(a, 0b00001111, b, c);
45565 let e = _mm_set_ps(1., 0., -1., -2.);
45566 assert_eq_m128(r, e);
45567 }
45568
45569 #[simd_test(enable = "avx512f,avx512vl")]
45570 unsafe fn test_mm_maskz_fnmadd_ps() {
45571 let a = _mm_set1_ps(1.);
45572 let b = _mm_set_ps(0., 1., 2., 3.);
45573 let c = _mm_set1_ps(1.);
45574 let r = _mm_maskz_fnmadd_ps(0, a, b, c);
45575 assert_eq_m128(r, _mm_setzero_ps());
45576 let r = _mm_maskz_fnmadd_ps(0b00001111, a, b, c);
45577 let e = _mm_set_ps(1., 0., -1., -2.);
45578 assert_eq_m128(r, e);
45579 }
45580
45581 #[simd_test(enable = "avx512f,avx512vl")]
45582 unsafe fn test_mm_mask3_fnmadd_ps() {
45583 let a = _mm_set1_ps(1.);
45584 let b = _mm_set_ps(0., 1., 2., 3.);
45585 let c = _mm_set1_ps(1.);
45586 let r = _mm_mask3_fnmadd_ps(a, b, c, 0);
45587 assert_eq_m128(r, c);
45588 let r = _mm_mask3_fnmadd_ps(a, b, c, 0b00001111);
45589 let e = _mm_set_ps(1., 0., -1., -2.);
45590 assert_eq_m128(r, e);
45591 }
45592
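    // `fnmsub` computes -(a*b) - c; with a = c = 1 each lane is -b[i] - 1,
    // giving the -1, -2, ... sequence below.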
45593 #[simd_test(enable = "avx512f")]
45594 unsafe fn test_mm512_fnmsub_ps() {
45595 let a = _mm512_set1_ps(1.);
45596 let b = _mm512_setr_ps(
45597 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45598 );
45599 let c = _mm512_set1_ps(1.);
45600 let r = _mm512_fnmsub_ps(a, b, c);
45601 let e = _mm512_setr_ps(
45602 -1., -2., -3., -4., -5., -6., -7., -8., -9., -10., -11., -12., -13., -14., -15., -16.,
45603 );
45604 assert_eq_m512(r, e);
45605 }
45606
45607 #[simd_test(enable = "avx512f")]
45608 unsafe fn test_mm512_mask_fnmsub_ps() {
45609 let a = _mm512_set1_ps(1.);
45610 let b = _mm512_setr_ps(
45611 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45612 );
45613 let c = _mm512_set1_ps(1.);
45614 let r = _mm512_mask_fnmsub_ps(a, 0, b, c);
45615 assert_eq_m512(r, a);
45616 let r = _mm512_mask_fnmsub_ps(a, 0b00000000_11111111, b, c);
45617 let e = _mm512_setr_ps(
45618 -1., -2., -3., -4., -5., -6., -7., -8., 1., 1., 1., 1., 1., 1., 1., 1.,
45619 );
45620 assert_eq_m512(r, e);
45621 }
45622
45623 #[simd_test(enable = "avx512f")]
45624 unsafe fn test_mm512_maskz_fnmsub_ps() {
45625 let a = _mm512_set1_ps(1.);
45626 let b = _mm512_setr_ps(
45627 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45628 );
45629 let c = _mm512_set1_ps(1.);
45630 let r = _mm512_maskz_fnmsub_ps(0, a, b, c);
45631 assert_eq_m512(r, _mm512_setzero_ps());
45632 let r = _mm512_maskz_fnmsub_ps(0b00000000_11111111, a, b, c);
45633 let e = _mm512_setr_ps(
45634 -1., -2., -3., -4., -5., -6., -7., -8., 0., 0., 0., 0., 0., 0., 0., 0.,
45635 );
45636 assert_eq_m512(r, e);
45637 }
45638
45639 #[simd_test(enable = "avx512f")]
45640 unsafe fn test_mm512_mask3_fnmsub_ps() {
45641 let a = _mm512_set1_ps(1.);
45642 let b = _mm512_setr_ps(
45643 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
45644 );
45645 let c = _mm512_setr_ps(
45646 1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
45647 );
45648 let r = _mm512_mask3_fnmsub_ps(a, b, c, 0);
45649 assert_eq_m512(r, c);
45650 let r = _mm512_mask3_fnmsub_ps(a, b, c, 0b00000000_11111111);
45651 let e = _mm512_setr_ps(
45652 -1., -2., -3., -4., -5., -6., -7., -8., 2., 2., 2., 2., 2., 2., 2., 2.,
45653 );
45654 assert_eq_m512(r, e);
45655 }
45656
45657 #[simd_test(enable = "avx512f,avx512vl")]
45658 unsafe fn test_mm256_mask_fnmsub_ps() {
45659 let a = _mm256_set1_ps(1.);
45660 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45661 let c = _mm256_set1_ps(1.);
45662 let r = _mm256_mask_fnmsub_ps(a, 0, b, c);
45663 assert_eq_m256(r, a);
45664 let r = _mm256_mask_fnmsub_ps(a, 0b11111111, b, c);
45665 let e = _mm256_set_ps(-1., -2., -3., -4., -5., -6., -7., -8.);
45666 assert_eq_m256(r, e);
45667 }
45668
45669 #[simd_test(enable = "avx512f,avx512vl")]
45670 unsafe fn test_mm256_maskz_fnmsub_ps() {
45671 let a = _mm256_set1_ps(1.);
45672 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45673 let c = _mm256_set1_ps(1.);
45674 let r = _mm256_maskz_fnmsub_ps(0, a, b, c);
45675 assert_eq_m256(r, _mm256_setzero_ps());
45676 let r = _mm256_maskz_fnmsub_ps(0b11111111, a, b, c);
45677 let e = _mm256_set_ps(-1., -2., -3., -4., -5., -6., -7., -8.);
45678 assert_eq_m256(r, e);
45679 }
45680
45681 #[simd_test(enable = "avx512f,avx512vl")]
45682 unsafe fn test_mm256_mask3_fnmsub_ps() {
45683 let a = _mm256_set1_ps(1.);
45684 let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
45685 let c = _mm256_set1_ps(1.);
45686 let r = _mm256_mask3_fnmsub_ps(a, b, c, 0);
45687 assert_eq_m256(r, c);
45688 let r = _mm256_mask3_fnmsub_ps(a, b, c, 0b11111111);
45689 let e = _mm256_set_ps(-1., -2., -3., -4., -5., -6., -7., -8.);
45690 assert_eq_m256(r, e);
45691 }
45692
45693 #[simd_test(enable = "avx512f,avx512vl")]
45694 unsafe fn test_mm_mask_fnmsub_ps() {
45695 let a = _mm_set1_ps(1.);
45696 let b = _mm_set_ps(0., 1., 2., 3.);
45697 let c = _mm_set1_ps(1.);
45698 let r = _mm_mask_fnmsub_ps(a, 0, b, c);
45699 assert_eq_m128(r, a);
45700 let r = _mm_mask_fnmsub_ps(a, 0b00001111, b, c);
45701 let e = _mm_set_ps(-1., -2., -3., -4.);
45702 assert_eq_m128(r, e);
45703 }
45704
45705 #[simd_test(enable = "avx512f,avx512vl")]
45706 unsafe fn test_mm_maskz_fnmsub_ps() {
45707 let a = _mm_set1_ps(1.);
45708 let b = _mm_set_ps(0., 1., 2., 3.);
45709 let c = _mm_set1_ps(1.);
45710 let r = _mm_maskz_fnmsub_ps(0, a, b, c);
45711 assert_eq_m128(r, _mm_setzero_ps());
45712 let r = _mm_maskz_fnmsub_ps(0b00001111, a, b, c);
45713 let e = _mm_set_ps(-1., -2., -3., -4.);
45714 assert_eq_m128(r, e);
45715 }
45716
45717 #[simd_test(enable = "avx512f,avx512vl")]
45718 unsafe fn test_mm_mask3_fnmsub_ps() {
45719 let a = _mm_set1_ps(1.);
45720 let b = _mm_set_ps(0., 1., 2., 3.);
45721 let c = _mm_set1_ps(1.);
45722 let r = _mm_mask3_fnmsub_ps(a, b, c, 0);
45723 assert_eq_m128(r, c);
45724 let r = _mm_mask3_fnmsub_ps(a, b, c, 0b00001111);
45725 let e = _mm_set_ps(-1., -2., -3., -4.);
45726 assert_eq_m128(r, e);
45727 }
45728
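    // `rcp14` is an approximate reciprocal with a relative error of at most
    // 2^-14, so 1/3 comes back as the approximation 0.33333206 rather than the
    // correctly rounded 0.33333334; the tests compare against that exact
    // approximation.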
45729 #[simd_test(enable = "avx512f")]
45730 unsafe fn test_mm512_rcp14_ps() {
45731 let a = _mm512_set1_ps(3.);
45732 let r = _mm512_rcp14_ps(a);
45733 let e = _mm512_set1_ps(0.33333206);
45734 assert_eq_m512(r, e);
45735 }
45736
45737 #[simd_test(enable = "avx512f")]
45738 unsafe fn test_mm512_mask_rcp14_ps() {
45739 let a = _mm512_set1_ps(3.);
45740 let r = _mm512_mask_rcp14_ps(a, 0, a);
45741 assert_eq_m512(r, a);
45742 let r = _mm512_mask_rcp14_ps(a, 0b11111111_00000000, a);
45743 let e = _mm512_setr_ps(
45744 3., 3., 3., 3., 3., 3., 3., 3., 0.33333206, 0.33333206, 0.33333206, 0.33333206,
45745 0.33333206, 0.33333206, 0.33333206, 0.33333206,
45746 );
45747 assert_eq_m512(r, e);
45748 }
45749
45750 #[simd_test(enable = "avx512f")]
45751 unsafe fn test_mm512_maskz_rcp14_ps() {
45752 let a = _mm512_set1_ps(3.);
45753 let r = _mm512_maskz_rcp14_ps(0, a);
45754 assert_eq_m512(r, _mm512_setzero_ps());
45755 let r = _mm512_maskz_rcp14_ps(0b11111111_00000000, a);
45756 let e = _mm512_setr_ps(
45757 0., 0., 0., 0., 0., 0., 0., 0., 0.33333206, 0.33333206, 0.33333206, 0.33333206,
45758 0.33333206, 0.33333206, 0.33333206, 0.33333206,
45759 );
45760 assert_eq_m512(r, e);
45761 }
45762
45763 #[simd_test(enable = "avx512f,avx512vl")]
45764 unsafe fn test_mm256_rcp14_ps() {
45765 let a = _mm256_set1_ps(3.);
45766 let r = _mm256_rcp14_ps(a);
45767 let e = _mm256_set1_ps(0.33333206);
45768 assert_eq_m256(r, e);
45769 }
45770
45771 #[simd_test(enable = "avx512f,avx512vl")]
45772 unsafe fn test_mm256_mask_rcp14_ps() {
45773 let a = _mm256_set1_ps(3.);
45774 let r = _mm256_mask_rcp14_ps(a, 0, a);
45775 assert_eq_m256(r, a);
45776 let r = _mm256_mask_rcp14_ps(a, 0b11111111, a);
45777 let e = _mm256_set1_ps(0.33333206);
45778 assert_eq_m256(r, e);
45779 }
45780
45781 #[simd_test(enable = "avx512f,avx512vl")]
45782 unsafe fn test_mm256_maskz_rcp14_ps() {
45783 let a = _mm256_set1_ps(3.);
45784 let r = _mm256_maskz_rcp14_ps(0, a);
45785 assert_eq_m256(r, _mm256_setzero_ps());
45786 let r = _mm256_maskz_rcp14_ps(0b11111111, a);
45787 let e = _mm256_set1_ps(0.33333206);
45788 assert_eq_m256(r, e);
45789 }
45790
45791 #[simd_test(enable = "avx512f,avx512vl")]
45792 unsafe fn test_mm_rcp14_ps() {
45793 let a = _mm_set1_ps(3.);
45794 let r = _mm_rcp14_ps(a);
45795 let e = _mm_set1_ps(0.33333206);
45796 assert_eq_m128(r, e);
45797 }
45798
45799 #[simd_test(enable = "avx512f,avx512vl")]
45800 unsafe fn test_mm_mask_rcp14_ps() {
45801 let a = _mm_set1_ps(3.);
45802 let r = _mm_mask_rcp14_ps(a, 0, a);
45803 assert_eq_m128(r, a);
45804 let r = _mm_mask_rcp14_ps(a, 0b00001111, a);
45805 let e = _mm_set1_ps(0.33333206);
45806 assert_eq_m128(r, e);
45807 }
45808
45809 #[simd_test(enable = "avx512f,avx512vl")]
45810 unsafe fn test_mm_maskz_rcp14_ps() {
45811 let a = _mm_set1_ps(3.);
45812 let r = _mm_maskz_rcp14_ps(0, a);
45813 assert_eq_m128(r, _mm_setzero_ps());
45814 let r = _mm_maskz_rcp14_ps(0b00001111, a);
45815 let e = _mm_set1_ps(0.33333206);
45816 assert_eq_m128(r, e);
45817 }
45818
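    // `rsqrt14` approximates 1/sqrt(x) with a relative error of at most 2^-14;
    // 1/sqrt(3) ≈ 0.57735027, and 0.5773392 is the approximation these tests
    // expect.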
45819 #[simd_test(enable = "avx512f")]
45820 unsafe fn test_mm512_rsqrt14_ps() {
45821 let a = _mm512_set1_ps(3.);
45822 let r = _mm512_rsqrt14_ps(a);
45823 let e = _mm512_set1_ps(0.5773392);
45824 assert_eq_m512(r, e);
45825 }
45826
45827 #[simd_test(enable = "avx512f")]
45828 unsafe fn test_mm512_mask_rsqrt14_ps() {
45829 let a = _mm512_set1_ps(3.);
45830 let r = _mm512_mask_rsqrt14_ps(a, 0, a);
45831 assert_eq_m512(r, a);
45832 let r = _mm512_mask_rsqrt14_ps(a, 0b11111111_00000000, a);
45833 let e = _mm512_setr_ps(
45834 3., 3., 3., 3., 3., 3., 3., 3., 0.5773392, 0.5773392, 0.5773392, 0.5773392, 0.5773392,
45835 0.5773392, 0.5773392, 0.5773392,
45836 );
45837 assert_eq_m512(r, e);
45838 }
45839
45840 #[simd_test(enable = "avx512f")]
45841 unsafe fn test_mm512_maskz_rsqrt14_ps() {
45842 let a = _mm512_set1_ps(3.);
45843 let r = _mm512_maskz_rsqrt14_ps(0, a);
45844 assert_eq_m512(r, _mm512_setzero_ps());
45845 let r = _mm512_maskz_rsqrt14_ps(0b11111111_00000000, a);
45846 let e = _mm512_setr_ps(
45847 0., 0., 0., 0., 0., 0., 0., 0., 0.5773392, 0.5773392, 0.5773392, 0.5773392, 0.5773392,
45848 0.5773392, 0.5773392, 0.5773392,
45849 );
45850 assert_eq_m512(r, e);
45851 }
45852
45853 #[simd_test(enable = "avx512f,avx512vl")]
45854 unsafe fn test_mm256_rsqrt14_ps() {
45855 let a = _mm256_set1_ps(3.);
45856 let r = _mm256_rsqrt14_ps(a);
45857 let e = _mm256_set1_ps(0.5773392);
45858 assert_eq_m256(r, e);
45859 }
45860
45861 #[simd_test(enable = "avx512f,avx512vl")]
45862 unsafe fn test_mm256_mask_rsqrt14_ps() {
45863 let a = _mm256_set1_ps(3.);
45864 let r = _mm256_mask_rsqrt14_ps(a, 0, a);
45865 assert_eq_m256(r, a);
45866 let r = _mm256_mask_rsqrt14_ps(a, 0b11111111, a);
45867 let e = _mm256_set1_ps(0.5773392);
45868 assert_eq_m256(r, e);
45869 }
45870
45871 #[simd_test(enable = "avx512f,avx512vl")]
45872 unsafe fn test_mm256_maskz_rsqrt14_ps() {
45873 let a = _mm256_set1_ps(3.);
45874 let r = _mm256_maskz_rsqrt14_ps(0, a);
45875 assert_eq_m256(r, _mm256_setzero_ps());
45876 let r = _mm256_maskz_rsqrt14_ps(0b11111111, a);
45877 let e = _mm256_set1_ps(0.5773392);
45878 assert_eq_m256(r, e);
45879 }
45880
45881 #[simd_test(enable = "avx512f,avx512vl")]
45882 unsafe fn test_mm_rsqrt14_ps() {
45883 let a = _mm_set1_ps(3.);
45884 let r = _mm_rsqrt14_ps(a);
45885 let e = _mm_set1_ps(0.5773392);
45886 assert_eq_m128(r, e);
45887 }
45888
45889 #[simd_test(enable = "avx512f,avx512vl")]
45890 unsafe fn test_mm_mask_rsqrt14_ps() {
45891 let a = _mm_set1_ps(3.);
45892 let r = _mm_mask_rsqrt14_ps(a, 0, a);
45893 assert_eq_m128(r, a);
45894 let r = _mm_mask_rsqrt14_ps(a, 0b00001111, a);
45895 let e = _mm_set1_ps(0.5773392);
45896 assert_eq_m128(r, e);
45897 }
45898
45899 #[simd_test(enable = "avx512f,avx512vl")]
45900 unsafe fn test_mm_maskz_rsqrt14_ps() {
45901 let a = _mm_set1_ps(3.);
45902 let r = _mm_maskz_rsqrt14_ps(0, a);
45903 assert_eq_m128(r, _mm_setzero_ps());
45904 let r = _mm_maskz_rsqrt14_ps(0b00001111, a);
45905 let e = _mm_set1_ps(0.5773392);
45906 assert_eq_m128(r, e);
45907 }
45908
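    // `getexp` returns the unbiased exponent of each lane as a float, i.e.
    // floor(log2(|x|)); for 3.0 = 1.5 * 2^1 that is 1.0.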
45909 #[simd_test(enable = "avx512f")]
45910 unsafe fn test_mm512_getexp_ps() {
45911 let a = _mm512_set1_ps(3.);
45912 let r = _mm512_getexp_ps(a);
45913 let e = _mm512_set1_ps(1.);
45914 assert_eq_m512(r, e);
45915 }
45916
45917 #[simd_test(enable = "avx512f")]
45918 unsafe fn test_mm512_mask_getexp_ps() {
45919 let a = _mm512_set1_ps(3.);
45920 let r = _mm512_mask_getexp_ps(a, 0, a);
45921 assert_eq_m512(r, a);
45922 let r = _mm512_mask_getexp_ps(a, 0b11111111_00000000, a);
45923 let e = _mm512_setr_ps(
45924 3., 3., 3., 3., 3., 3., 3., 3., 1., 1., 1., 1., 1., 1., 1., 1.,
45925 );
45926 assert_eq_m512(r, e);
45927 }
45928
45929 #[simd_test(enable = "avx512f")]
45930 unsafe fn test_mm512_maskz_getexp_ps() {
45931 let a = _mm512_set1_ps(3.);
45932 let r = _mm512_maskz_getexp_ps(0, a);
45933 assert_eq_m512(r, _mm512_setzero_ps());
45934 let r = _mm512_maskz_getexp_ps(0b11111111_00000000, a);
45935 let e = _mm512_setr_ps(
45936 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
45937 );
45938 assert_eq_m512(r, e);
45939 }
45940
45941 #[simd_test(enable = "avx512f,avx512vl")]
45942 unsafe fn test_mm256_getexp_ps() {
45943 let a = _mm256_set1_ps(3.);
45944 let r = _mm256_getexp_ps(a);
45945 let e = _mm256_set1_ps(1.);
45946 assert_eq_m256(r, e);
45947 }
45948
45949 #[simd_test(enable = "avx512f,avx512vl")]
45950 unsafe fn test_mm256_mask_getexp_ps() {
45951 let a = _mm256_set1_ps(3.);
45952 let r = _mm256_mask_getexp_ps(a, 0, a);
45953 assert_eq_m256(r, a);
45954 let r = _mm256_mask_getexp_ps(a, 0b11111111, a);
45955 let e = _mm256_set1_ps(1.);
45956 assert_eq_m256(r, e);
45957 }
45958
45959 #[simd_test(enable = "avx512f,avx512vl")]
45960 unsafe fn test_mm256_maskz_getexp_ps() {
45961 let a = _mm256_set1_ps(3.);
45962 let r = _mm256_maskz_getexp_ps(0, a);
45963 assert_eq_m256(r, _mm256_setzero_ps());
45964 let r = _mm256_maskz_getexp_ps(0b11111111, a);
45965 let e = _mm256_set1_ps(1.);
45966 assert_eq_m256(r, e);
45967 }
45968
45969 #[simd_test(enable = "avx512f,avx512vl")]
45970 unsafe fn test_mm_getexp_ps() {
45971 let a = _mm_set1_ps(3.);
45972 let r = _mm_getexp_ps(a);
45973 let e = _mm_set1_ps(1.);
45974 assert_eq_m128(r, e);
45975 }
45976
45977 #[simd_test(enable = "avx512f,avx512vl")]
45978 unsafe fn test_mm_mask_getexp_ps() {
45979 let a = _mm_set1_ps(3.);
45980 let r = _mm_mask_getexp_ps(a, 0, a);
45981 assert_eq_m128(r, a);
45982 let r = _mm_mask_getexp_ps(a, 0b00001111, a);
45983 let e = _mm_set1_ps(1.);
45984 assert_eq_m128(r, e);
45985 }
45986
45987 #[simd_test(enable = "avx512f,avx512vl")]
45988 unsafe fn test_mm_maskz_getexp_ps() {
45989 let a = _mm_set1_ps(3.);
45990 let r = _mm_maskz_getexp_ps(0, a);
45991 assert_eq_m128(r, _mm_setzero_ps());
45992 let r = _mm_maskz_getexp_ps(0b00001111, a);
45993 let e = _mm_set1_ps(1.);
45994 assert_eq_m128(r, e);
45995 }
45996
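    // For `roundscale`, imm8[7:4] gives the number of fraction bits to keep
    // (rounding to a multiple of 2^-M) and the low bits select the rounding
    // mode; `0b00_00_00_00` therefore rounds to the nearest integer, turning
    // 1.1 into 1.0.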
45997 #[simd_test(enable = "avx512f")]
45998 unsafe fn test_mm512_roundscale_ps() {
45999 let a = _mm512_set1_ps(1.1);
46000 let r = _mm512_roundscale_ps::<0b00_00_00_00>(a);
46001 let e = _mm512_set1_ps(1.0);
46002 assert_eq_m512(r, e);
46003 }
46004
46005 #[simd_test(enable = "avx512f")]
46006 unsafe fn test_mm512_mask_roundscale_ps() {
46007 let a = _mm512_set1_ps(1.1);
46008 let r = _mm512_mask_roundscale_ps::<0b00_00_00_00>(a, 0, a);
46009 let e = _mm512_set1_ps(1.1);
46010 assert_eq_m512(r, e);
46011 let r = _mm512_mask_roundscale_ps::<0b00_00_00_00>(a, 0b11111111_11111111, a);
46012 let e = _mm512_set1_ps(1.0);
46013 assert_eq_m512(r, e);
46014 }
46015
46016 #[simd_test(enable = "avx512f")]
46017 unsafe fn test_mm512_maskz_roundscale_ps() {
46018 let a = _mm512_set1_ps(1.1);
46019 let r = _mm512_maskz_roundscale_ps::<0b00_00_00_00>(0, a);
46020 assert_eq_m512(r, _mm512_setzero_ps());
46021 let r = _mm512_maskz_roundscale_ps::<0b00_00_00_00>(0b11111111_11111111, a);
46022 let e = _mm512_set1_ps(1.0);
46023 assert_eq_m512(r, e);
46024 }
46025
46026 #[simd_test(enable = "avx512f,avx512vl")]
46027 unsafe fn test_mm256_roundscale_ps() {
46028 let a = _mm256_set1_ps(1.1);
46029 let r = _mm256_roundscale_ps::<0b00_00_00_00>(a);
46030 let e = _mm256_set1_ps(1.0);
46031 assert_eq_m256(r, e);
46032 }
46033
46034 #[simd_test(enable = "avx512f,avx512vl")]
46035 unsafe fn test_mm256_mask_roundscale_ps() {
46036 let a = _mm256_set1_ps(1.1);
46037 let r = _mm256_mask_roundscale_ps::<0b00_00_00_00>(a, 0, a);
46038 let e = _mm256_set1_ps(1.1);
46039 assert_eq_m256(r, e);
46040 let r = _mm256_mask_roundscale_ps::<0b00_00_00_00>(a, 0b11111111, a);
46041 let e = _mm256_set1_ps(1.0);
46042 assert_eq_m256(r, e);
46043 }
46044
46045 #[simd_test(enable = "avx512f,avx512vl")]
46046 unsafe fn test_mm256_maskz_roundscale_ps() {
46047 let a = _mm256_set1_ps(1.1);
46048 let r = _mm256_maskz_roundscale_ps::<0b00_00_00_00>(0, a);
46049 assert_eq_m256(r, _mm256_setzero_ps());
46050 let r = _mm256_maskz_roundscale_ps::<0b00_00_00_00>(0b11111111, a);
46051 let e = _mm256_set1_ps(1.0);
46052 assert_eq_m256(r, e);
46053 }
46054
46055 #[simd_test(enable = "avx512f,avx512vl")]
46056 unsafe fn test_mm_roundscale_ps() {
46057 let a = _mm_set1_ps(1.1);
46058 let r = _mm_roundscale_ps::<0b00_00_00_00>(a);
46059 let e = _mm_set1_ps(1.0);
46060 assert_eq_m128(r, e);
46061 }
46062
46063 #[simd_test(enable = "avx512f,avx512vl")]
46064 unsafe fn test_mm_mask_roundscale_ps() {
46065 let a = _mm_set1_ps(1.1);
46066 let r = _mm_mask_roundscale_ps::<0b00_00_00_00>(a, 0, a);
46067 let e = _mm_set1_ps(1.1);
46068 assert_eq_m128(r, e);
46069 let r = _mm_mask_roundscale_ps::<0b00_00_00_00>(a, 0b00001111, a);
46070 let e = _mm_set1_ps(1.0);
46071 assert_eq_m128(r, e);
46072 }
46073
46074 #[simd_test(enable = "avx512f,avx512vl")]
46075 unsafe fn test_mm_maskz_roundscale_ps() {
46076 let a = _mm_set1_ps(1.1);
46077 let r = _mm_maskz_roundscale_ps::<0b00_00_00_00>(0, a);
46078 assert_eq_m128(r, _mm_setzero_ps());
46079 let r = _mm_maskz_roundscale_ps::<0b00_00_00_00>(0b00001111, a);
46080 let e = _mm_set1_ps(1.0);
46081 assert_eq_m128(r, e);
46082 }
46083
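    // `scalef` computes a * 2^floor(b) per lane, so with a = 1 and b = 3 every
    // lane is 8.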
46084 #[simd_test(enable = "avx512f")]
46085 unsafe fn test_mm512_scalef_ps() {
46086 let a = _mm512_set1_ps(1.);
46087 let b = _mm512_set1_ps(3.);
46088 let r = _mm512_scalef_ps(a, b);
46089 let e = _mm512_set1_ps(8.);
46090 assert_eq_m512(r, e);
46091 }
46092
46093 #[simd_test(enable = "avx512f")]
46094 unsafe fn test_mm512_mask_scalef_ps() {
46095 let a = _mm512_set1_ps(1.);
46096 let b = _mm512_set1_ps(3.);
46097 let r = _mm512_mask_scalef_ps(a, 0, a, b);
46098 assert_eq_m512(r, a);
46099 let r = _mm512_mask_scalef_ps(a, 0b11111111_00000000, a, b);
46100 let e = _mm512_set_ps(
46101 8., 8., 8., 8., 8., 8., 8., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
46102 );
46103 assert_eq_m512(r, e);
46104 }
46105
46106 #[simd_test(enable = "avx512f")]
46107 unsafe fn test_mm512_maskz_scalef_ps() {
46108 let a = _mm512_set1_ps(1.);
46109 let b = _mm512_set1_ps(3.);
46110 let r = _mm512_maskz_scalef_ps(0, a, b);
46111 assert_eq_m512(r, _mm512_setzero_ps());
46112 let r = _mm512_maskz_scalef_ps(0b11111111_00000000, a, b);
46113 let e = _mm512_set_ps(
46114 8., 8., 8., 8., 8., 8., 8., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
46115 );
46116 assert_eq_m512(r, e);
46117 }
46118
46119 #[simd_test(enable = "avx512f,avx512vl")]
46120 unsafe fn test_mm256_scalef_ps() {
46121 let a = _mm256_set1_ps(1.);
46122 let b = _mm256_set1_ps(3.);
46123 let r = _mm256_scalef_ps(a, b);
46124 let e = _mm256_set1_ps(8.);
46125 assert_eq_m256(r, e);
46126 }
46127
46128 #[simd_test(enable = "avx512f,avx512vl")]
46129 unsafe fn test_mm256_mask_scalef_ps() {
46130 let a = _mm256_set1_ps(1.);
46131 let b = _mm256_set1_ps(3.);
46132 let r = _mm256_mask_scalef_ps(a, 0, a, b);
46133 assert_eq_m256(r, a);
46134 let r = _mm256_mask_scalef_ps(a, 0b11111111, a, b);
46135 let e = _mm256_set1_ps(8.);
46136 assert_eq_m256(r, e);
46137 }
46138
46139 #[simd_test(enable = "avx512f,avx512vl")]
46140 unsafe fn test_mm256_maskz_scalef_ps() {
46141 let a = _mm256_set1_ps(1.);
46142 let b = _mm256_set1_ps(3.);
46143 let r = _mm256_maskz_scalef_ps(0, a, b);
46144 assert_eq_m256(r, _mm256_setzero_ps());
46145 let r = _mm256_maskz_scalef_ps(0b11111111, a, b);
46146 let e = _mm256_set1_ps(8.);
46147 assert_eq_m256(r, e);
46148 }
46149
46150 #[simd_test(enable = "avx512f,avx512vl")]
46151 unsafe fn test_mm_scalef_ps() {
46152 let a = _mm_set1_ps(1.);
46153 let b = _mm_set1_ps(3.);
46154 let r = _mm_scalef_ps(a, b);
46155 let e = _mm_set1_ps(8.);
46156 assert_eq_m128(r, e);
46157 }
46158
46159 #[simd_test(enable = "avx512f,avx512vl")]
46160 unsafe fn test_mm_mask_scalef_ps() {
46161 let a = _mm_set1_ps(1.);
46162 let b = _mm_set1_ps(3.);
46163 let r = _mm_mask_scalef_ps(a, 0, a, b);
46164 assert_eq_m128(r, a);
46165 let r = _mm_mask_scalef_ps(a, 0b00001111, a, b);
46166 let e = _mm_set1_ps(8.);
46167 assert_eq_m128(r, e);
46168 }
46169
46170 #[simd_test(enable = "avx512f,avx512vl")]
46171 unsafe fn test_mm_maskz_scalef_ps() {
46172 let a = _mm_set1_ps(1.);
46173 let b = _mm_set1_ps(3.);
46174 let r = _mm_maskz_scalef_ps(0, a, b);
46175 assert_eq_m128(r, _mm_setzero_ps());
46176 let r = _mm_maskz_scalef_ps(0b00001111, a, b);
46177 let e = _mm_set1_ps(8.);
46178 assert_eq_m128(r, e);
46179 }
46180
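    // `fixupimm` classifies each input lane and uses the per-lane 32-bit table
    // in `c` (eight 4-bit response codes, further qualified by the imm8) to
    // decide how special values are replaced. With these particular inputs and
    // imm8 = 5, the NaN lanes are fixed up to +0.0, which is what the expected
    // vectors below encode.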
46181 #[simd_test(enable = "avx512f")]
46182 unsafe fn test_mm512_fixupimm_ps() {
46183 let a = _mm512_set1_ps(f32::NAN);
46184 let b = _mm512_set1_ps(f32::MAX);
46185 let c = _mm512_set1_epi32(i32::MAX);
46187 let r = _mm512_fixupimm_ps::<5>(a, b, c);
46188 let e = _mm512_set1_ps(0.0);
46189 assert_eq_m512(r, e);
46190 }
46191
46192 #[simd_test(enable = "avx512f")]
46193 unsafe fn test_mm512_mask_fixupimm_ps() {
46194 #[rustfmt::skip]
46195 let a = _mm512_set_ps(
46196 f32::NAN, f32::NAN, f32::NAN, f32::NAN,
46197 f32::NAN, f32::NAN, f32::NAN, f32::NAN,
46198 1., 1., 1., 1.,
46199 1., 1., 1., 1.,
46200 );
46201 let b = _mm512_set1_ps(f32::MAX);
46202 let c = _mm512_set1_epi32(i32::MAX);
46203 let r = _mm512_mask_fixupimm_ps::<5>(a, 0b11111111_00000000, b, c);
46204 let e = _mm512_set_ps(
46205 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
46206 );
46207 assert_eq_m512(r, e);
46208 }
46209
46210 #[simd_test(enable = "avx512f")]
46211 unsafe fn test_mm512_maskz_fixupimm_ps() {
46212 #[rustfmt::skip]
46213 let a = _mm512_set_ps(
46214 f32::NAN, f32::NAN, f32::NAN, f32::NAN,
46215 f32::NAN, f32::NAN, f32::NAN, f32::NAN,
46216 1., 1., 1., 1.,
46217 1., 1., 1., 1.,
46218 );
46219 let b = _mm512_set1_ps(f32::MAX);
46220 let c = _mm512_set1_epi32(i32::MAX);
46221 let r = _mm512_maskz_fixupimm_ps::<5>(0b11111111_00000000, a, b, c);
46222 let e = _mm512_set_ps(
46223 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
46224 );
46225 assert_eq_m512(r, e);
46226 }
46227
46228 #[simd_test(enable = "avx512f,avx512vl")]
46229 unsafe fn test_mm256_fixupimm_ps() {
46230 let a = _mm256_set1_ps(f32::NAN);
46231 let b = _mm256_set1_ps(f32::MAX);
46232 let c = _mm256_set1_epi32(i32::MAX);
46233 let r = _mm256_fixupimm_ps::<5>(a, b, c);
46234 let e = _mm256_set1_ps(0.0);
46235 assert_eq_m256(r, e);
46236 }
46237
46238 #[simd_test(enable = "avx512f,avx512vl")]
46239 unsafe fn test_mm256_mask_fixupimm_ps() {
46240 let a = _mm256_set1_ps(f32::NAN);
46241 let b = _mm256_set1_ps(f32::MAX);
46242 let c = _mm256_set1_epi32(i32::MAX);
46243 let r = _mm256_mask_fixupimm_ps::<5>(a, 0b11111111, b, c);
46244 let e = _mm256_set1_ps(0.0);
46245 assert_eq_m256(r, e);
46246 }
46247
46248 #[simd_test(enable = "avx512f,avx512vl")]
46249 unsafe fn test_mm256_maskz_fixupimm_ps() {
46250 let a = _mm256_set1_ps(f32::NAN);
46251 let b = _mm256_set1_ps(f32::MAX);
46252 let c = _mm256_set1_epi32(i32::MAX);
46253 let r = _mm256_maskz_fixupimm_ps::<5>(0b11111111, a, b, c);
46254 let e = _mm256_set1_ps(0.0);
46255 assert_eq_m256(r, e);
46256 }
46257
46258 #[simd_test(enable = "avx512f,avx512vl")]
46259 unsafe fn test_mm_fixupimm_ps() {
46260 let a = _mm_set1_ps(f32::NAN);
46261 let b = _mm_set1_ps(f32::MAX);
46262 let c = _mm_set1_epi32(i32::MAX);
46263 let r = _mm_fixupimm_ps::<5>(a, b, c);
46264 let e = _mm_set1_ps(0.0);
46265 assert_eq_m128(r, e);
46266 }
46267
46268 #[simd_test(enable = "avx512f,avx512vl")]
46269 unsafe fn test_mm_mask_fixupimm_ps() {
46270 let a = _mm_set1_ps(f32::NAN);
46271 let b = _mm_set1_ps(f32::MAX);
46272 let c = _mm_set1_epi32(i32::MAX);
46273 let r = _mm_mask_fixupimm_ps::<5>(a, 0b00001111, b, c);
46274 let e = _mm_set1_ps(0.0);
46275 assert_eq_m128(r, e);
46276 }
46277
46278 #[simd_test(enable = "avx512f,avx512vl")]
46279 unsafe fn test_mm_maskz_fixupimm_ps() {
46280 let a = _mm_set1_ps(f32::NAN);
46281 let b = _mm_set1_ps(f32::MAX);
46282 let c = _mm_set1_epi32(i32::MAX);
46283 let r = _mm_maskz_fixupimm_ps::<5>(0b00001111, a, b, c);
46284 let e = _mm_set1_ps(0.0);
46285 assert_eq_m128(r, e);
46286 }
46287
46288 #[simd_test(enable = "avx512f")]
46289 unsafe fn test_mm512_ternarylogic_epi32() {
46290 let a = _mm512_set1_epi32(1 << 2);
46291 let b = _mm512_set1_epi32(1 << 1);
46292 let c = _mm512_set1_epi32(1 << 0);
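// IMM8 = 8 sets only truth-table entry 0b011 (a=0, b=1, c=1); no bit position of a, b, c has that pattern, so every lane is 0.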
46293 let r = _mm512_ternarylogic_epi32::<8>(a, b, c);
46294 let e = _mm512_set1_epi32(0);
46295 assert_eq_m512i(r, e);
46296 }
46297
46298 #[simd_test(enable = "avx512f")]
46299 unsafe fn test_mm512_mask_ternarylogic_epi32() {
46300 let src = _mm512_set1_epi32(1 << 2);
46301 let a = _mm512_set1_epi32(1 << 1);
46302 let b = _mm512_set1_epi32(1 << 0);
46303 let r = _mm512_mask_ternarylogic_epi32::<8>(src, 0, a, b);
46304 assert_eq_m512i(r, src);
46305 let r = _mm512_mask_ternarylogic_epi32::<8>(src, 0b11111111_11111111, a, b);
46306 let e = _mm512_set1_epi32(0);
46307 assert_eq_m512i(r, e);
46308 }
46309
46310 #[simd_test(enable = "avx512f")]
46311 unsafe fn test_mm512_maskz_ternarylogic_epi32() {
46312 let a = _mm512_set1_epi32(1 << 2);
46313 let b = _mm512_set1_epi32(1 << 1);
46314 let c = _mm512_set1_epi32(1 << 0);
46315 let r = _mm512_maskz_ternarylogic_epi32::<9>(0, a, b, c);
46316 assert_eq_m512i(r, _mm512_setzero_si512());
46317 let r = _mm512_maskz_ternarylogic_epi32::<8>(0b11111111_11111111, a, b, c);
46318 let e = _mm512_set1_epi32(0);
46319 assert_eq_m512i(r, e);
46320 }
46321
46322 #[simd_test(enable = "avx512f,avx512vl")]
46323 unsafe fn test_mm256_ternarylogic_epi32() {
46324 let a = _mm256_set1_epi32(1 << 2);
46325 let b = _mm256_set1_epi32(1 << 1);
46326 let c = _mm256_set1_epi32(1 << 0);
46327 let r = _mm256_ternarylogic_epi32::<8>(a, b, c);
46328 let e = _mm256_set1_epi32(0);
46329 assert_eq_m256i(r, e);
46330 }
46331
46332 #[simd_test(enable = "avx512f,avx512vl")]
46333 unsafe fn test_mm256_mask_ternarylogic_epi32() {
46334 let src = _mm256_set1_epi32(1 << 2);
46335 let a = _mm256_set1_epi32(1 << 1);
46336 let b = _mm256_set1_epi32(1 << 0);
46337 let r = _mm256_mask_ternarylogic_epi32::<8>(src, 0, a, b);
46338 assert_eq_m256i(r, src);
46339 let r = _mm256_mask_ternarylogic_epi32::<8>(src, 0b11111111, a, b);
46340 let e = _mm256_set1_epi32(0);
46341 assert_eq_m256i(r, e);
46342 }
46343
46344 #[simd_test(enable = "avx512f,avx512vl")]
46345 unsafe fn test_mm256_maskz_ternarylogic_epi32() {
46346 let a = _mm256_set1_epi32(1 << 2);
46347 let b = _mm256_set1_epi32(1 << 1);
46348 let c = _mm256_set1_epi32(1 << 0);
46349 let r = _mm256_maskz_ternarylogic_epi32::<9>(0, a, b, c);
46350 assert_eq_m256i(r, _mm256_setzero_si256());
46351 let r = _mm256_maskz_ternarylogic_epi32::<8>(0b11111111, a, b, c);
46352 let e = _mm256_set1_epi32(0);
46353 assert_eq_m256i(r, e);
46354 }
46355
46356 #[simd_test(enable = "avx512f,avx512vl")]
46357 unsafe fn test_mm_ternarylogic_epi32() {
46358 let a = _mm_set1_epi32(1 << 2);
46359 let b = _mm_set1_epi32(1 << 1);
46360 let c = _mm_set1_epi32(1 << 0);
46361 let r = _mm_ternarylogic_epi32::<8>(a, b, c);
46362 let e = _mm_set1_epi32(0);
46363 assert_eq_m128i(r, e);
46364 }
46365
46366 #[simd_test(enable = "avx512f,avx512vl")]
46367 unsafe fn test_mm_mask_ternarylogic_epi32() {
46368 let src = _mm_set1_epi32(1 << 2);
46369 let a = _mm_set1_epi32(1 << 1);
46370 let b = _mm_set1_epi32(1 << 0);
46371 let r = _mm_mask_ternarylogic_epi32::<8>(src, 0, a, b);
46372 assert_eq_m128i(r, src);
46373 let r = _mm_mask_ternarylogic_epi32::<8>(src, 0b00001111, a, b);
46374 let e = _mm_set1_epi32(0);
46375 assert_eq_m128i(r, e);
46376 }
46377
46378 #[simd_test(enable = "avx512f,avx512vl")]
46379 unsafe fn test_mm_maskz_ternarylogic_epi32() {
46380 let a = _mm_set1_epi32(1 << 2);
46381 let b = _mm_set1_epi32(1 << 1);
46382 let c = _mm_set1_epi32(1 << 0);
46383 let r = _mm_maskz_ternarylogic_epi32::<9>(0, a, b, c);
46384 assert_eq_m128i(r, _mm_setzero_si128());
46385 let r = _mm_maskz_ternarylogic_epi32::<8>(0b00001111, a, b, c);
46386 let e = _mm_set1_epi32(0);
46387 assert_eq_m128i(r, e);
46388 }
46389
46390 #[simd_test(enable = "avx512f")]
46391 unsafe fn test_mm512_getmant_ps() {
46392 let a = _mm512_set1_ps(10.);
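// 10.0 = 1.25 * 2^3; normalizing the mantissa into the [0.75, 1.5) interval keeps 1.25.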
46393 let r = _mm512_getmant_ps::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
46394 let e = _mm512_set1_ps(1.25);
46395 assert_eq_m512(r, e);
46396 }
46397
46398 #[simd_test(enable = "avx512f")]
46399 unsafe fn test_mm512_mask_getmant_ps() {
46400 let a = _mm512_set1_ps(10.);
46401 let r = _mm512_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a);
46402 assert_eq_m512(r, a);
46403 let r = _mm512_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(
46404 a,
46405 0b11111111_00000000,
46406 a,
46407 );
46408 let e = _mm512_setr_ps(
46409 10., 10., 10., 10., 10., 10., 10., 10., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
46410 );
46411 assert_eq_m512(r, e);
46412 }
46413
46414 #[simd_test(enable = "avx512f")]
46415 unsafe fn test_mm512_maskz_getmant_ps() {
46416 let a = _mm512_set1_ps(10.);
46417 let r = _mm512_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a);
46418 assert_eq_m512(r, _mm512_setzero_ps());
46419 let r =
46420 _mm512_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b11111111_00000000, a);
46421 let e = _mm512_setr_ps(
46422 0., 0., 0., 0., 0., 0., 0., 0., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
46423 );
46424 assert_eq_m512(r, e);
46425 }
46426
46427 #[simd_test(enable = "avx512f,avx512vl")]
46428 unsafe fn test_mm256_getmant_ps() {
46429 let a = _mm256_set1_ps(10.);
46430 let r = _mm256_getmant_ps::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
46431 let e = _mm256_set1_ps(1.25);
46432 assert_eq_m256(r, e);
46433 }
46434
46435 #[simd_test(enable = "avx512f,avx512vl")]
46436 unsafe fn test_mm256_mask_getmant_ps() {
46437 let a = _mm256_set1_ps(10.);
46438 let r = _mm256_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a);
46439 assert_eq_m256(r, a);
46440 let r = _mm256_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b11111111, a);
46441 let e = _mm256_set1_ps(1.25);
46442 assert_eq_m256(r, e);
46443 }
46444
46445 #[simd_test(enable = "avx512f,avx512vl")]
46446 unsafe fn test_mm256_maskz_getmant_ps() {
46447 let a = _mm256_set1_ps(10.);
46448 let r = _mm256_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a);
46449 assert_eq_m256(r, _mm256_setzero_ps());
46450 let r = _mm256_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b11111111, a);
46451 let e = _mm256_set1_ps(1.25);
46452 assert_eq_m256(r, e);
46453 }
46454
46455 #[simd_test(enable = "avx512f,avx512vl")]
46456 unsafe fn test_mm_getmant_ps() {
46457 let a = _mm_set1_ps(10.);
46458 let r = _mm_getmant_ps::<_MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN>(a);
46459 let e = _mm_set1_ps(1.25);
46460 assert_eq_m128(r, e);
46461 }
46462
46463 #[simd_test(enable = "avx512f,avx512vl")]
46464 unsafe fn test_mm_mask_getmant_ps() {
46465 let a = _mm_set1_ps(10.);
46466 let r = _mm_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a);
46467 assert_eq_m128(r, a);
46468 let r = _mm_mask_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b00001111, a);
46469 let e = _mm_set1_ps(1.25);
46470 assert_eq_m128(r, e);
46471 }
46472
46473 #[simd_test(enable = "avx512f,avx512vl")]
46474 unsafe fn test_mm_maskz_getmant_ps() {
46475 let a = _mm_set1_ps(10.);
46476 let r = _mm_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a);
46477 assert_eq_m128(r, _mm_setzero_ps());
46478 let r = _mm_maskz_getmant_ps::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b00001111, a);
46479 let e = _mm_set1_ps(1.25);
46480 assert_eq_m128(r, e);
46481 }
46482
46483 #[simd_test(enable = "avx512f")]
46484 unsafe fn test_mm512_add_round_ps() {
46485 let a = _mm512_setr_ps(
46486 0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
46487 );
46488 let b = _mm512_set1_ps(-1.);
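// Only the last lane is inexact: 0.00000007 + (-1.) rounds to -0.99999994 under round-to-nearest but truncates to -0.9999999 under round-toward-zero.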
46489 let r = _mm512_add_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
46490 #[rustfmt::skip]
46491 let e = _mm512_setr_ps(
46492 -1., 0.5, 1., 2.5,
46493 3., 4.5, 5., 6.5,
46494 7., 8.5, 9., 10.5,
46495 11., 12.5, 13., -0.99999994,
46496 );
46497 assert_eq_m512(r, e);
46498 let r = _mm512_add_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
46499 let e = _mm512_setr_ps(
46500 -1., 0.5, 1., 2.5, 3., 4.5, 5., 6.5, 7., 8.5, 9., 10.5, 11., 12.5, 13., -0.9999999,
46501 );
46502 assert_eq_m512(r, e);
46503 }
46504
46505 #[simd_test(enable = "avx512f")]
46506 unsafe fn test_mm512_mask_add_round_ps() {
46507 let a = _mm512_setr_ps(
46508 0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
46509 );
46510 let b = _mm512_set1_ps(-1.);
46511 let r = _mm512_mask_add_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, 0, a, b);
46512 assert_eq_m512(r, a);
46513 let r = _mm512_mask_add_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46514 a,
46515 0b11111111_00000000,
46516 a,
46517 b,
46518 );
46519 #[rustfmt::skip]
46520 let e = _mm512_setr_ps(
46521 0., 1.5, 2., 3.5,
46522 4., 5.5, 6., 7.5,
46523 7., 8.5, 9., 10.5,
46524 11., 12.5, 13., -0.99999994,
46525 );
46526 assert_eq_m512(r, e);
46527 }
46528
46529 #[simd_test(enable = "avx512f")]
46530 unsafe fn test_mm512_maskz_add_round_ps() {
46531 let a = _mm512_setr_ps(
46532 0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
46533 );
46534 let b = _mm512_set1_ps(-1.);
46535 let r = _mm512_maskz_add_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
46536 assert_eq_m512(r, _mm512_setzero_ps());
46537 let r = _mm512_maskz_add_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46538 0b11111111_00000000,
46539 a,
46540 b,
46541 );
46542 #[rustfmt::skip]
46543 let e = _mm512_setr_ps(
46544 0., 0., 0., 0.,
46545 0., 0., 0., 0.,
46546 7., 8.5, 9., 10.5,
46547 11., 12.5, 13., -0.99999994,
46548 );
46549 assert_eq_m512(r, e);
46550 }
46551
46552 #[simd_test(enable = "avx512f")]
46553 unsafe fn test_mm512_sub_round_ps() {
46554 let a = _mm512_setr_ps(
46555 0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
46556 );
46557 let b = _mm512_set1_ps(1.);
46558 let r = _mm512_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
46559 #[rustfmt::skip]
46560 let e = _mm512_setr_ps(
46561 -1., 0.5, 1., 2.5,
46562 3., 4.5, 5., 6.5,
46563 7., 8.5, 9., 10.5,
46564 11., 12.5, 13., -0.99999994,
46565 );
46566 assert_eq_m512(r, e);
46567 let r = _mm512_sub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
46568 let e = _mm512_setr_ps(
46569 -1., 0.5, 1., 2.5, 3., 4.5, 5., 6.5, 7., 8.5, 9., 10.5, 11., 12.5, 13., -0.9999999,
46570 );
46571 assert_eq_m512(r, e);
46572 }
46573
46574 #[simd_test(enable = "avx512f")]
46575 unsafe fn test_mm512_mask_sub_round_ps() {
46576 let a = _mm512_setr_ps(
46577 0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
46578 );
46579 let b = _mm512_set1_ps(1.);
46580 let r = _mm512_mask_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46581 a, 0, a, b,
46582 );
46583 assert_eq_m512(r, a);
46584 let r = _mm512_mask_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46585 a,
46586 0b11111111_00000000,
46587 a,
46588 b,
46589 );
46590 #[rustfmt::skip]
46591 let e = _mm512_setr_ps(
46592 0., 1.5, 2., 3.5,
46593 4., 5.5, 6., 7.5,
46594 7., 8.5, 9., 10.5,
46595 11., 12.5, 13., -0.99999994,
46596 );
46597 assert_eq_m512(r, e);
46598 }
46599
46600 #[simd_test(enable = "avx512f")]
46601 unsafe fn test_mm512_maskz_sub_round_ps() {
46602 let a = _mm512_setr_ps(
46603 0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
46604 );
46605 let b = _mm512_set1_ps(1.);
46606 let r =
46607 _mm512_maskz_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
46608 assert_eq_m512(r, _mm512_setzero_ps());
46609 let r = _mm512_maskz_sub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46610 0b11111111_00000000,
46611 a,
46612 b,
46613 );
46614 #[rustfmt::skip]
46615 let e = _mm512_setr_ps(
46616 0., 0., 0., 0.,
46617 0., 0., 0., 0.,
46618 7., 8.5, 9., 10.5,
46619 11., 12.5, 13., -0.99999994,
46620 );
46621 assert_eq_m512(r, e);
46622 }
46623
46624 #[simd_test(enable = "avx512f")]
46625 unsafe fn test_mm512_mul_round_ps() {
46626 #[rustfmt::skip]
46627 let a = _mm512_setr_ps(
46628 0., 1.5, 2., 3.5,
46629 4., 5.5, 6., 7.5,
46630 8., 9.5, 10., 11.5,
46631 12., 13.5, 14., 0.00000000000000000000007,
46632 );
46633 let b = _mm512_set1_ps(0.1);
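// 0.1 is not exactly representable in f32, so several products differ in the last bit between round-to-nearest and round-toward-zero.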
46634 let r = _mm512_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
46635 #[rustfmt::skip]
46636 let e = _mm512_setr_ps(
46637 0., 0.15, 0.2, 0.35,
46638 0.4, 0.55, 0.6, 0.75,
46639 0.8, 0.95, 1.0, 1.15,
46640 1.2, 1.35, 1.4, 0.000000000000000000000007000001,
46641 );
46642 assert_eq_m512(r, e);
46643 let r = _mm512_mul_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
46644 #[rustfmt::skip]
46645 let e = _mm512_setr_ps(
46646 0., 0.14999999, 0.2, 0.35,
46647 0.4, 0.54999995, 0.59999996, 0.75,
46648 0.8, 0.95, 1.0, 1.15,
46649 1.1999999, 1.3499999, 1.4, 0.000000000000000000000007,
46650 );
46651 assert_eq_m512(r, e);
46652 }
46653
46654 #[simd_test(enable = "avx512f")]
46655 unsafe fn test_mm512_mask_mul_round_ps() {
46656 #[rustfmt::skip]
46657 let a = _mm512_setr_ps(
46658 0., 1.5, 2., 3.5,
46659 4., 5.5, 6., 7.5,
46660 8., 9.5, 10., 11.5,
46661 12., 13.5, 14., 0.00000000000000000000007,
46662 );
46663 let b = _mm512_set1_ps(0.1);
46664 let r = _mm512_mask_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46665 a, 0, a, b,
46666 );
46667 assert_eq_m512(r, a);
46668 let r = _mm512_mask_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46669 a,
46670 0b11111111_00000000,
46671 a,
46672 b,
46673 );
46674 #[rustfmt::skip]
46675 let e = _mm512_setr_ps(
46676 0., 1.5, 2., 3.5,
46677 4., 5.5, 6., 7.5,
46678 0.8, 0.95, 1.0, 1.15,
46679 1.2, 1.35, 1.4, 0.000000000000000000000007000001,
46680 );
46681 assert_eq_m512(r, e);
46682 }
46683
46684 #[simd_test(enable = "avx512f")]
46685 unsafe fn test_mm512_maskz_mul_round_ps() {
46686 #[rustfmt::skip]
46687 let a = _mm512_setr_ps(
46688 0., 1.5, 2., 3.5,
46689 4., 5.5, 6., 7.5,
46690 8., 9.5, 10., 11.5,
46691 12., 13.5, 14., 0.00000000000000000000007,
46692 );
46693 let b = _mm512_set1_ps(0.1);
46694 let r =
46695 _mm512_maskz_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
46696 assert_eq_m512(r, _mm512_setzero_ps());
46697 let r = _mm512_maskz_mul_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46698 0b11111111_00000000,
46699 a,
46700 b,
46701 );
46702 #[rustfmt::skip]
46703 let e = _mm512_setr_ps(
46704 0., 0., 0., 0.,
46705 0., 0., 0., 0.,
46706 0.8, 0.95, 1.0, 1.15,
46707 1.2, 1.35, 1.4, 0.000000000000000000000007000001,
46708 );
46709 assert_eq_m512(r, e);
46710 }
46711
46712 #[simd_test(enable = "avx512f")]
46713 unsafe fn test_mm512_div_round_ps() {
46714 let a = _mm512_set1_ps(1.);
46715 let b = _mm512_set1_ps(3.);
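// 1/3 rounds up to 0.33333334 under round-to-nearest and truncates to 0.3333333 under round-toward-zero.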
46716 let r = _mm512_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
46717 let e = _mm512_set1_ps(0.33333334);
46718 assert_eq_m512(r, e);
46719 let r = _mm512_div_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
46720 let e = _mm512_set1_ps(0.3333333);
46721 assert_eq_m512(r, e);
46722 }
46723
46724 #[simd_test(enable = "avx512f")]
46725 unsafe fn test_mm512_mask_div_round_ps() {
46726 let a = _mm512_set1_ps(1.);
46727 let b = _mm512_set1_ps(3.);
46728 let r = _mm512_mask_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46729 a, 0, a, b,
46730 );
46731 assert_eq_m512(r, a);
46732 let r = _mm512_mask_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46733 a,
46734 0b11111111_00000000,
46735 a,
46736 b,
46737 );
46738 let e = _mm512_setr_ps(
46739 1., 1., 1., 1., 1., 1., 1., 1., 0.33333334, 0.33333334, 0.33333334, 0.33333334,
46740 0.33333334, 0.33333334, 0.33333334, 0.33333334,
46741 );
46742 assert_eq_m512(r, e);
46743 }
46744
46745 #[simd_test(enable = "avx512f")]
46746 unsafe fn test_mm512_maskz_div_round_ps() {
46747 let a = _mm512_set1_ps(1.);
46748 let b = _mm512_set1_ps(3.);
46749 let r =
46750 _mm512_maskz_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
46751 assert_eq_m512(r, _mm512_setzero_ps());
46752 let r = _mm512_maskz_div_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46753 0b11111111_00000000,
46754 a,
46755 b,
46756 );
46757 let e = _mm512_setr_ps(
46758 0., 0., 0., 0., 0., 0., 0., 0., 0.33333334, 0.33333334, 0.33333334, 0.33333334,
46759 0.33333334, 0.33333334, 0.33333334, 0.33333334,
46760 );
46761 assert_eq_m512(r, e);
46762 }
46763
46764 #[simd_test(enable = "avx512f")]
46765 unsafe fn test_mm512_sqrt_round_ps() {
46766 let a = _mm512_set1_ps(3.);
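// sqrt(3) ≈ 1.7320508; rounding toward +infinity yields the next representable f32, 1.7320509.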
46767 let r = _mm512_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
46768 let e = _mm512_set1_ps(1.7320508);
46769 assert_eq_m512(r, e);
46770 let r = _mm512_sqrt_round_ps::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a);
46771 let e = _mm512_set1_ps(1.7320509);
46772 assert_eq_m512(r, e);
46773 }
46774
46775 #[simd_test(enable = "avx512f")]
46776 unsafe fn test_mm512_mask_sqrt_round_ps() {
46777 let a = _mm512_set1_ps(3.);
46778 let r =
46779 _mm512_mask_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, 0, a);
46780 assert_eq_m512(r, a);
46781 let r = _mm512_mask_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46782 a,
46783 0b11111111_00000000,
46784 a,
46785 );
46786 let e = _mm512_setr_ps(
46787 3., 3., 3., 3., 3., 3., 3., 3., 1.7320508, 1.7320508, 1.7320508, 1.7320508, 1.7320508,
46788 1.7320508, 1.7320508, 1.7320508,
46789 );
46790 assert_eq_m512(r, e);
46791 }
46792
46793 #[simd_test(enable = "avx512f")]
46794 unsafe fn test_mm512_maskz_sqrt_round_ps() {
46795 let a = _mm512_set1_ps(3.);
46796 let r =
46797 _mm512_maskz_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a);
46798 assert_eq_m512(r, _mm512_setzero_ps());
46799 let r = _mm512_maskz_sqrt_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46800 0b11111111_00000000,
46801 a,
46802 );
46803 let e = _mm512_setr_ps(
46804 0., 0., 0., 0., 0., 0., 0., 0., 1.7320508, 1.7320508, 1.7320508, 1.7320508, 1.7320508,
46805 1.7320508, 1.7320508, 1.7320508,
46806 );
46807 assert_eq_m512(r, e);
46808 }
46809
46810 #[simd_test(enable = "avx512f")]
46811 unsafe fn test_mm512_fmadd_round_ps() {
46812 let a = _mm512_set1_ps(0.00000007);
46813 let b = _mm512_set1_ps(1.);
46814 let c = _mm512_set1_ps(-1.);
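// The fused multiply-add evaluates 0.00000007 * 1. + (-1.) with a single final rounding: -0.99999994 (nearest) vs -0.9999999 (toward zero).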
46815 let r = _mm512_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
46816 let e = _mm512_set1_ps(-0.99999994);
46817 assert_eq_m512(r, e);
46818 let r = _mm512_fmadd_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
46819 let e = _mm512_set1_ps(-0.9999999);
46820 assert_eq_m512(r, e);
46821 }
46822
46823 #[simd_test(enable = "avx512f")]
46824 unsafe fn test_mm512_mask_fmadd_round_ps() {
46825 let a = _mm512_set1_ps(0.00000007);
46826 let b = _mm512_set1_ps(1.);
46827 let c = _mm512_set1_ps(-1.);
46828 let r = _mm512_mask_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46829 a, 0, b, c,
46830 );
46831 assert_eq_m512(r, a);
46832 let r = _mm512_mask_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46833 a,
46834 0b00000000_11111111,
46835 b,
46836 c,
46837 );
46838 #[rustfmt::skip]
46839 let e = _mm512_setr_ps(
46840 -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46841 -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46842 0.00000007, 0.00000007, 0.00000007, 0.00000007,
46843 0.00000007, 0.00000007, 0.00000007, 0.00000007,
46844 );
46845 assert_eq_m512(r, e);
46846 }
46847
46848 #[simd_test(enable = "avx512f")]
46849 unsafe fn test_mm512_maskz_fmadd_round_ps() {
46850 let a = _mm512_set1_ps(0.00000007);
46851 let b = _mm512_set1_ps(1.);
46852 let c = _mm512_set1_ps(-1.);
46853 let r = _mm512_maskz_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46854 0, a, b, c,
46855 );
46856 assert_eq_m512(r, _mm512_setzero_ps());
46857 #[rustfmt::skip]
46858 let r = _mm512_maskz_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46859 0b00000000_11111111,
46860 a,
46861 b,
46862 c,
46863 );
46864 #[rustfmt::skip]
46865 let e = _mm512_setr_ps(
46866 -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46867 -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46868 0., 0., 0., 0.,
46869 0., 0., 0., 0.,
46870 );
46871 assert_eq_m512(r, e);
46872 }
46873
46874 #[simd_test(enable = "avx512f")]
46875 unsafe fn test_mm512_mask3_fmadd_round_ps() {
46876 let a = _mm512_set1_ps(0.00000007);
46877 let b = _mm512_set1_ps(1.);
46878 let c = _mm512_set1_ps(-1.);
46879 let r = _mm512_mask3_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46880 a, b, c, 0,
46881 );
46882 assert_eq_m512(r, c);
46883 let r = _mm512_mask3_fmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46884 a,
46885 b,
46886 c,
46887 0b00000000_11111111,
46888 );
46889 #[rustfmt::skip]
46890 let e = _mm512_setr_ps(
46891 -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46892 -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46893 -1., -1., -1., -1.,
46894 -1., -1., -1., -1.,
46895 );
46896 assert_eq_m512(r, e);
46897 }
46898
46899 #[simd_test(enable = "avx512f")]
46900 unsafe fn test_mm512_fmsub_round_ps() {
46901 let a = _mm512_set1_ps(0.00000007);
46902 let b = _mm512_set1_ps(1.);
46903 let c = _mm512_set1_ps(1.);
46904 let r = _mm512_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
46905 let e = _mm512_set1_ps(-0.99999994);
46906 assert_eq_m512(r, e);
46907 let r = _mm512_fmsub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
46908 let e = _mm512_set1_ps(-0.9999999);
46909 assert_eq_m512(r, e);
46910 }
46911
46912 #[simd_test(enable = "avx512f")]
46913 unsafe fn test_mm512_mask_fmsub_round_ps() {
46914 let a = _mm512_set1_ps(0.00000007);
46915 let b = _mm512_set1_ps(1.);
46916 let c = _mm512_set1_ps(1.);
46917 let r = _mm512_mask_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46918 a, 0, b, c,
46919 );
46920 assert_eq_m512(r, a);
46921 let r = _mm512_mask_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46922 a,
46923 0b00000000_11111111,
46924 b,
46925 c,
46926 );
46927 #[rustfmt::skip]
46928 let e = _mm512_setr_ps(
46929 -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46930 -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46931 0.00000007, 0.00000007, 0.00000007, 0.00000007,
46932 0.00000007, 0.00000007, 0.00000007, 0.00000007,
46933 );
46934 assert_eq_m512(r, e);
46935 }
46936
46937 #[simd_test(enable = "avx512f")]
46938 unsafe fn test_mm512_maskz_fmsub_round_ps() {
46939 let a = _mm512_set1_ps(0.00000007);
46940 let b = _mm512_set1_ps(1.);
46941 let c = _mm512_set1_ps(1.);
46942 let r = _mm512_maskz_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46943 0, a, b, c,
46944 );
46945 assert_eq_m512(r, _mm512_setzero_ps());
46946 let r = _mm512_maskz_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46947 0b00000000_11111111,
46948 a,
46949 b,
46950 c,
46951 );
46952 #[rustfmt::skip]
46953 let e = _mm512_setr_ps(
46954 -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46955 -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46956 0., 0., 0., 0.,
46957 0., 0., 0., 0.,
46958 );
46959 assert_eq_m512(r, e);
46960 }
46961
46962 #[simd_test(enable = "avx512f")]
46963 unsafe fn test_mm512_mask3_fmsub_round_ps() {
46964 let a = _mm512_set1_ps(0.00000007);
46965 let b = _mm512_set1_ps(1.);
46966 let c = _mm512_set1_ps(1.);
46967 let r = _mm512_mask3_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46968 a, b, c, 0,
46969 );
46970 assert_eq_m512(r, c);
46971 let r = _mm512_mask3_fmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
46972 a,
46973 b,
46974 c,
46975 0b00000000_11111111,
46976 );
46977 #[rustfmt::skip]
46978 let e = _mm512_setr_ps(
46979 -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46980 -0.99999994, -0.99999994, -0.99999994, -0.99999994,
46981 1., 1., 1., 1.,
46982 1., 1., 1., 1.,
46983 );
46984 assert_eq_m512(r, e);
46985 }
46986
46987 #[simd_test(enable = "avx512f")]
46988 unsafe fn test_mm512_fmaddsub_round_ps() {
46989 let a = _mm512_set1_ps(0.00000007);
46990 let b = _mm512_set1_ps(1.);
46991 let c = _mm512_set1_ps(-1.);
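// fmaddsub alternates per lane: even-indexed lanes compute a * b - c, odd-indexed lanes compute a * b + c.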
46992 let r =
46993 _mm512_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
46994 #[rustfmt::skip]
46995 let e = _mm512_setr_ps(
46996 1.0000001, -0.99999994, 1.0000001, -0.99999994,
46997 1.0000001, -0.99999994, 1.0000001, -0.99999994,
46998 1.0000001, -0.99999994, 1.0000001, -0.99999994,
46999 1.0000001, -0.99999994, 1.0000001, -0.99999994,
47000 );
47001 assert_eq_m512(r, e);
47002 let r = _mm512_fmaddsub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
47003 let e = _mm512_setr_ps(
47004 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1.,
47005 -0.9999999, 1., -0.9999999, 1., -0.9999999,
47006 );
47007 assert_eq_m512(r, e);
47008 }
47009
47010 #[simd_test(enable = "avx512f")]
47011 unsafe fn test_mm512_mask_fmaddsub_round_ps() {
47012 let a = _mm512_set1_ps(0.00000007);
47013 let b = _mm512_set1_ps(1.);
47014 let c = _mm512_set1_ps(-1.);
47015 let r = _mm512_mask_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47016 a, 0, b, c,
47017 );
47018 assert_eq_m512(r, a);
47019 let r = _mm512_mask_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47020 a,
47021 0b00000000_11111111,
47022 b,
47023 c,
47024 );
47025 #[rustfmt::skip]
47026 let e = _mm512_setr_ps(
47027 1.0000001, -0.99999994, 1.0000001, -0.99999994,
47028 1.0000001, -0.99999994, 1.0000001, -0.99999994,
47029 0.00000007, 0.00000007, 0.00000007, 0.00000007,
47030 0.00000007, 0.00000007, 0.00000007, 0.00000007,
47031 );
47032 assert_eq_m512(r, e);
47033 }
47034
47035 #[simd_test(enable = "avx512f")]
47036 unsafe fn test_mm512_maskz_fmaddsub_round_ps() {
47037 let a = _mm512_set1_ps(0.00000007);
47038 let b = _mm512_set1_ps(1.);
47039 let c = _mm512_set1_ps(-1.);
47040 let r = _mm512_maskz_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47041 0, a, b, c,
47042 );
47043 assert_eq_m512(r, _mm512_setzero_ps());
47044 let r = _mm512_maskz_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47045 0b00000000_11111111,
47046 a,
47047 b,
47048 c,
47049 );
47050 #[rustfmt::skip]
47051 let e = _mm512_setr_ps(
47052 1.0000001, -0.99999994, 1.0000001, -0.99999994,
47053 1.0000001, -0.99999994, 1.0000001, -0.99999994,
47054 0., 0., 0., 0.,
47055 0., 0., 0., 0.,
47056 );
47057 assert_eq_m512(r, e);
47058 }
47059
47060 #[simd_test(enable = "avx512f")]
47061 unsafe fn test_mm512_mask3_fmaddsub_round_ps() {
47062 let a = _mm512_set1_ps(0.00000007);
47063 let b = _mm512_set1_ps(1.);
47064 let c = _mm512_set1_ps(-1.);
47065 let r = _mm512_mask3_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47066 a, b, c, 0,
47067 );
47068 assert_eq_m512(r, c);
47069 let r = _mm512_mask3_fmaddsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47070 a,
47071 b,
47072 c,
47073 0b00000000_11111111,
47074 );
47075 #[rustfmt::skip]
47076 let e = _mm512_setr_ps(
47077 1.0000001, -0.99999994, 1.0000001, -0.99999994,
47078 1.0000001, -0.99999994, 1.0000001, -0.99999994,
47079 -1., -1., -1., -1.,
47080 -1., -1., -1., -1.,
47081 );
47082 assert_eq_m512(r, e);
47083 }
47084
47085 #[simd_test(enable = "avx512f")]
47086 unsafe fn test_mm512_fmsubadd_round_ps() {
47087 let a = _mm512_set1_ps(0.00000007);
47088 let b = _mm512_set1_ps(1.);
47089 let c = _mm512_set1_ps(-1.);
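// fmsubadd is the mirror of fmaddsub: even-indexed lanes compute a * b + c, odd-indexed lanes compute a * b - c.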
47090 let r =
47091 _mm512_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
47092 #[rustfmt::skip]
47093 let e = _mm512_setr_ps(
47094 -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47095 -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47096 -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47097 -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47098 );
47099 assert_eq_m512(r, e);
47100 let r = _mm512_fmsubadd_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
47101 let e = _mm512_setr_ps(
47102 -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1.,
47103 -0.9999999, 1., -0.9999999, 1., -0.9999999, 1.,
47104 );
47105 assert_eq_m512(r, e);
47106 }
47107
47108 #[simd_test(enable = "avx512f")]
47109 unsafe fn test_mm512_mask_fmsubadd_round_ps() {
47110 let a = _mm512_set1_ps(0.00000007);
47111 let b = _mm512_set1_ps(1.);
47112 let c = _mm512_set1_ps(-1.);
47113 let r = _mm512_mask_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47114 a, 0, b, c,
47115 );
47116 assert_eq_m512(r, a);
47117 let r = _mm512_mask_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47118 a,
47119 0b00000000_11111111,
47120 b,
47121 c,
47122 );
47123 #[rustfmt::skip]
47124 let e = _mm512_setr_ps(
47125 -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47126 -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47127 0.00000007, 0.00000007, 0.00000007, 0.00000007,
47128 0.00000007, 0.00000007, 0.00000007, 0.00000007,
47129 );
47130 assert_eq_m512(r, e);
47131 }
47132
47133 #[simd_test(enable = "avx512f")]
47134 unsafe fn test_mm512_maskz_fmsubadd_round_ps() {
47135 let a = _mm512_set1_ps(0.00000007);
47136 let b = _mm512_set1_ps(1.);
47137 let c = _mm512_set1_ps(-1.);
47138 let r = _mm512_maskz_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47139 0, a, b, c,
47140 );
47141 assert_eq_m512(r, _mm512_setzero_ps());
47142 let r = _mm512_maskz_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47143 0b00000000_11111111,
47144 a,
47145 b,
47146 c,
47147 );
47148 #[rustfmt::skip]
47149 let e = _mm512_setr_ps(
47150 -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47151 -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47152 0., 0., 0., 0.,
47153 0., 0., 0., 0.,
47154 );
47155 assert_eq_m512(r, e);
47156 }
47157
47158 #[simd_test(enable = "avx512f")]
47159 unsafe fn test_mm512_mask3_fmsubadd_round_ps() {
47160 let a = _mm512_set1_ps(0.00000007);
47161 let b = _mm512_set1_ps(1.);
47162 let c = _mm512_set1_ps(-1.);
47163 let r = _mm512_mask3_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47164 a, b, c, 0,
47165 );
47166 assert_eq_m512(r, c);
47167 let r = _mm512_mask3_fmsubadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47168 a,
47169 b,
47170 c,
47171 0b00000000_11111111,
47172 );
47173 #[rustfmt::skip]
47174 let e = _mm512_setr_ps(
47175 -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47176 -0.99999994, 1.0000001, -0.99999994, 1.0000001,
47177 -1., -1., -1., -1.,
47178 -1., -1., -1., -1.,
47179 );
47180 assert_eq_m512(r, e);
47181 }
47182
47183 #[simd_test(enable = "avx512f")]
47184 unsafe fn test_mm512_fnmadd_round_ps() {
47185 let a = _mm512_set1_ps(0.00000007);
47186 let b = _mm512_set1_ps(1.);
47187 let c = _mm512_set1_ps(1.);
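// fnmadd computes -(a * b) + c = 1. - 0.00000007, rounded once: 0.99999994 (nearest) vs 0.9999999 (toward zero).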
47188 let r =
47189 _mm512_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
47190 let e = _mm512_set1_ps(0.99999994);
47191 assert_eq_m512(r, e);
47192 let r = _mm512_fnmadd_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
47193 let e = _mm512_set1_ps(0.9999999);
47194 assert_eq_m512(r, e);
47195 }
47196
47197 #[simd_test(enable = "avx512f")]
47198 unsafe fn test_mm512_mask_fnmadd_round_ps() {
47199 let a = _mm512_set1_ps(0.00000007);
47200 let b = _mm512_set1_ps(1.);
47201 let c = _mm512_set1_ps(1.);
47202 let r = _mm512_mask_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47203 a, 0, b, c,
47204 );
47205 assert_eq_m512(r, a);
47206 let r = _mm512_mask_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47207 a,
47208 0b00000000_11111111,
47209 b,
47210 c,
47211 );
47212 let e = _mm512_setr_ps(
47213 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
47214 0.99999994, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007,
47215 0.00000007, 0.00000007,
47216 );
47217 assert_eq_m512(r, e);
47218 }
47219
47220 #[simd_test(enable = "avx512f")]
47221 unsafe fn test_mm512_maskz_fnmadd_round_ps() {
47222 let a = _mm512_set1_ps(0.00000007);
47223 let b = _mm512_set1_ps(1.);
47224 let c = _mm512_set1_ps(1.);
47225 let r = _mm512_maskz_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47226 0, a, b, c,
47227 );
47228 assert_eq_m512(r, _mm512_setzero_ps());
47229 let r = _mm512_maskz_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47230 0b00000000_11111111,
47231 a,
47232 b,
47233 c,
47234 );
47235 let e = _mm512_setr_ps(
47236 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
47237 0.99999994, 0., 0., 0., 0., 0., 0., 0., 0.,
47238 );
47239 assert_eq_m512(r, e);
47240 }
47241
47242 #[simd_test(enable = "avx512f")]
47243 unsafe fn test_mm512_mask3_fnmadd_round_ps() {
47244 let a = _mm512_set1_ps(0.00000007);
47245 let b = _mm512_set1_ps(1.);
47246 let c = _mm512_set1_ps(1.);
47247 let r = _mm512_mask3_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47248 a, b, c, 0,
47249 );
47250 assert_eq_m512(r, c);
47251 let r = _mm512_mask3_fnmadd_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47252 a,
47253 b,
47254 c,
47255 0b00000000_11111111,
47256 );
47257 let e = _mm512_setr_ps(
47258 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
47259 0.99999994, 1., 1., 1., 1., 1., 1., 1., 1.,
47260 );
47261 assert_eq_m512(r, e);
47262 }
47263
47264 #[simd_test(enable = "avx512f")]
47265 unsafe fn test_mm512_fnmsub_round_ps() {
47266 let a = _mm512_set1_ps(0.00000007);
47267 let b = _mm512_set1_ps(1.);
47268 let c = _mm512_set1_ps(-1.);
47269 let r =
47270 _mm512_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
47271 let e = _mm512_set1_ps(0.99999994);
47272 assert_eq_m512(r, e);
47273 let r = _mm512_fnmsub_round_ps::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
47274 let e = _mm512_set1_ps(0.9999999);
47275 assert_eq_m512(r, e);
47276 }
47277
47278 #[simd_test(enable = "avx512f")]
47279 unsafe fn test_mm512_mask_fnmsub_round_ps() {
47280 let a = _mm512_set1_ps(0.00000007);
47281 let b = _mm512_set1_ps(1.);
47282 let c = _mm512_set1_ps(-1.);
47283 let r = _mm512_mask_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47284 a, 0, b, c,
47285 );
47286 assert_eq_m512(r, a);
47287 let r = _mm512_mask_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47288 a,
47289 0b00000000_11111111,
47290 b,
47291 c,
47292 );
47293 let e = _mm512_setr_ps(
47294 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
47295 0.99999994, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007,
47296 0.00000007, 0.00000007,
47297 );
47298 assert_eq_m512(r, e);
47299 }
47300
47301 #[simd_test(enable = "avx512f")]
47302 unsafe fn test_mm512_maskz_fnmsub_round_ps() {
47303 let a = _mm512_set1_ps(0.00000007);
47304 let b = _mm512_set1_ps(1.);
47305 let c = _mm512_set1_ps(-1.);
47306 let r = _mm512_maskz_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47307 0, a, b, c,
47308 );
47309 assert_eq_m512(r, _mm512_setzero_ps());
47310 let r = _mm512_maskz_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47311 0b00000000_11111111,
47312 a,
47313 b,
47314 c,
47315 );
47316 let e = _mm512_setr_ps(
47317 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
47318 0.99999994, 0., 0., 0., 0., 0., 0., 0., 0.,
47319 );
47320 assert_eq_m512(r, e);
47321 }
47322
47323 #[simd_test(enable = "avx512f")]
47324 unsafe fn test_mm512_mask3_fnmsub_round_ps() {
47325 let a = _mm512_set1_ps(0.00000007);
47326 let b = _mm512_set1_ps(1.);
47327 let c = _mm512_set1_ps(-1.);
47328 let r = _mm512_mask3_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47329 a, b, c, 0,
47330 );
47331 assert_eq_m512(r, c);
47332 let r = _mm512_mask3_fnmsub_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47333 a,
47334 b,
47335 c,
47336 0b00000000_11111111,
47337 );
47338 let e = _mm512_setr_ps(
47339 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
47340 0.99999994, -1., -1., -1., -1., -1., -1., -1., -1.,
47341 );
47342 assert_eq_m512(r, e);
47343 }
47344
47345 #[simd_test(enable = "avx512f")]
47346 unsafe fn test_mm512_max_round_ps() {
47347 let a = _mm512_setr_ps(
47348 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47349 );
47350 let b = _mm512_setr_ps(
47351 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
47352 );
47353 let r = _mm512_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, b);
47354 let e = _mm512_setr_ps(
47355 15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
47356 );
47357 assert_eq_m512(r, e);
47358 }
47359
47360 #[simd_test(enable = "avx512f")]
47361 unsafe fn test_mm512_mask_max_round_ps() {
47362 let a = _mm512_setr_ps(
47363 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47364 );
47365 let b = _mm512_setr_ps(
47366 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
47367 );
47368 let r = _mm512_mask_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
47369 assert_eq_m512(r, a);
47370 let r = _mm512_mask_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0b00000000_11111111, a, b);
47371 let e = _mm512_setr_ps(
47372 15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
47373 );
47374 assert_eq_m512(r, e);
47375 }
47376
47377 #[simd_test(enable = "avx512f")]
47378 unsafe fn test_mm512_maskz_max_round_ps() {
47379 let a = _mm512_setr_ps(
47380 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47381 );
47382 let b = _mm512_setr_ps(
47383 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
47384 );
47385 let r = _mm512_maskz_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
47386 assert_eq_m512(r, _mm512_setzero_ps());
47387 let r = _mm512_maskz_max_round_ps::<_MM_FROUND_CUR_DIRECTION>(0b00000000_11111111, a, b);
47388 let e = _mm512_setr_ps(
47389 15., 14., 13., 12., 11., 10., 9., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
47390 );
47391 assert_eq_m512(r, e);
47392 }
47393
47394 #[simd_test(enable = "avx512f")]
47395 unsafe fn test_mm512_min_round_ps() {
47396 let a = _mm512_setr_ps(
47397 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47398 );
47399 let b = _mm512_setr_ps(
47400 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
47401 );
47402 let r = _mm512_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, b);
47403 let e = _mm512_setr_ps(
47404 0., 1., 2., 3., 4., 5., 6., 7., 7., 6., 5., 4., 3., 2., 1., 0.,
47405 );
47406 assert_eq_m512(r, e);
47407 }
47408
47409 #[simd_test(enable = "avx512f")]
47410 unsafe fn test_mm512_mask_min_round_ps() {
47411 let a = _mm512_setr_ps(
47412 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47413 );
47414 let b = _mm512_setr_ps(
47415 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
47416 );
47417 let r = _mm512_mask_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
47418 assert_eq_m512(r, a);
47419 let r = _mm512_mask_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0b00000000_11111111, a, b);
47420 let e = _mm512_setr_ps(
47421 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47422 );
47423 assert_eq_m512(r, e);
47424 }
47425
47426 #[simd_test(enable = "avx512f")]
47427 unsafe fn test_mm512_maskz_min_round_ps() {
47428 let a = _mm512_setr_ps(
47429 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
47430 );
47431 let b = _mm512_setr_ps(
47432 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
47433 );
47434 let r = _mm512_maskz_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
47435 assert_eq_m512(r, _mm512_setzero_ps());
47436 let r = _mm512_maskz_min_round_ps::<_MM_FROUND_CUR_DIRECTION>(0b00000000_11111111, a, b);
47437 let e = _mm512_setr_ps(
47438 0., 1., 2., 3., 4., 5., 6., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
47439 );
47440 assert_eq_m512(r, e);
47441 }
47442
47443 #[simd_test(enable = "avx512f")]
47444 unsafe fn test_mm512_getexp_round_ps() {
47445 let a = _mm512_set1_ps(3.);
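// getexp returns floor(log2(|a|)) as a float: floor(log2(3.)) = 1.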
47446 let r = _mm512_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(a);
47447 let e = _mm512_set1_ps(1.);
47448 assert_eq_m512(r, e);
47449 }
47450
47451 #[simd_test(enable = "avx512f")]
47452 unsafe fn test_mm512_mask_getexp_round_ps() {
47453 let a = _mm512_set1_ps(3.);
47454 let r = _mm512_mask_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0, a);
47455 assert_eq_m512(r, a);
47456 let r = _mm512_mask_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111_00000000, a);
47457 let e = _mm512_setr_ps(
47458 3., 3., 3., 3., 3., 3., 3., 3., 1., 1., 1., 1., 1., 1., 1., 1.,
47459 );
47460 assert_eq_m512(r, e);
47461 }
47462
47463 #[simd_test(enable = "avx512f")]
47464 unsafe fn test_mm512_maskz_getexp_round_ps() {
47465 let a = _mm512_set1_ps(3.);
47466 let r = _mm512_maskz_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(0, a);
47467 assert_eq_m512(r, _mm512_setzero_ps());
47468 let r = _mm512_maskz_getexp_round_ps::<_MM_FROUND_CUR_DIRECTION>(0b11111111_00000000, a);
47469 let e = _mm512_setr_ps(
47470 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
47471 );
47472 assert_eq_m512(r, e);
47473 }
47474
47475 #[simd_test(enable = "avx512f")]
47476 unsafe fn test_mm512_roundscale_round_ps() {
47477 let a = _mm512_set1_ps(1.1);
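// IMM8 = 0 requests zero fraction bits with round-to-nearest, so 1.1 is rounded to the integer 1.0.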
47478 let r = _mm512_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(a);
47479 let e = _mm512_set1_ps(1.0);
47480 assert_eq_m512(r, e);
47481 }
47482
47483 #[simd_test(enable = "avx512f")]
47484 unsafe fn test_mm512_mask_roundscale_round_ps() {
47485 let a = _mm512_set1_ps(1.1);
47486 let r = _mm512_mask_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(a, 0, a);
47487 let e = _mm512_set1_ps(1.1);
47488 assert_eq_m512(r, e);
47489 let r = _mm512_mask_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(
47490 a,
47491 0b11111111_11111111,
47492 a,
47493 );
47494 let e = _mm512_set1_ps(1.0);
47495 assert_eq_m512(r, e);
47496 }
47497
47498 #[simd_test(enable = "avx512f")]
47499 unsafe fn test_mm512_maskz_roundscale_round_ps() {
47500 let a = _mm512_set1_ps(1.1);
47501 let r = _mm512_maskz_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(0, a);
47502 assert_eq_m512(r, _mm512_setzero_ps());
47503 let r =
47504 _mm512_maskz_roundscale_round_ps::<0, _MM_FROUND_CUR_DIRECTION>(0b11111111_11111111, a);
47505 let e = _mm512_set1_ps(1.0);
47506 assert_eq_m512(r, e);
47507 }
47508
47509 #[simd_test(enable = "avx512f")]
47510 unsafe fn test_mm512_scalef_round_ps() {
47511 let a = _mm512_set1_ps(1.);
47512 let b = _mm512_set1_ps(3.);
47513 let r = _mm512_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
47514 let e = _mm512_set1_ps(8.);
47515 assert_eq_m512(r, e);
47516 }
47517
47518 #[simd_test(enable = "avx512f")]
47519 unsafe fn test_mm512_mask_scalef_round_ps() {
47520 let a = _mm512_set1_ps(1.);
47521 let b = _mm512_set1_ps(3.);
47522 let r = _mm512_mask_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47523 a, 0, a, b,
47524 );
47525 assert_eq_m512(r, a);
47526 let r = _mm512_mask_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47527 a,
47528 0b11111111_00000000,
47529 a,
47530 b,
47531 );
47532 let e = _mm512_set_ps(
47533 8., 8., 8., 8., 8., 8., 8., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
47534 );
47535 assert_eq_m512(r, e);
47536 }
47537
47538 #[simd_test(enable = "avx512f")]
47539 unsafe fn test_mm512_maskz_scalef_round_ps() {
47540 let a = _mm512_set1_ps(1.);
47541 let b = _mm512_set1_ps(3.);
47542 let r = _mm512_maskz_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47543 0, a, b,
47544 );
47545 assert_eq_m512(r, _mm512_setzero_ps());
47546 let r = _mm512_maskz_scalef_round_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
47547 0b11111111_00000000,
47548 a,
47549 b,
47550 );
47551 let e = _mm512_set_ps(
47552 8., 8., 8., 8., 8., 8., 8., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
47553 );
47554 assert_eq_m512(r, e);
47555 }
47556
47557 #[simd_test(enable = "avx512f")]
47558 unsafe fn test_mm512_fixupimm_round_ps() {
47559 let a = _mm512_set1_ps(f32::NAN);
47560 let b = _mm512_set1_ps(f32::MAX);
47561 let c = _mm512_set1_epi32(i32::MAX);
47562 let r = _mm512_fixupimm_round_ps::<5, _MM_FROUND_CUR_DIRECTION>(a, b, c);
47563 let e = _mm512_set1_ps(0.0);
47564 assert_eq_m512(r, e);
47565 }
47566
47567 #[simd_test(enable = "avx512f")]
47568 unsafe fn test_mm512_mask_fixupimm_round_ps() {
47569 #[rustfmt::skip]
47570 let a = _mm512_set_ps(
47571 f32::NAN, f32::NAN, f32::NAN, f32::NAN,
47572 f32::NAN, f32::NAN, f32::NAN, f32::NAN,
47573 1., 1., 1., 1.,
47574 1., 1., 1., 1.,
47575 );
47576 let b = _mm512_set1_ps(f32::MAX);
47577 let c = _mm512_set1_epi32(i32::MAX);
47578 let r = _mm512_mask_fixupimm_round_ps::<5, _MM_FROUND_CUR_DIRECTION>(
47579 a,
47580 0b11111111_00000000,
47581 b,
47582 c,
47583 );
47584 let e = _mm512_set_ps(
47585 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
47586 );
47587 assert_eq_m512(r, e);
47588 }
47589
47590 #[simd_test(enable = "avx512f")]
47591 unsafe fn test_mm512_maskz_fixupimm_round_ps() {
47592 #[rustfmt::skip]
47593 let a = _mm512_set_ps(
47594 f32::NAN, f32::NAN, f32::NAN, f32::NAN,
47595 f32::NAN, f32::NAN, f32::NAN, f32::NAN,
47596 1., 1., 1., 1.,
47597 1., 1., 1., 1.,
47598 );
47599 let b = _mm512_set1_ps(f32::MAX);
47600 let c = _mm512_set1_epi32(i32::MAX);
47601 let r = _mm512_maskz_fixupimm_round_ps::<5, _MM_FROUND_CUR_DIRECTION>(
47602 0b11111111_00000000,
47603 a,
47604 b,
47605 c,
47606 );
47607 let e = _mm512_set_ps(
47608 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
47609 );
47610 assert_eq_m512(r, e);
47611 }
47612
47613 #[simd_test(enable = "avx512f")]
47614 unsafe fn test_mm512_getmant_round_ps() {
47615 let a = _mm512_set1_ps(10.);
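// 10.0 = 1.25 * 2^3; normalizing the mantissa into [1, 2) yields 1.25.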
47616 let r = _mm512_getmant_round_ps::<
47617 _MM_MANT_NORM_1_2,
47618 _MM_MANT_SIGN_SRC,
47619 _MM_FROUND_CUR_DIRECTION,
47620 >(a);
47621 let e = _mm512_set1_ps(1.25);
47622 assert_eq_m512(r, e);
47623 }
47624
47625 #[simd_test(enable = "avx512f")]
47626 unsafe fn test_mm512_mask_getmant_round_ps() {
47627 let a = _mm512_set1_ps(10.);
47628 let r = _mm512_mask_getmant_round_ps::<
47629 _MM_MANT_NORM_1_2,
47630 _MM_MANT_SIGN_SRC,
47631 _MM_FROUND_CUR_DIRECTION,
47632 >(a, 0, a);
47633 assert_eq_m512(r, a);
47634 let r = _mm512_mask_getmant_round_ps::<
47635 _MM_MANT_NORM_1_2,
47636 _MM_MANT_SIGN_SRC,
47637 _MM_FROUND_CUR_DIRECTION,
47638 >(a, 0b11111111_00000000, a);
47639 let e = _mm512_setr_ps(
47640 10., 10., 10., 10., 10., 10., 10., 10., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
47641 );
47642 assert_eq_m512(r, e);
47643 }
47644
47645 #[simd_test(enable = "avx512f")]
47646 unsafe fn test_mm512_maskz_getmant_round_ps() {
47647 let a = _mm512_set1_ps(10.);
47648 let r = _mm512_maskz_getmant_round_ps::<
47649 _MM_MANT_NORM_1_2,
47650 _MM_MANT_SIGN_SRC,
47651 _MM_FROUND_CUR_DIRECTION,
47652 >(0, a);
47653 assert_eq_m512(r, _mm512_setzero_ps());
47654 let r = _mm512_maskz_getmant_round_ps::<
47655 _MM_MANT_NORM_1_2,
47656 _MM_MANT_SIGN_SRC,
47657 _MM_FROUND_CUR_DIRECTION,
47658 >(0b11111111_00000000, a);
47659 let e = _mm512_setr_ps(
47660 0., 0., 0., 0., 0., 0., 0., 0., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
47661 );
47662 assert_eq_m512(r, e);
47663 }
47664
47665 #[simd_test(enable = "avx512f")]
47666 unsafe fn test_mm512_cvtps_epi32() {
47667 let a = _mm512_setr_ps(
47668 0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47669 );
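// The default conversion uses round-to-nearest-even: -1.4 -> -1, -3.5 -> -4, 9.5 -> 10, 15.5 -> 16.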
47670 let r = _mm512_cvtps_epi32(a);
47671 let e = _mm512_setr_epi32(0, -1, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
47672 assert_eq_m512i(r, e);
47673 }
47674
47675 #[simd_test(enable = "avx512f")]
47676 unsafe fn test_mm512_mask_cvtps_epi32() {
47677 let a = _mm512_setr_ps(
47678 0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47679 );
47680 let src = _mm512_set1_epi32(0);
47681 let r = _mm512_mask_cvtps_epi32(src, 0, a);
47682 assert_eq_m512i(r, src);
47683 let r = _mm512_mask_cvtps_epi32(src, 0b00000000_11111111, a);
47684 let e = _mm512_setr_epi32(0, -1, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
47685 assert_eq_m512i(r, e);
47686 }
47687
47688 #[simd_test(enable = "avx512f")]
47689 unsafe fn test_mm512_maskz_cvtps_epi32() {
47690 let a = _mm512_setr_ps(
47691 0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47692 );
47693 let r = _mm512_maskz_cvtps_epi32(0, a);
47694 assert_eq_m512i(r, _mm512_setzero_si512());
47695 let r = _mm512_maskz_cvtps_epi32(0b00000000_11111111, a);
47696 let e = _mm512_setr_epi32(0, -1, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
47697 assert_eq_m512i(r, e);
47698 }
47699
47700 #[simd_test(enable = "avx512f,avx512vl")]
47701 unsafe fn test_mm256_mask_cvtps_epi32() {
47702 let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
47703 let src = _mm256_set1_epi32(0);
47704 let r = _mm256_mask_cvtps_epi32(src, 0, a);
47705 assert_eq_m256i(r, src);
47706 let r = _mm256_mask_cvtps_epi32(src, 0b11111111, a);
47707 let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
47708 assert_eq_m256i(r, e);
47709 }
47710
47711 #[simd_test(enable = "avx512f,avx512vl")]
47712 unsafe fn test_mm256_maskz_cvtps_epi32() {
47713 let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
47714 let r = _mm256_maskz_cvtps_epi32(0, a);
47715 assert_eq_m256i(r, _mm256_setzero_si256());
47716 let r = _mm256_maskz_cvtps_epi32(0b11111111, a);
47717 let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
47718 assert_eq_m256i(r, e);
47719 }
47720
47721 #[simd_test(enable = "avx512f,avx512vl")]
47722 unsafe fn test_mm_mask_cvtps_epi32() {
47723 let a = _mm_set_ps(12., 13.5, 14., 15.5);
47724 let src = _mm_set1_epi32(0);
47725 let r = _mm_mask_cvtps_epi32(src, 0, a);
47726 assert_eq_m128i(r, src);
47727 let r = _mm_mask_cvtps_epi32(src, 0b00001111, a);
47728 let e = _mm_set_epi32(12, 14, 14, 16);
47729 assert_eq_m128i(r, e);
47730 }
47731
47732 #[simd_test(enable = "avx512f,avx512vl")]
47733 unsafe fn test_mm_maskz_cvtps_epi32() {
47734 let a = _mm_set_ps(12., 13.5, 14., 15.5);
47735 let r = _mm_maskz_cvtps_epi32(0, a);
47736 assert_eq_m128i(r, _mm_setzero_si128());
47737 let r = _mm_maskz_cvtps_epi32(0b00001111, a);
47738 let e = _mm_set_epi32(12, 14, 14, 16);
47739 assert_eq_m128i(r, e);
47740 }
47741
47742 #[simd_test(enable = "avx512f")]
47743 unsafe fn test_mm512_cvtps_epu32() {
47744 let a = _mm512_setr_ps(
47745 0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47746 );
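// Negative inputs are out of range for the unsigned conversion and yield 0xFFFFFFFF (shown as -1 through the epi32 helpers); in-range lanes round to nearest even.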
47747 let r = _mm512_cvtps_epu32(a);
47748 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 10, 10, 12, 12, 14, 14, 16);
47749 assert_eq_m512i(r, e);
47750 }
47751
47752 #[simd_test(enable = "avx512f")]
47753 unsafe fn test_mm512_mask_cvtps_epu32() {
47754 let a = _mm512_setr_ps(
47755 0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47756 );
47757 let src = _mm512_set1_epi32(0);
47758 let r = _mm512_mask_cvtps_epu32(src, 0, a);
47759 assert_eq_m512i(r, src);
47760 let r = _mm512_mask_cvtps_epu32(src, 0b00000000_11111111, a);
47761 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
47762 assert_eq_m512i(r, e);
47763 }
47764
47765 #[simd_test(enable = "avx512f")]
47766 unsafe fn test_mm512_maskz_cvtps_epu32() {
47767 let a = _mm512_setr_ps(
47768 0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
47769 );
47770 let r = _mm512_maskz_cvtps_epu32(0, a);
47771 assert_eq_m512i(r, _mm512_setzero_si512());
47772 let r = _mm512_maskz_cvtps_epu32(0b00000000_11111111, a);
47773 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
47774 assert_eq_m512i(r, e);
47775 }
47776
47777 #[simd_test(enable = "avx512f,avx512vl")]
47778 unsafe fn test_mm256_cvtps_epu32() {
47779 let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
47780 let r = _mm256_cvtps_epu32(a);
47781 let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
47782 assert_eq_m256i(r, e);
47783 }
47784
47785 #[simd_test(enable = "avx512f,avx512vl")]
47786 unsafe fn test_mm256_mask_cvtps_epu32() {
47787 let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
47788 let src = _mm256_set1_epi32(0);
47789 let r = _mm256_mask_cvtps_epu32(src, 0, a);
47790 assert_eq_m256i(r, src);
47791 let r = _mm256_mask_cvtps_epu32(src, 0b11111111, a);
47792 let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
47793 assert_eq_m256i(r, e);
47794 }
47795
47796 #[simd_test(enable = "avx512f,avx512vl")]
47797 unsafe fn test_mm256_maskz_cvtps_epu32() {
47798 let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
47799 let r = _mm256_maskz_cvtps_epu32(0, a);
47800 assert_eq_m256i(r, _mm256_setzero_si256());
47801 let r = _mm256_maskz_cvtps_epu32(0b11111111, a);
47802 let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
47803 assert_eq_m256i(r, e);
47804 }
47805
47806 #[simd_test(enable = "avx512f,avx512vl")]
47807 unsafe fn test_mm_cvtps_epu32() {
47808 let a = _mm_set_ps(12., 13.5, 14., 15.5);
47809 let r = _mm_cvtps_epu32(a);
47810 let e = _mm_set_epi32(12, 14, 14, 16);
47811 assert_eq_m128i(r, e);
47812 }
47813
47814 #[simd_test(enable = "avx512f,avx512vl")]
47815 unsafe fn test_mm_mask_cvtps_epu32() {
47816 let a = _mm_set_ps(12., 13.5, 14., 15.5);
47817 let src = _mm_set1_epi32(0);
47818 let r = _mm_mask_cvtps_epu32(src, 0, a);
47819 assert_eq_m128i(r, src);
47820 let r = _mm_mask_cvtps_epu32(src, 0b00001111, a);
47821 let e = _mm_set_epi32(12, 14, 14, 16);
47822 assert_eq_m128i(r, e);
47823 }
47824
47825 #[simd_test(enable = "avx512f,avx512vl")]
47826 unsafe fn test_mm_maskz_cvtps_epu32() {
47827 let a = _mm_set_ps(12., 13.5, 14., 15.5);
47828 let r = _mm_maskz_cvtps_epu32(0, a);
47829 assert_eq_m128i(r, _mm_setzero_si128());
47830 let r = _mm_maskz_cvtps_epu32(0b00001111, a);
47831 let e = _mm_set_epi32(12, 14, 14, 16);
47832 assert_eq_m128i(r, e);
47833 }
47834
47835 #[simd_test(enable = "avx512f")]
47836 unsafe fn test_mm512_cvtepi8_epi32() {
47837 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47838 let r = _mm512_cvtepi8_epi32(a);
47839 let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47840 assert_eq_m512i(r, e);
47841 }
47842
47843 #[simd_test(enable = "avx512f")]
47844 unsafe fn test_mm512_mask_cvtepi8_epi32() {
47845 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47846 let src = _mm512_set1_epi32(-1);
47847 let r = _mm512_mask_cvtepi8_epi32(src, 0, a);
47848 assert_eq_m512i(r, src);
47849 let r = _mm512_mask_cvtepi8_epi32(src, 0b00000000_11111111, a);
47850 let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
47851 assert_eq_m512i(r, e);
47852 }
47853
47854 #[simd_test(enable = "avx512f")]
47855 unsafe fn test_mm512_maskz_cvtepi8_epi32() {
47856 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47857 let r = _mm512_maskz_cvtepi8_epi32(0, a);
47858 assert_eq_m512i(r, _mm512_setzero_si512());
47859 let r = _mm512_maskz_cvtepi8_epi32(0b00000000_11111111, a);
47860 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
47861 assert_eq_m512i(r, e);
47862 }
47863
47864 #[simd_test(enable = "avx512f,avx512vl")]
47865 unsafe fn test_mm256_mask_cvtepi8_epi32() {
47866 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47867 let src = _mm256_set1_epi32(-1);
47868 let r = _mm256_mask_cvtepi8_epi32(src, 0, a);
47869 assert_eq_m256i(r, src);
47870 let r = _mm256_mask_cvtepi8_epi32(src, 0b11111111, a);
47871 let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
47872 assert_eq_m256i(r, e);
47873 }
47874
47875 #[simd_test(enable = "avx512f,avx512vl")]
47876 unsafe fn test_mm256_maskz_cvtepi8_epi32() {
47877 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47878 let r = _mm256_maskz_cvtepi8_epi32(0, a);
47879 assert_eq_m256i(r, _mm256_setzero_si256());
47880 let r = _mm256_maskz_cvtepi8_epi32(0b11111111, a);
47881 let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
47882 assert_eq_m256i(r, e);
47883 }
47884
47885 #[simd_test(enable = "avx512f,avx512vl")]
47886 unsafe fn test_mm_mask_cvtepi8_epi32() {
47887 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47888 let src = _mm_set1_epi32(-1);
47889 let r = _mm_mask_cvtepi8_epi32(src, 0, a);
47890 assert_eq_m128i(r, src);
47891 let r = _mm_mask_cvtepi8_epi32(src, 0b00001111, a);
47892 let e = _mm_set_epi32(12, 13, 14, 15);
47893 assert_eq_m128i(r, e);
47894 }
47895
47896 #[simd_test(enable = "avx512f,avx512vl")]
47897 unsafe fn test_mm_maskz_cvtepi8_epi32() {
47898 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47899 let r = _mm_maskz_cvtepi8_epi32(0, a);
47900 assert_eq_m128i(r, _mm_setzero_si128());
47901 let r = _mm_maskz_cvtepi8_epi32(0b00001111, a);
47902 let e = _mm_set_epi32(12, 13, 14, 15);
47903 assert_eq_m128i(r, e);
47904 }
47905
47906 #[simd_test(enable = "avx512f")]
47907 unsafe fn test_mm512_cvtepu8_epi32() {
47908 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47909 let r = _mm512_cvtepu8_epi32(a);
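        // All inputs fit in 0..=15, so zero-extension produces the same lanes as the
        // sign-extension test above.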
47910 let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47911 assert_eq_m512i(r, e);
47912 }
47913
47914 #[simd_test(enable = "avx512f")]
47915 unsafe fn test_mm512_mask_cvtepu8_epi32() {
47916 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47917 let src = _mm512_set1_epi32(-1);
47918 let r = _mm512_mask_cvtepu8_epi32(src, 0, a);
47919 assert_eq_m512i(r, src);
47920 let r = _mm512_mask_cvtepu8_epi32(src, 0b00000000_11111111, a);
47921 let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
47922 assert_eq_m512i(r, e);
47923 }
47924
47925 #[simd_test(enable = "avx512f")]
47926 unsafe fn test_mm512_maskz_cvtepu8_epi32() {
47927 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47928 let r = _mm512_maskz_cvtepu8_epi32(0, a);
47929 assert_eq_m512i(r, _mm512_setzero_si512());
47930 let r = _mm512_maskz_cvtepu8_epi32(0b00000000_11111111, a);
47931 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
47932 assert_eq_m512i(r, e);
47933 }
47934
47935 #[simd_test(enable = "avx512f,avx512vl")]
47936 unsafe fn test_mm256_mask_cvtepu8_epi32() {
47937 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47938 let src = _mm256_set1_epi32(-1);
47939 let r = _mm256_mask_cvtepu8_epi32(src, 0, a);
47940 assert_eq_m256i(r, src);
47941 let r = _mm256_mask_cvtepu8_epi32(src, 0b11111111, a);
47942 let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
47943 assert_eq_m256i(r, e);
47944 }
47945
47946 #[simd_test(enable = "avx512f,avx512vl")]
47947 unsafe fn test_mm256_maskz_cvtepu8_epi32() {
47948 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47949 let r = _mm256_maskz_cvtepu8_epi32(0, a);
47950 assert_eq_m256i(r, _mm256_setzero_si256());
47951 let r = _mm256_maskz_cvtepu8_epi32(0b11111111, a);
47952 let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
47953 assert_eq_m256i(r, e);
47954 }
47955
47956 #[simd_test(enable = "avx512f,avx512vl")]
47957 unsafe fn test_mm_mask_cvtepu8_epi32() {
47958 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47959 let src = _mm_set1_epi32(-1);
47960 let r = _mm_mask_cvtepu8_epi32(src, 0, a);
47961 assert_eq_m128i(r, src);
47962 let r = _mm_mask_cvtepu8_epi32(src, 0b00001111, a);
47963 let e = _mm_set_epi32(12, 13, 14, 15);
47964 assert_eq_m128i(r, e);
47965 }
47966
47967 #[simd_test(enable = "avx512f,avx512vl")]
47968 unsafe fn test_mm_maskz_cvtepu8_epi32() {
47969 let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47970 let r = _mm_maskz_cvtepu8_epi32(0, a);
47971 assert_eq_m128i(r, _mm_setzero_si128());
47972 let r = _mm_maskz_cvtepu8_epi32(0b00001111, a);
47973 let e = _mm_set_epi32(12, 13, 14, 15);
47974 assert_eq_m128i(r, e);
47975 }
47976
47977 #[simd_test(enable = "avx512f")]
47978 unsafe fn test_mm512_cvtepi16_epi32() {
47979 let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47980 let r = _mm512_cvtepi16_epi32(a);
47981 let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47982 assert_eq_m512i(r, e);
47983 }
47984
47985 #[simd_test(enable = "avx512f")]
47986 unsafe fn test_mm512_mask_cvtepi16_epi32() {
47987 let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47988 let src = _mm512_set1_epi32(-1);
47989 let r = _mm512_mask_cvtepi16_epi32(src, 0, a);
47990 assert_eq_m512i(r, src);
47991 let r = _mm512_mask_cvtepi16_epi32(src, 0b00000000_11111111, a);
47992 let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
47993 assert_eq_m512i(r, e);
47994 }
47995
47996 #[simd_test(enable = "avx512f")]
47997 unsafe fn test_mm512_maskz_cvtepi16_epi32() {
47998 let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
47999 let r = _mm512_maskz_cvtepi16_epi32(0, a);
48000 assert_eq_m512i(r, _mm512_setzero_si512());
48001 let r = _mm512_maskz_cvtepi16_epi32(0b00000000_11111111, a);
48002 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
48003 assert_eq_m512i(r, e);
48004 }
48005
48006 #[simd_test(enable = "avx512f,avx512vl")]
48007 unsafe fn test_mm256_mask_cvtepi16_epi32() {
48008 let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48009 let src = _mm256_set1_epi32(-1);
48010 let r = _mm256_mask_cvtepi16_epi32(src, 0, a);
48011 assert_eq_m256i(r, src);
48012 let r = _mm256_mask_cvtepi16_epi32(src, 0b11111111, a);
48013 let e = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48014 assert_eq_m256i(r, e);
48015 }
48016
48017 #[simd_test(enable = "avx512f,avx512vl")]
48018 unsafe fn test_mm256_maskz_cvtepi16_epi32() {
48019 let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48020 let r = _mm256_maskz_cvtepi16_epi32(0, a);
48021 assert_eq_m256i(r, _mm256_setzero_si256());
48022 let r = _mm256_maskz_cvtepi16_epi32(0b11111111, a);
48023 let e = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48024 assert_eq_m256i(r, e);
48025 }
48026
48027 #[simd_test(enable = "avx512f,avx512vl")]
48028 unsafe fn test_mm_mask_cvtepi16_epi32() {
48029 let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48030 let src = _mm_set1_epi32(-1);
48031 let r = _mm_mask_cvtepi16_epi32(src, 0, a);
48032 assert_eq_m128i(r, src);
48033 let r = _mm_mask_cvtepi16_epi32(src, 0b00001111, a);
48034 let e = _mm_set_epi32(4, 5, 6, 7);
48035 assert_eq_m128i(r, e);
48036 }
48037
48038 #[simd_test(enable = "avx512f,avx512vl")]
48039 unsafe fn test_mm_maskz_cvtepi16_epi32() {
48040 let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48041 let r = _mm_maskz_cvtepi16_epi32(0, a);
48042 assert_eq_m128i(r, _mm_setzero_si128());
48043 let r = _mm_maskz_cvtepi16_epi32(0b00001111, a);
48044 let e = _mm_set_epi32(4, 5, 6, 7);
48045 assert_eq_m128i(r, e);
48046 }
48047
48048 #[simd_test(enable = "avx512f")]
48049 unsafe fn test_mm512_cvtepu16_epi32() {
48050 let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48051 let r = _mm512_cvtepu16_epi32(a);
48052 let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48053 assert_eq_m512i(r, e);
48054 }
48055
48056 #[simd_test(enable = "avx512f")]
48057 unsafe fn test_mm512_mask_cvtepu16_epi32() {
48058 let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48059 let src = _mm512_set1_epi32(-1);
48060 let r = _mm512_mask_cvtepu16_epi32(src, 0, a);
48061 assert_eq_m512i(r, src);
48062 let r = _mm512_mask_cvtepu16_epi32(src, 0b00000000_11111111, a);
48063 let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
48064 assert_eq_m512i(r, e);
48065 }
48066
48067 #[simd_test(enable = "avx512f")]
48068 unsafe fn test_mm512_maskz_cvtepu16_epi32() {
48069 let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48070 let r = _mm512_maskz_cvtepu16_epi32(0, a);
48071 assert_eq_m512i(r, _mm512_setzero_si512());
48072 let r = _mm512_maskz_cvtepu16_epi32(0b00000000_11111111, a);
48073 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
48074 assert_eq_m512i(r, e);
48075 }
48076
48077 #[simd_test(enable = "avx512f,avx512vl")]
48078 unsafe fn test_mm256_mask_cvtepu16_epi32() {
48079 let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
48080 let src = _mm256_set1_epi32(-1);
48081 let r = _mm256_mask_cvtepu16_epi32(src, 0, a);
48082 assert_eq_m256i(r, src);
48083 let r = _mm256_mask_cvtepu16_epi32(src, 0b11111111, a);
48084 let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
48085 assert_eq_m256i(r, e);
48086 }
48087
48088 #[simd_test(enable = "avx512f,avx512vl")]
48089 unsafe fn test_mm256_maskz_cvtepu16_epi32() {
48090 let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
48091 let r = _mm256_maskz_cvtepu16_epi32(0, a);
48092 assert_eq_m256i(r, _mm256_setzero_si256());
48093 let r = _mm256_maskz_cvtepu16_epi32(0b11111111, a);
48094 let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
48095 assert_eq_m256i(r, e);
48096 }
48097
48098 #[simd_test(enable = "avx512f,avx512vl")]
48099 unsafe fn test_mm_mask_cvtepu16_epi32() {
48100 let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
48101 let src = _mm_set1_epi32(-1);
48102 let r = _mm_mask_cvtepu16_epi32(src, 0, a);
48103 assert_eq_m128i(r, src);
48104 let r = _mm_mask_cvtepu16_epi32(src, 0b00001111, a);
48105 let e = _mm_set_epi32(12, 13, 14, 15);
48106 assert_eq_m128i(r, e);
48107 }
48108
48109 #[simd_test(enable = "avx512f,avx512vl")]
48110 unsafe fn test_mm_maskz_cvtepu16_epi32() {
48111 let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
48112 let r = _mm_maskz_cvtepu16_epi32(0, a);
48113 assert_eq_m128i(r, _mm_setzero_si128());
48114 let r = _mm_maskz_cvtepu16_epi32(0b00001111, a);
48115 let e = _mm_set_epi32(12, 13, 14, 15);
48116 assert_eq_m128i(r, e);
48117 }
48118
48119 #[simd_test(enable = "avx512f")]
48120 unsafe fn test_mm512_cvtepi32_ps() {
48121 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48122 let r = _mm512_cvtepi32_ps(a);
48123 let e = _mm512_set_ps(
48124 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
48125 );
48126 assert_eq_m512(r, e);
48127 }
48128
48129 #[simd_test(enable = "avx512f")]
48130 unsafe fn test_mm512_mask_cvtepi32_ps() {
48131 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48132 let src = _mm512_set1_ps(-1.);
48133 let r = _mm512_mask_cvtepi32_ps(src, 0, a);
48134 assert_eq_m512(r, src);
48135 let r = _mm512_mask_cvtepi32_ps(src, 0b00000000_11111111, a);
48136 let e = _mm512_set_ps(
48137 -1., -1., -1., -1., -1., -1., -1., -1., 8., 9., 10., 11., 12., 13., 14., 15.,
48138 );
48139 assert_eq_m512(r, e);
48140 }
48141
48142 #[simd_test(enable = "avx512f")]
48143 unsafe fn test_mm512_maskz_cvtepi32_ps() {
48144 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48145 let r = _mm512_maskz_cvtepi32_ps(0, a);
48146 assert_eq_m512(r, _mm512_setzero_ps());
48147 let r = _mm512_maskz_cvtepi32_ps(0b00000000_11111111, a);
48148 let e = _mm512_set_ps(
48149 0., 0., 0., 0., 0., 0., 0., 0., 8., 9., 10., 11., 12., 13., 14., 15.,
48150 );
48151 assert_eq_m512(r, e);
48152 }
48153
48154 #[simd_test(enable = "avx512f,avx512vl")]
48155 unsafe fn test_mm256_mask_cvtepi32_ps() {
48156 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
48157 let src = _mm256_set1_ps(-1.);
48158 let r = _mm256_mask_cvtepi32_ps(src, 0, a);
48159 assert_eq_m256(r, src);
48160 let r = _mm256_mask_cvtepi32_ps(src, 0b11111111, a);
48161 let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
48162 assert_eq_m256(r, e);
48163 }
48164
48165 #[simd_test(enable = "avx512f,avx512vl")]
48166 unsafe fn test_mm256_maskz_cvtepi32_ps() {
48167 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
48168 let r = _mm256_maskz_cvtepi32_ps(0, a);
48169 assert_eq_m256(r, _mm256_setzero_ps());
48170 let r = _mm256_maskz_cvtepi32_ps(0b11111111, a);
48171 let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
48172 assert_eq_m256(r, e);
48173 }
48174
48175 #[simd_test(enable = "avx512f,avx512vl")]
48176 unsafe fn test_mm_mask_cvtepi32_ps() {
48177 let a = _mm_set_epi32(1, 2, 3, 4);
48178 let src = _mm_set1_ps(-1.);
48179 let r = _mm_mask_cvtepi32_ps(src, 0, a);
48180 assert_eq_m128(r, src);
48181 let r = _mm_mask_cvtepi32_ps(src, 0b00001111, a);
48182 let e = _mm_set_ps(1., 2., 3., 4.);
48183 assert_eq_m128(r, e);
48184 }
48185
48186 #[simd_test(enable = "avx512f,avx512vl")]
48187 unsafe fn test_mm_maskz_cvtepi32_ps() {
48188 let a = _mm_set_epi32(1, 2, 3, 4);
48189 let r = _mm_maskz_cvtepi32_ps(0, a);
48190 assert_eq_m128(r, _mm_setzero_ps());
48191 let r = _mm_maskz_cvtepi32_ps(0b00001111, a);
48192 let e = _mm_set_ps(1., 2., 3., 4.);
48193 assert_eq_m128(r, e);
48194 }
48195
48196 #[simd_test(enable = "avx512f")]
48197 unsafe fn test_mm512_cvtepu32_ps() {
48198 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48199 let r = _mm512_cvtepu32_ps(a);
48200 let e = _mm512_set_ps(
48201 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
48202 );
48203 assert_eq_m512(r, e);
48204 }
48205
48206 #[simd_test(enable = "avx512f")]
48207 unsafe fn test_mm512_mask_cvtepu32_ps() {
48208 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48209 let src = _mm512_set1_ps(-1.);
48210 let r = _mm512_mask_cvtepu32_ps(src, 0, a);
48211 assert_eq_m512(r, src);
48212 let r = _mm512_mask_cvtepu32_ps(src, 0b00000000_11111111, a);
48213 let e = _mm512_set_ps(
48214 -1., -1., -1., -1., -1., -1., -1., -1., 8., 9., 10., 11., 12., 13., 14., 15.,
48215 );
48216 assert_eq_m512(r, e);
48217 }
48218
48219 #[simd_test(enable = "avx512f")]
48220 unsafe fn test_mm512_maskz_cvtepu32_ps() {
48221 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48222 let r = _mm512_maskz_cvtepu32_ps(0, a);
48223 assert_eq_m512(r, _mm512_setzero_ps());
48224 let r = _mm512_maskz_cvtepu32_ps(0b00000000_11111111, a);
48225 let e = _mm512_set_ps(
48226 0., 0., 0., 0., 0., 0., 0., 0., 8., 9., 10., 11., 12., 13., 14., 15.,
48227 );
48228 assert_eq_m512(r, e);
48229 }
48230
48231 #[simd_test(enable = "avx512f")]
48232 unsafe fn test_mm512_cvtepi32_epi16() {
48233 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
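        // Plain truncation (vpmovdw): each 32-bit lane is narrowed to its low 16 bits;
        // the saturating variants are exercised in the cvtsepi32/cvtusepi32 tests below.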
48234 let r = _mm512_cvtepi32_epi16(a);
48235 let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48236 assert_eq_m256i(r, e);
48237 }
48238
48239 #[simd_test(enable = "avx512f")]
48240 unsafe fn test_mm512_mask_cvtepi32_epi16() {
48241 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48242 let src = _mm256_set1_epi16(-1);
48243 let r = _mm512_mask_cvtepi32_epi16(src, 0, a);
48244 assert_eq_m256i(r, src);
48245 let r = _mm512_mask_cvtepi32_epi16(src, 0b00000000_11111111, a);
48246 let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
48247 assert_eq_m256i(r, e);
48248 }
48249
48250 #[simd_test(enable = "avx512f")]
48251 unsafe fn test_mm512_maskz_cvtepi32_epi16() {
48252 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48253 let r = _mm512_maskz_cvtepi32_epi16(0, a);
48254 assert_eq_m256i(r, _mm256_setzero_si256());
48255 let r = _mm512_maskz_cvtepi32_epi16(0b00000000_11111111, a);
48256 let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
48257 assert_eq_m256i(r, e);
48258 }
48259
48260 #[simd_test(enable = "avx512f,avx512vl")]
48261 unsafe fn test_mm256_cvtepi32_epi16() {
48262 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48263 let r = _mm256_cvtepi32_epi16(a);
48264 let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48265 assert_eq_m128i(r, e);
48266 }
48267
48268 #[simd_test(enable = "avx512f,avx512vl")]
48269 unsafe fn test_mm256_mask_cvtepi32_epi16() {
48270 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48271 let src = _mm_set1_epi16(-1);
48272 let r = _mm256_mask_cvtepi32_epi16(src, 0, a);
48273 assert_eq_m128i(r, src);
48274 let r = _mm256_mask_cvtepi32_epi16(src, 0b11111111, a);
48275 let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48276 assert_eq_m128i(r, e);
48277 }
48278
48279 #[simd_test(enable = "avx512f,avx512vl")]
48280 unsafe fn test_mm256_maskz_cvtepi32_epi16() {
48281 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48282 let r = _mm256_maskz_cvtepi32_epi16(0, a);
48283 assert_eq_m128i(r, _mm_setzero_si128());
48284 let r = _mm256_maskz_cvtepi32_epi16(0b11111111, a);
48285 let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48286 assert_eq_m128i(r, e);
48287 }
48288
48289 #[simd_test(enable = "avx512f,avx512vl")]
48290 unsafe fn test_mm_cvtepi32_epi16() {
48291 let a = _mm_set_epi32(4, 5, 6, 7);
48292 let r = _mm_cvtepi32_epi16(a);
48293 let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
48294 assert_eq_m128i(r, e);
48295 }
48296
48297 #[simd_test(enable = "avx512f,avx512vl")]
48298 unsafe fn test_mm_mask_cvtepi32_epi16() {
48299 let a = _mm_set_epi32(4, 5, 6, 7);
48300 let src = _mm_set1_epi16(0);
48301 let r = _mm_mask_cvtepi32_epi16(src, 0, a);
48302 assert_eq_m128i(r, src);
48303 let r = _mm_mask_cvtepi32_epi16(src, 0b00001111, a);
48304 let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
48305 assert_eq_m128i(r, e);
48306 }
48307
48308 #[simd_test(enable = "avx512f,avx512vl")]
48309 unsafe fn test_mm_maskz_cvtepi32_epi16() {
48310 let a = _mm_set_epi32(4, 5, 6, 7);
48311 let r = _mm_maskz_cvtepi32_epi16(0, a);
48312 assert_eq_m128i(r, _mm_setzero_si128());
48313 let r = _mm_maskz_cvtepi32_epi16(0b00001111, a);
48314 let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
48315 assert_eq_m128i(r, e);
48316 }
48317
48318 #[simd_test(enable = "avx512f")]
48319 unsafe fn test_mm512_cvtepi32_epi8() {
48320 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48321 let r = _mm512_cvtepi32_epi8(a);
48322 let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48323 assert_eq_m128i(r, e);
48324 }
48325
48326 #[simd_test(enable = "avx512f")]
48327 unsafe fn test_mm512_mask_cvtepi32_epi8() {
48328 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48329 let src = _mm_set1_epi8(-1);
48330 let r = _mm512_mask_cvtepi32_epi8(src, 0, a);
48331 assert_eq_m128i(r, src);
48332 let r = _mm512_mask_cvtepi32_epi8(src, 0b00000000_11111111, a);
48333 let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
48334 assert_eq_m128i(r, e);
48335 }
48336
48337 #[simd_test(enable = "avx512f")]
48338 unsafe fn test_mm512_maskz_cvtepi32_epi8() {
48339 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
48340 let r = _mm512_maskz_cvtepi32_epi8(0, a);
48341 assert_eq_m128i(r, _mm_setzero_si128());
48342 let r = _mm512_maskz_cvtepi32_epi8(0b00000000_11111111, a);
48343 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
48344 assert_eq_m128i(r, e);
48345 }
48346
48347 #[simd_test(enable = "avx512f,avx512vl")]
48348 unsafe fn test_mm256_cvtepi32_epi8() {
48349 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48350 let r = _mm256_cvtepi32_epi8(a);
48351 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7);
48352 assert_eq_m128i(r, e);
48353 }
48354
48355 #[simd_test(enable = "avx512f,avx512vl")]
48356 unsafe fn test_mm256_mask_cvtepi32_epi8() {
48357 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48358 let src = _mm_set1_epi8(0);
48359 let r = _mm256_mask_cvtepi32_epi8(src, 0, a);
48360 assert_eq_m128i(r, src);
48361 let r = _mm256_mask_cvtepi32_epi8(src, 0b11111111, a);
48362 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7);
48363 assert_eq_m128i(r, e);
48364 }
48365
48366 #[simd_test(enable = "avx512f,avx512vl")]
48367 unsafe fn test_mm256_maskz_cvtepi32_epi8() {
48368 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48369 let r = _mm256_maskz_cvtepi32_epi8(0, a);
48370 assert_eq_m128i(r, _mm_setzero_si128());
48371 let r = _mm256_maskz_cvtepi32_epi8(0b11111111, a);
48372 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7);
48373 assert_eq_m128i(r, e);
48374 }
48375
48376 #[simd_test(enable = "avx512f,avx512vl")]
48377 unsafe fn test_mm_cvtepi32_epi8() {
48378 let a = _mm_set_epi32(4, 5, 6, 7);
48379 let r = _mm_cvtepi32_epi8(a);
48380 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, 7);
48381 assert_eq_m128i(r, e);
48382 }
48383
48384 #[simd_test(enable = "avx512f,avx512vl")]
48385 unsafe fn test_mm_mask_cvtepi32_epi8() {
48386 let a = _mm_set_epi32(4, 5, 6, 7);
48387 let src = _mm_set1_epi8(0);
48388 let r = _mm_mask_cvtepi32_epi8(src, 0, a);
48389 assert_eq_m128i(r, src);
48390 let r = _mm_mask_cvtepi32_epi8(src, 0b00001111, a);
48391 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, 7);
48392 assert_eq_m128i(r, e);
48393 }
48394
48395 #[simd_test(enable = "avx512f,avx512vl")]
48396 unsafe fn test_mm_maskz_cvtepi32_epi8() {
48397 let a = _mm_set_epi32(4, 5, 6, 7);
48398 let r = _mm_maskz_cvtepi32_epi8(0, a);
48399 assert_eq_m128i(r, _mm_setzero_si128());
48400 let r = _mm_maskz_cvtepi32_epi8(0b00001111, a);
48401 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, 7);
48402 assert_eq_m128i(r, e);
48403 }
48404
48405 #[simd_test(enable = "avx512f")]
48406 unsafe fn test_mm512_cvtsepi32_epi16() {
48407 #[rustfmt::skip]
48408 let a = _mm512_set_epi32(
48409 0, 1, 2, 3,
48410 4, 5, 6, 7,
48411 8, 9, 10, 11,
48412 12, 13, i32::MIN, i32::MAX,
48413 );
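        // Signed saturation (vpmovsdw): i32::MIN and i32::MAX clamp to i16::MIN and i16::MAX.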
48414 let r = _mm512_cvtsepi32_epi16(a);
48415 #[rustfmt::skip]
48416 let e = _mm256_set_epi16(
48417 0, 1, 2, 3,
48418 4, 5, 6, 7,
48419 8, 9, 10, 11,
48420 12, 13, i16::MIN, i16::MAX,
48421 );
48422 assert_eq_m256i(r, e);
48423 }
48424
48425 #[simd_test(enable = "avx512f")]
48426 unsafe fn test_mm512_mask_cvtsepi32_epi16() {
48427 #[rustfmt::skip]
48428 let a = _mm512_set_epi32(
48429 0, 1, 2, 3,
48430 4, 5, 6, 7,
48431 8, 9, 10, 11,
48432 12, 13, i32::MIN, i32::MAX,
48433 );
48434 let src = _mm256_set1_epi16(-1);
48435 let r = _mm512_mask_cvtsepi32_epi16(src, 0, a);
48436 assert_eq_m256i(r, src);
48437 let r = _mm512_mask_cvtsepi32_epi16(src, 0b00000000_11111111, a);
48438 #[rustfmt::skip]
48439 let e = _mm256_set_epi16(
48440 -1, -1, -1, -1,
48441 -1, -1, -1, -1,
48442 8, 9, 10, 11,
48443 12, 13, i16::MIN, i16::MAX,
48444 );
48445 assert_eq_m256i(r, e);
48446 }
48447
48448 #[simd_test(enable = "avx512f")]
48449 unsafe fn test_mm512_maskz_cvtsepi32_epi16() {
48450 #[rustfmt::skip]
48451 let a = _mm512_set_epi32(
48452 0, 1, 2, 3,
48453 4, 5, 6, 7,
48454 8, 9, 10, 11,
48455 12, 13, i32::MIN, i32::MAX,
48456 );
48457 let r = _mm512_maskz_cvtsepi32_epi16(0, a);
48458 assert_eq_m256i(r, _mm256_setzero_si256());
48459 let r = _mm512_maskz_cvtsepi32_epi16(0b00000000_11111111, a);
48460 #[rustfmt::skip]
48461 let e = _mm256_set_epi16(
48462 0, 0, 0, 0,
48463 0, 0, 0, 0,
48464 8, 9, 10, 11,
48465 12, 13, i16::MIN, i16::MAX,
48466 );
48467 assert_eq_m256i(r, e);
48468 }
48469
48470 #[simd_test(enable = "avx512f,avx512vl")]
48471 unsafe fn test_mm256_cvtsepi32_epi16() {
48472 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48473 let r = _mm256_cvtsepi32_epi16(a);
48474 let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48475 assert_eq_m128i(r, e);
48476 }
48477
48478 #[simd_test(enable = "avx512f,avx512vl")]
48479 unsafe fn test_mm256_mask_cvtsepi32_epi16() {
48480 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48481 let src = _mm_set1_epi16(-1);
48482 let r = _mm256_mask_cvtsepi32_epi16(src, 0, a);
48483 assert_eq_m128i(r, src);
48484 let r = _mm256_mask_cvtsepi32_epi16(src, 0b11111111, a);
48485 let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48486 assert_eq_m128i(r, e);
48487 }
48488
48489 #[simd_test(enable = "avx512f,avx512vl")]
48490 unsafe fn test_mm256_maskz_cvtsepi32_epi16() {
48491 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
48492 let r = _mm256_maskz_cvtsepi32_epi16(0, a);
48493 assert_eq_m128i(r, _mm_setzero_si128());
48494 let r = _mm256_maskz_cvtsepi32_epi16(0b11111111, a);
48495 let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
48496 assert_eq_m128i(r, e);
48497 }
48498
48499 #[simd_test(enable = "avx512f,avx512vl")]
48500 unsafe fn test_mm_cvtsepi32_epi16() {
48501 let a = _mm_set_epi32(4, 5, 6, 7);
48502 let r = _mm_cvtsepi32_epi16(a);
48503 let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
48504 assert_eq_m128i(r, e);
48505 }
48506
48507 #[simd_test(enable = "avx512f,avx512vl")]
48508 unsafe fn test_mm_mask_cvtsepi32_epi16() {
48509 let a = _mm_set_epi32(4, 5, 6, 7);
48510 let src = _mm_set1_epi16(0);
48511 let r = _mm_mask_cvtsepi32_epi16(src, 0, a);
48512 assert_eq_m128i(r, src);
        let r = _mm_mask_cvtsepi32_epi16(src, 0b00001111, a);
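        // Same data, but signed saturation down to 8 bits clamps the extremes to
        // i8::MIN and i8::MAX.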
48514 let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
48515 assert_eq_m128i(r, e);
48516 }
48517
48518 #[simd_test(enable = "avx512f,avx512vl")]
48519 unsafe fn test_mm_maskz_cvtsepi32_epi16() {
48520 let a = _mm_set_epi32(4, 5, 6, 7);
48521 let r = _mm_maskz_cvtsepi32_epi16(0, a);
48522 assert_eq_m128i(r, _mm_setzero_si128());
        let r = _mm_maskz_cvtsepi32_epi16(0b00001111, a);
48524 let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
48525 assert_eq_m128i(r, e);
48526 }
48527
48528 #[simd_test(enable = "avx512f")]
48529 unsafe fn test_mm512_cvtsepi32_epi8() {
48530 #[rustfmt::skip]
48531 let a = _mm512_set_epi32(
48532 0, 1, 2, 3,
48533 4, 5, 6, 7,
48534 8, 9, 10, 11,
48535 12, 13, i32::MIN, i32::MAX,
48536 );
48537 let r = _mm512_cvtsepi32_epi8(a);
48538 #[rustfmt::skip]
48539 let e = _mm_set_epi8(
48540 0, 1, 2, 3,
48541 4, 5, 6, 7,
48542 8, 9, 10, 11,
48543 12, 13, i8::MIN, i8::MAX,
48544 );
48545 assert_eq_m128i(r, e);
48546 }
48547
48548 #[simd_test(enable = "avx512f")]
48549 unsafe fn test_mm512_mask_cvtsepi32_epi8() {
48550 #[rustfmt::skip]
48551 let a = _mm512_set_epi32(
48552 0, 1, 2, 3,
48553 4, 5, 6, 7,
48554 8, 9, 10, 11,
48555 12, 13, i32::MIN, i32::MAX,
48556 );
48557 let src = _mm_set1_epi8(-1);
48558 let r = _mm512_mask_cvtsepi32_epi8(src, 0, a);
48559 assert_eq_m128i(r, src);
48560 let r = _mm512_mask_cvtsepi32_epi8(src, 0b00000000_11111111, a);
48561 #[rustfmt::skip]
48562 let e = _mm_set_epi8(
48563 -1, -1, -1, -1,
48564 -1, -1, -1, -1,
48565 8, 9, 10, 11,
48566 12, 13, i8::MIN, i8::MAX,
48567 );
48568 assert_eq_m128i(r, e);
48569 }
48570
48571 #[simd_test(enable = "avx512f")]
48572 unsafe fn test_mm512_maskz_cvtsepi32_epi8() {
48573 #[rustfmt::skip]
48574 let a = _mm512_set_epi32(
48575 0, 1, 2, 3,
48576 4, 5, 6, 7,
48577 8, 9, 10, 11,
48578 12, 13, i32::MIN, i32::MAX,
48579 );
48580 let r = _mm512_maskz_cvtsepi32_epi8(0, a);
48581 assert_eq_m128i(r, _mm_setzero_si128());
48582 let r = _mm512_maskz_cvtsepi32_epi8(0b00000000_11111111, a);
48583 #[rustfmt::skip]
48584 let e = _mm_set_epi8(
48585 0, 0, 0, 0,
48586 0, 0, 0, 0,
48587 8, 9, 10, 11,
48588 12, 13, i8::MIN, i8::MAX,
48589 );
48590 assert_eq_m128i(r, e);
48591 }
48592
48593 #[simd_test(enable = "avx512f,avx512vl")]
48594 unsafe fn test_mm256_cvtsepi32_epi8() {
48595 let a = _mm256_set_epi32(9, 10, 11, 12, 13, 14, 15, 16);
48596 let r = _mm256_cvtsepi32_epi8(a);
48597 #[rustfmt::skip]
48598 let e = _mm_set_epi8(
48599 0, 0, 0, 0,
48600 0, 0, 0, 0,
48601 9, 10, 11, 12,
48602 13, 14, 15, 16,
48603 );
48604 assert_eq_m128i(r, e);
48605 }
48606
48607 #[simd_test(enable = "avx512f,avx512vl")]
48608 unsafe fn test_mm256_mask_cvtsepi32_epi8() {
48609 let a = _mm256_set_epi32(9, 10, 11, 12, 13, 14, 15, 16);
48610 let src = _mm_set1_epi8(0);
48611 let r = _mm256_mask_cvtsepi32_epi8(src, 0, a);
48612 assert_eq_m128i(r, src);
48613 let r = _mm256_mask_cvtsepi32_epi8(src, 0b11111111, a);
48614 #[rustfmt::skip]
48615 let e = _mm_set_epi8(
48616 0, 0, 0, 0,
48617 0, 0, 0, 0,
48618 9, 10, 11, 12,
48619 13, 14, 15, 16,
48620 );
48621 assert_eq_m128i(r, e);
48622 }
48623
48624 #[simd_test(enable = "avx512f,avx512vl")]
48625 unsafe fn test_mm256_maskz_cvtsepi32_epi8() {
48626 let a = _mm256_set_epi32(9, 10, 11, 12, 13, 14, 15, 16);
48627 let r = _mm256_maskz_cvtsepi32_epi8(0, a);
48628 assert_eq_m128i(r, _mm_setzero_si128());
48629 let r = _mm256_maskz_cvtsepi32_epi8(0b11111111, a);
48630 #[rustfmt::skip]
48631 let e = _mm_set_epi8(
48632 0, 0, 0, 0,
48633 0, 0, 0, 0,
48634 9, 10, 11, 12,
48635 13, 14, 15, 16,
48636 );
48637 assert_eq_m128i(r, e);
48638 }
48639
48640 #[simd_test(enable = "avx512f,avx512vl")]
48641 unsafe fn test_mm_cvtsepi32_epi8() {
48642 let a = _mm_set_epi32(13, 14, 15, 16);
48643 let r = _mm_cvtsepi32_epi8(a);
48644 #[rustfmt::skip]
48645 let e = _mm_set_epi8(
48646 0, 0, 0, 0,
48647 0, 0, 0, 0,
48648 0, 0, 0, 0,
48649 13, 14, 15, 16,
48650 );
48651 assert_eq_m128i(r, e);
48652 }
48653
48654 #[simd_test(enable = "avx512f,avx512vl")]
48655 unsafe fn test_mm_mask_cvtsepi32_epi8() {
48656 let a = _mm_set_epi32(13, 14, 15, 16);
48657 let src = _mm_set1_epi8(0);
48658 let r = _mm_mask_cvtsepi32_epi8(src, 0, a);
48659 assert_eq_m128i(r, src);
48660 let r = _mm_mask_cvtsepi32_epi8(src, 0b00001111, a);
48661 #[rustfmt::skip]
48662 let e = _mm_set_epi8(
48663 0, 0, 0, 0,
48664 0, 0, 0, 0,
48665 0, 0, 0, 0,
48666 13, 14, 15, 16,
48667 );
48668 assert_eq_m128i(r, e);
48669 }
48670
48671 #[simd_test(enable = "avx512f,avx512vl")]
48672 unsafe fn test_mm_maskz_cvtsepi32_epi8() {
48673 let a = _mm_set_epi32(13, 14, 15, 16);
48674 let r = _mm_maskz_cvtsepi32_epi8(0, a);
48675 assert_eq_m128i(r, _mm_setzero_si128());
48676 let r = _mm_maskz_cvtsepi32_epi8(0b00001111, a);
48677 #[rustfmt::skip]
48678 let e = _mm_set_epi8(
48679 0, 0, 0, 0,
48680 0, 0, 0, 0,
48681 0, 0, 0, 0,
48682 13, 14, 15, 16,
48683 );
48684 assert_eq_m128i(r, e);
48685 }
48686
48687 #[simd_test(enable = "avx512f")]
48688 unsafe fn test_mm512_cvtusepi32_epi16() {
48689 #[rustfmt::skip]
48690 let a = _mm512_set_epi32(
48691 0, 1, 2, 3,
48692 4, 5, 6, 7,
48693 8, 9, 10, 11,
48694 12, 13, i32::MIN, i32::MIN,
48695 );
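        // Unsigned saturation (vpmovusdw): the i32::MIN bit pattern is read as a large
        // unsigned value and clamps to u16::MAX, which prints as -1 via the signed helper.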
48696 let r = _mm512_cvtusepi32_epi16(a);
48697 let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -1, -1);
48698 assert_eq_m256i(r, e);
48699 }
48700
48701 #[simd_test(enable = "avx512f")]
48702 unsafe fn test_mm512_mask_cvtusepi32_epi16() {
48703 #[rustfmt::skip]
48704 let a = _mm512_set_epi32(
48705 0, 1, 2, 3,
48706 4, 5, 6, 7,
48707 8, 9, 10, 11,
48708 12, 13, i32::MIN, i32::MIN,
48709 );
48710 let src = _mm256_set1_epi16(-1);
48711 let r = _mm512_mask_cvtusepi32_epi16(src, 0, a);
48712 assert_eq_m256i(r, src);
48713 let r = _mm512_mask_cvtusepi32_epi16(src, 0b00000000_11111111, a);
48714 let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, -1, -1);
48715 assert_eq_m256i(r, e);
48716 }
48717
48718 #[simd_test(enable = "avx512f")]
48719 unsafe fn test_mm512_maskz_cvtusepi32_epi16() {
48720 #[rustfmt::skip]
48721 let a = _mm512_set_epi32(
48722 0, 1, 2, 3,
48723 4, 5, 6, 7,
48724 8, 9, 10, 11,
48725 12, 13, i32::MIN, i32::MIN,
48726 );
48727 let r = _mm512_maskz_cvtusepi32_epi16(0, a);
48728 assert_eq_m256i(r, _mm256_setzero_si256());
48729 let r = _mm512_maskz_cvtusepi32_epi16(0b00000000_11111111, a);
48730 let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, -1, -1);
48731 assert_eq_m256i(r, e);
48732 }
48733
48734 #[simd_test(enable = "avx512f,avx512vl")]
48735 unsafe fn test_mm256_cvtusepi32_epi16() {
48736 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
48737 let r = _mm256_cvtusepi32_epi16(a);
48738 let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
48739 assert_eq_m128i(r, e);
48740 }
48741
48742 #[simd_test(enable = "avx512f,avx512vl")]
48743 unsafe fn test_mm256_mask_cvtusepi32_epi16() {
48744 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
48745 let src = _mm_set1_epi16(0);
48746 let r = _mm256_mask_cvtusepi32_epi16(src, 0, a);
48747 assert_eq_m128i(r, src);
48748 let r = _mm256_mask_cvtusepi32_epi16(src, 0b11111111, a);
48749 let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
48750 assert_eq_m128i(r, e);
48751 }
48752
48753 #[simd_test(enable = "avx512f,avx512vl")]
48754 unsafe fn test_mm256_maskz_cvtusepi32_epi16() {
48755 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
48756 let r = _mm256_maskz_cvtusepi32_epi16(0, a);
48757 assert_eq_m128i(r, _mm_setzero_si128());
48758 let r = _mm256_maskz_cvtusepi32_epi16(0b11111111, a);
48759 let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
48760 assert_eq_m128i(r, e);
48761 }
48762
48763 #[simd_test(enable = "avx512f,avx512vl")]
48764 unsafe fn test_mm_cvtusepi32_epi16() {
48765 let a = _mm_set_epi32(5, 6, 7, 8);
48766 let r = _mm_cvtusepi32_epi16(a);
48767 let e = _mm_set_epi16(0, 0, 0, 0, 5, 6, 7, 8);
48768 assert_eq_m128i(r, e);
48769 }
48770
48771 #[simd_test(enable = "avx512f,avx512vl")]
48772 unsafe fn test_mm_mask_cvtusepi32_epi16() {
48773 let a = _mm_set_epi32(5, 6, 7, 8);
48774 let src = _mm_set1_epi16(0);
48775 let r = _mm_mask_cvtusepi32_epi16(src, 0, a);
48776 assert_eq_m128i(r, src);
48777 let r = _mm_mask_cvtusepi32_epi16(src, 0b00001111, a);
48778 let e = _mm_set_epi16(0, 0, 0, 0, 5, 6, 7, 8);
48779 assert_eq_m128i(r, e);
48780 }
48781
48782 #[simd_test(enable = "avx512f,avx512vl")]
48783 unsafe fn test_mm_maskz_cvtusepi32_epi16() {
48784 let a = _mm_set_epi32(5, 6, 7, 8);
48785 let r = _mm_maskz_cvtusepi32_epi16(0, a);
48786 assert_eq_m128i(r, _mm_setzero_si128());
48787 let r = _mm_maskz_cvtusepi32_epi16(0b00001111, a);
48788 let e = _mm_set_epi16(0, 0, 0, 0, 5, 6, 7, 8);
48789 assert_eq_m128i(r, e);
48790 }
48791
48792 #[simd_test(enable = "avx512f")]
48793 unsafe fn test_mm512_cvtusepi32_epi8() {
48794 #[rustfmt::skip]
48795 let a = _mm512_set_epi32(
48796 0, 1, 2, 3,
48797 4, 5, 6, 7,
48798 8, 9, 10, 11,
48799 12, 13, i32::MIN, i32::MIN,
48800 );
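        // As above, but unsigned saturation to 8 bits: the out-of-range lanes clamp to
        // u8::MAX, shown as -1 through the signed i8 constructor.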
48801 let r = _mm512_cvtusepi32_epi8(a);
48802 let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -1, -1);
48803 assert_eq_m128i(r, e);
48804 }
48805
48806 #[simd_test(enable = "avx512f")]
48807 unsafe fn test_mm512_mask_cvtusepi32_epi8() {
48808 #[rustfmt::skip]
48809 let a = _mm512_set_epi32(
48810 0, 1, 2, 3,
48811 4, 5, 6, 7,
48812 8, 9, 10, 11,
48813 12, 13, i32::MIN, i32::MIN,
48814 );
48815 let src = _mm_set1_epi8(-1);
48816 let r = _mm512_mask_cvtusepi32_epi8(src, 0, a);
48817 assert_eq_m128i(r, src);
48818 let r = _mm512_mask_cvtusepi32_epi8(src, 0b00000000_11111111, a);
48819 let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, -1, -1);
48820 assert_eq_m128i(r, e);
48821 }
48822
48823 #[simd_test(enable = "avx512f")]
48824 unsafe fn test_mm512_maskz_cvtusepi32_epi8() {
48825 #[rustfmt::skip]
48826 let a = _mm512_set_epi32(
48827 0, 1, 2, 3,
48828 4, 5, 6, 7,
48829 8, 9, 10, 11,
48830 12, 13, i32::MIN, i32::MIN,
48831 );
48832 let r = _mm512_maskz_cvtusepi32_epi8(0, a);
48833 assert_eq_m128i(r, _mm_setzero_si128());
48834 let r = _mm512_maskz_cvtusepi32_epi8(0b00000000_11111111, a);
48835 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, -1, -1);
48836 assert_eq_m128i(r, e);
48837 }
48838
48839 #[simd_test(enable = "avx512f,avx512vl")]
48840 unsafe fn test_mm256_cvtusepi32_epi8() {
48841 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, i32::MAX);
48842 let r = _mm256_cvtusepi32_epi8(a);
48843 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, u8::MAX as i8);
48844 assert_eq_m128i(r, e);
48845 }
48846
48847 #[simd_test(enable = "avx512f,avx512vl")]
48848 unsafe fn test_mm256_mask_cvtusepi32_epi8() {
48849 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, i32::MAX);
48850 let src = _mm_set1_epi8(0);
48851 let r = _mm256_mask_cvtusepi32_epi8(src, 0, a);
48852 assert_eq_m128i(r, src);
48853 let r = _mm256_mask_cvtusepi32_epi8(src, 0b11111111, a);
48854 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, u8::MAX as i8);
48855 assert_eq_m128i(r, e);
48856 }
48857
48858 #[simd_test(enable = "avx512f,avx512vl")]
48859 unsafe fn test_mm256_maskz_cvtusepi32_epi8() {
48860 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, i32::MAX);
48861 let r = _mm256_maskz_cvtusepi32_epi8(0, a);
48862 assert_eq_m128i(r, _mm_setzero_si128());
48863 let r = _mm256_maskz_cvtusepi32_epi8(0b11111111, a);
48864 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, u8::MAX as i8);
48865 assert_eq_m128i(r, e);
48866 }
48867
48868 #[simd_test(enable = "avx512f,avx512vl")]
48869 unsafe fn test_mm_cvtusepi32_epi8() {
48870 let a = _mm_set_epi32(5, 6, 7, i32::MAX);
48871 let r = _mm_cvtusepi32_epi8(a);
48872 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, u8::MAX as i8);
48873 assert_eq_m128i(r, e);
48874 }
48875
48876 #[simd_test(enable = "avx512f,avx512vl")]
48877 unsafe fn test_mm_mask_cvtusepi32_epi8() {
48878 let a = _mm_set_epi32(5, 6, 7, i32::MAX);
48879 let src = _mm_set1_epi8(0);
48880 let r = _mm_mask_cvtusepi32_epi8(src, 0, a);
48881 assert_eq_m128i(r, src);
48882 let r = _mm_mask_cvtusepi32_epi8(src, 0b00001111, a);
48883 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, u8::MAX as i8);
48884 assert_eq_m128i(r, e);
48885 }
48886
48887 #[simd_test(enable = "avx512f,avx512vl")]
48888 unsafe fn test_mm_maskz_cvtusepi32_epi8() {
48889 let a = _mm_set_epi32(5, 6, 7, i32::MAX);
48890 let r = _mm_maskz_cvtusepi32_epi8(0, a);
48891 assert_eq_m128i(r, _mm_setzero_si128());
48892 let r = _mm_maskz_cvtusepi32_epi8(0b00001111, a);
48893 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, u8::MAX as i8);
48894 assert_eq_m128i(r, e);
48895 }
48896
48897 #[simd_test(enable = "avx512f")]
48898 unsafe fn test_mm512_cvt_roundps_epi32() {
48899 let a = _mm512_setr_ps(
48900 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
48901 );
48902 let r = _mm512_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
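        // Round-to-nearest-even sends the .5 cases to the even neighbour (-1.5 -> -2,
        // 9.5 -> 10); the second call below rounds toward negative infinity instead.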
48903 let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
48904 assert_eq_m512i(r, e);
48905 let r = _mm512_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a);
48906 let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 9, 10, 11, 12, 13, 14, 15);
48907 assert_eq_m512i(r, e);
48908 }
48909
48910 #[simd_test(enable = "avx512f")]
48911 unsafe fn test_mm512_mask_cvt_roundps_epi32() {
48912 let a = _mm512_setr_ps(
48913 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
48914 );
48915 let src = _mm512_set1_epi32(0);
48916 let r = _mm512_mask_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48917 src, 0, a,
48918 );
48919 assert_eq_m512i(r, src);
48920 let r = _mm512_mask_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48921 src,
48922 0b00000000_11111111,
48923 a,
48924 );
48925 let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
48926 assert_eq_m512i(r, e);
48927 }
48928
48929 #[simd_test(enable = "avx512f")]
48930 unsafe fn test_mm512_maskz_cvt_roundps_epi32() {
48931 let a = _mm512_setr_ps(
48932 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
48933 );
48934 let r = _mm512_maskz_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48935 0, a,
48936 );
48937 assert_eq_m512i(r, _mm512_setzero_si512());
48938 let r = _mm512_maskz_cvt_roundps_epi32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48939 0b00000000_11111111,
48940 a,
48941 );
48942 let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
48943 assert_eq_m512i(r, e);
48944 }
48945
48946 #[simd_test(enable = "avx512f")]
48947 unsafe fn test_mm512_cvt_roundps_epu32() {
48948 let a = _mm512_setr_ps(
48949 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
48950 );
48951 let r = _mm512_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
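        // Negative lanes are out of range for the unsigned conversion and yield
        // u32::MAX (-1 here), whichever rounding mode is selected.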
48952 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 10, 10, 12, 12, 14, 14, 16);
48953 assert_eq_m512i(r, e);
48954 let r = _mm512_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(a);
48955 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 9, 10, 11, 12, 13, 14, 15);
48956 assert_eq_m512i(r, e);
48957 }
48958
48959 #[simd_test(enable = "avx512f")]
48960 unsafe fn test_mm512_mask_cvt_roundps_epu32() {
48961 let a = _mm512_setr_ps(
48962 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
48963 );
48964 let src = _mm512_set1_epi32(0);
48965 let r = _mm512_mask_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48966 src, 0, a,
48967 );
48968 assert_eq_m512i(r, src);
48969 let r = _mm512_mask_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48970 src,
48971 0b00000000_11111111,
48972 a,
48973 );
48974 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
48975 assert_eq_m512i(r, e);
48976 }
48977
48978 #[simd_test(enable = "avx512f")]
48979 unsafe fn test_mm512_maskz_cvt_roundps_epu32() {
48980 let a = _mm512_setr_ps(
48981 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
48982 );
48983 let r = _mm512_maskz_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48984 0, a,
48985 );
48986 assert_eq_m512i(r, _mm512_setzero_si512());
48987 let r = _mm512_maskz_cvt_roundps_epu32::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
48988 0b00000000_11111111,
48989 a,
48990 );
48991 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
48992 assert_eq_m512i(r, e);
48993 }
48994
48995 #[simd_test(enable = "avx512f")]
48996 unsafe fn test_mm512_cvt_roundepi32_ps() {
48997 let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
48998 let r = _mm512_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
48999 let e = _mm512_setr_ps(
49000 0., -2., 2., -4., 4., -6., 6., -8., 8., 10., 10., 12., 12., 14., 14., 16.,
49001 );
49002 assert_eq_m512(r, e);
49003 }
49004
49005 #[simd_test(enable = "avx512f")]
49006 unsafe fn test_mm512_mask_cvt_roundepi32_ps() {
49007 let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
49008 let src = _mm512_set1_ps(0.);
49009 let r = _mm512_mask_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49010 src, 0, a,
49011 );
49012 assert_eq_m512(r, src);
49013 let r = _mm512_mask_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49014 src,
49015 0b00000000_11111111,
49016 a,
49017 );
49018 let e = _mm512_setr_ps(
49019 0., -2., 2., -4., 4., -6., 6., -8., 0., 0., 0., 0., 0., 0., 0., 0.,
49020 );
49021 assert_eq_m512(r, e);
49022 }
49023
49024 #[simd_test(enable = "avx512f")]
49025 unsafe fn test_mm512_maskz_cvt_roundepi32_ps() {
49026 let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
49027 let r = _mm512_maskz_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49028 0, a,
49029 );
49030 assert_eq_m512(r, _mm512_setzero_ps());
49031 let r = _mm512_maskz_cvt_roundepi32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49032 0b00000000_11111111,
49033 a,
49034 );
49035 let e = _mm512_setr_ps(
49036 0., -2., 2., -4., 4., -6., 6., -8., 0., 0., 0., 0., 0., 0., 0., 0.,
49037 );
49038 assert_eq_m512(r, e);
49039 }
49040
49041 #[simd_test(enable = "avx512f")]
49042 unsafe fn test_mm512_cvt_roundepu32_ps() {
49043 let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
49044 let r = _mm512_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
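        // The negative i32 patterns are read as large unsigned values, e.g. -2 becomes
        // 4_294_967_294, whose nearest f32 is 4_294_967_296.0; the literal 4294967300.
        // parses to that same f32.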
49045 #[rustfmt::skip]
49046 let e = _mm512_setr_ps(
49047 0., 4294967300., 2., 4294967300.,
49048 4., 4294967300., 6., 4294967300.,
49049 8., 10., 10., 12.,
49050 12., 14., 14., 16.,
49051 );
49052 assert_eq_m512(r, e);
49053 }
49054
49055 #[simd_test(enable = "avx512f")]
49056 unsafe fn test_mm512_mask_cvt_roundepu32_ps() {
49057 let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
49058 let src = _mm512_set1_ps(0.);
49059 let r = _mm512_mask_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49060 src, 0, a,
49061 );
49062 assert_eq_m512(r, src);
49063 let r = _mm512_mask_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49064 src,
49065 0b00000000_11111111,
49066 a,
49067 );
49068 #[rustfmt::skip]
49069 let e = _mm512_setr_ps(
49070 0., 4294967300., 2., 4294967300.,
49071 4., 4294967300., 6., 4294967300.,
49072 0., 0., 0., 0.,
49073 0., 0., 0., 0.,
49074 );
49075 assert_eq_m512(r, e);
49076 }
49077
49078 #[simd_test(enable = "avx512f")]
49079 unsafe fn test_mm512_maskz_cvt_roundepu32_ps() {
49080 let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
49081 let r = _mm512_maskz_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49082 0, a,
49083 );
49084 assert_eq_m512(r, _mm512_setzero_ps());
49085 let r = _mm512_maskz_cvt_roundepu32_ps::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
49086 0b00000000_11111111,
49087 a,
49088 );
49089 #[rustfmt::skip]
49090 let e = _mm512_setr_ps(
49091 0., 4294967300., 2., 4294967300.,
49092 4., 4294967300., 6., 4294967300.,
49093 0., 0., 0., 0.,
49094 0., 0., 0., 0.,
49095 );
49096 assert_eq_m512(r, e);
49097 }
49098
49099 #[simd_test(enable = "avx512f")]
49100 unsafe fn test_mm512_cvt_roundps_ph() {
49101 let a = _mm512_set1_ps(1.);
49102 let r = _mm512_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(a);
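        // 4323521613979991040 is 0x3C00_3C00_3C00_3C00: four packed f16 values of 1.0
        // (0x3C00) in each 64-bit lane.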
49103 let e = _mm256_setr_epi64x(
49104 4323521613979991040,
49105 4323521613979991040,
49106 4323521613979991040,
49107 4323521613979991040,
49108 );
49109 assert_eq_m256i(r, e);
49110 }
49111
49112 #[simd_test(enable = "avx512f")]
49113 unsafe fn test_mm512_mask_cvt_roundps_ph() {
49114 let a = _mm512_set1_ps(1.);
49115 let src = _mm256_set1_epi16(0);
49116 let r = _mm512_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
49117 assert_eq_m256i(r, src);
49118 let r = _mm512_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
49119 let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
49120 assert_eq_m256i(r, e);
49121 }
49122
49123 #[simd_test(enable = "avx512f")]
49124 unsafe fn test_mm512_maskz_cvt_roundps_ph() {
49125 let a = _mm512_set1_ps(1.);
49126 let r = _mm512_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0, a);
49127 assert_eq_m256i(r, _mm256_setzero_si256());
49128 let r = _mm512_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
49129 let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
49130 assert_eq_m256i(r, e);
49131 }
49132
49133 #[simd_test(enable = "avx512f,avx512vl")]
49134 unsafe fn test_mm256_mask_cvt_roundps_ph() {
49135 let a = _mm256_set1_ps(1.);
49136 let src = _mm_set1_epi16(0);
49137 let r = _mm256_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
49138 assert_eq_m128i(r, src);
49139 let r = _mm256_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0b11111111, a);
49140 let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49141 assert_eq_m128i(r, e);
49142 }
49143
49144 #[simd_test(enable = "avx512f,avx512vl")]
49145 unsafe fn test_mm256_maskz_cvt_roundps_ph() {
49146 let a = _mm256_set1_ps(1.);
49147 let r = _mm256_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0, a);
49148 assert_eq_m128i(r, _mm_setzero_si128());
49149 let r = _mm256_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0b11111111, a);
49150 let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49151 assert_eq_m128i(r, e);
49152 }
49153
49154 #[simd_test(enable = "avx512f,avx512vl")]
49155 unsafe fn test_mm_mask_cvt_roundps_ph() {
49156 let a = _mm_set1_ps(1.);
49157 let src = _mm_set1_epi16(0);
49158 let r = _mm_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
49159 assert_eq_m128i(r, src);
49160 let r = _mm_mask_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(src, 0b00001111, a);
49161 let e = _mm_setr_epi64x(4323521613979991040, 0);
49162 assert_eq_m128i(r, e);
49163 }
49164
49165 #[simd_test(enable = "avx512f,avx512vl")]
49166 unsafe fn test_mm_maskz_cvt_roundps_ph() {
49167 let a = _mm_set1_ps(1.);
49168 let r = _mm_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0, a);
49169 assert_eq_m128i(r, _mm_setzero_si128());
49170 let r = _mm_maskz_cvt_roundps_ph::<_MM_FROUND_NO_EXC>(0b00001111, a);
49171 let e = _mm_setr_epi64x(4323521613979991040, 0);
49172 assert_eq_m128i(r, e);
49173 }
49174
49175 #[simd_test(enable = "avx512f")]
49176 unsafe fn test_mm512_cvtps_ph() {
49177 let a = _mm512_set1_ps(1.);
49178 let r = _mm512_cvtps_ph::<_MM_FROUND_NO_EXC>(a);
49179 let e = _mm256_setr_epi64x(
49180 4323521613979991040,
49181 4323521613979991040,
49182 4323521613979991040,
49183 4323521613979991040,
49184 );
49185 assert_eq_m256i(r, e);
49186 }
49187
49188 #[simd_test(enable = "avx512f")]
49189 unsafe fn test_mm512_mask_cvtps_ph() {
49190 let a = _mm512_set1_ps(1.);
49191 let src = _mm256_set1_epi16(0);
49192 let r = _mm512_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
49193 assert_eq_m256i(r, src);
49194 let r = _mm512_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
49195 let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
49196 assert_eq_m256i(r, e);
49197 }
49198
49199 #[simd_test(enable = "avx512f")]
49200 unsafe fn test_mm512_maskz_cvtps_ph() {
49201 let a = _mm512_set1_ps(1.);
49202 let r = _mm512_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0, a);
49203 assert_eq_m256i(r, _mm256_setzero_si256());
49204 let r = _mm512_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
49205 let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
49206 assert_eq_m256i(r, e);
49207 }
49208
49209 #[simd_test(enable = "avx512f,avx512vl")]
49210 unsafe fn test_mm256_mask_cvtps_ph() {
49211 let a = _mm256_set1_ps(1.);
49212 let src = _mm_set1_epi16(0);
49213 let r = _mm256_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
49214 assert_eq_m128i(r, src);
49215 let r = _mm256_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0b11111111, a);
49216 let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49217 assert_eq_m128i(r, e);
49218 }
49219
49220 #[simd_test(enable = "avx512f,avx512vl")]
49221 unsafe fn test_mm256_maskz_cvtps_ph() {
49222 let a = _mm256_set1_ps(1.);
49223 let r = _mm256_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0, a);
49224 assert_eq_m128i(r, _mm_setzero_si128());
49225 let r = _mm256_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0b11111111, a);
49226 let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49227 assert_eq_m128i(r, e);
49228 }
49229
49230 #[simd_test(enable = "avx512f,avx512vl")]
49231 unsafe fn test_mm_mask_cvtps_ph() {
49232 let a = _mm_set1_ps(1.);
49233 let src = _mm_set1_epi16(0);
49234 let r = _mm_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0, a);
49235 assert_eq_m128i(r, src);
49236 let r = _mm_mask_cvtps_ph::<_MM_FROUND_NO_EXC>(src, 0b00001111, a);
49237 let e = _mm_setr_epi64x(4323521613979991040, 0);
49238 assert_eq_m128i(r, e);
49239 }
49240
49241 #[simd_test(enable = "avx512f,avx512vl")]
49242 unsafe fn test_mm_maskz_cvtps_ph() {
49243 let a = _mm_set1_ps(1.);
49244 let r = _mm_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0, a);
49245 assert_eq_m128i(r, _mm_setzero_si128());
49246 let r = _mm_maskz_cvtps_ph::<_MM_FROUND_NO_EXC>(0b00001111, a);
49247 let e = _mm_setr_epi64x(4323521613979991040, 0);
49248 assert_eq_m128i(r, e);
49249 }
49250
49251 #[simd_test(enable = "avx512f")]
49252 unsafe fn test_mm512_cvt_roundph_ps() {
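        // The half-precision input reuses 0x3C00_3C00_3C00_3C00 per 64-bit lane,
        // i.e. sixteen f16 values of 1.0 in total.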
49253 let a = _mm256_setr_epi64x(
49254 4323521613979991040,
49255 4323521613979991040,
49256 4323521613979991040,
49257 4323521613979991040,
49258 );
49259 let r = _mm512_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(a);
49260 let e = _mm512_set1_ps(1.);
49261 assert_eq_m512(r, e);
49262 }
49263
49264 #[simd_test(enable = "avx512f")]
49265 unsafe fn test_mm512_mask_cvt_roundph_ps() {
49266 let a = _mm256_setr_epi64x(
49267 4323521613979991040,
49268 4323521613979991040,
49269 4323521613979991040,
49270 4323521613979991040,
49271 );
49272 let src = _mm512_set1_ps(0.);
49273 let r = _mm512_mask_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(src, 0, a);
49274 assert_eq_m512(r, src);
49275 let r = _mm512_mask_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
49276 let e = _mm512_setr_ps(
49277 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
49278 );
49279 assert_eq_m512(r, e);
49280 }
49281
49282 #[simd_test(enable = "avx512f")]
49283 unsafe fn test_mm512_maskz_cvt_roundph_ps() {
49284 let a = _mm256_setr_epi64x(
49285 4323521613979991040,
49286 4323521613979991040,
49287 4323521613979991040,
49288 4323521613979991040,
49289 );
49290 let r = _mm512_maskz_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(0, a);
49291 assert_eq_m512(r, _mm512_setzero_ps());
49292 let r = _mm512_maskz_cvt_roundph_ps::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
49293 let e = _mm512_setr_ps(
49294 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
49295 );
49296 assert_eq_m512(r, e);
49297 }
49298
49299 #[simd_test(enable = "avx512f")]
49300 unsafe fn test_mm512_cvtph_ps() {
49301 let a = _mm256_setr_epi64x(
49302 4323521613979991040,
49303 4323521613979991040,
49304 4323521613979991040,
49305 4323521613979991040,
49306 );
49307 let r = _mm512_cvtph_ps(a);
49308 let e = _mm512_set1_ps(1.);
49309 assert_eq_m512(r, e);
49310 }
49311
49312 #[simd_test(enable = "avx512f")]
49313 unsafe fn test_mm512_mask_cvtph_ps() {
49314 let a = _mm256_setr_epi64x(
49315 4323521613979991040,
49316 4323521613979991040,
49317 4323521613979991040,
49318 4323521613979991040,
49319 );
49320 let src = _mm512_set1_ps(0.);
49321 let r = _mm512_mask_cvtph_ps(src, 0, a);
49322 assert_eq_m512(r, src);
49323 let r = _mm512_mask_cvtph_ps(src, 0b00000000_11111111, a);
49324 let e = _mm512_setr_ps(
49325 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
49326 );
49327 assert_eq_m512(r, e);
49328 }
49329
49330 #[simd_test(enable = "avx512f")]
49331 unsafe fn test_mm512_maskz_cvtph_ps() {
49332 let a = _mm256_setr_epi64x(
49333 4323521613979991040,
49334 4323521613979991040,
49335 4323521613979991040,
49336 4323521613979991040,
49337 );
49338 let r = _mm512_maskz_cvtph_ps(0, a);
49339 assert_eq_m512(r, _mm512_setzero_ps());
49340 let r = _mm512_maskz_cvtph_ps(0b00000000_11111111, a);
49341 let e = _mm512_setr_ps(
49342 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
49343 );
49344 assert_eq_m512(r, e);
49345 }
49346
49347 #[simd_test(enable = "avx512f,avx512vl")]
49348 unsafe fn test_mm256_mask_cvtph_ps() {
49349 let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49350 let src = _mm256_set1_ps(0.);
49351 let r = _mm256_mask_cvtph_ps(src, 0, a);
49352 assert_eq_m256(r, src);
49353 let r = _mm256_mask_cvtph_ps(src, 0b11111111, a);
49354 let e = _mm256_setr_ps(1., 1., 1., 1., 1., 1., 1., 1.);
49355 assert_eq_m256(r, e);
49356 }
49357
49358 #[simd_test(enable = "avx512f,avx512vl")]
49359 unsafe fn test_mm256_maskz_cvtph_ps() {
49360 let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49361 let r = _mm256_maskz_cvtph_ps(0, a);
49362 assert_eq_m256(r, _mm256_setzero_ps());
49363 let r = _mm256_maskz_cvtph_ps(0b11111111, a);
49364 let e = _mm256_setr_ps(1., 1., 1., 1., 1., 1., 1., 1.);
49365 assert_eq_m256(r, e);
49366 }
49367
49368 #[simd_test(enable = "avx512f,avx512vl")]
49369 unsafe fn test_mm_mask_cvtph_ps() {
49370 let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49371 let src = _mm_set1_ps(0.);
49372 let r = _mm_mask_cvtph_ps(src, 0, a);
49373 assert_eq_m128(r, src);
49374 let r = _mm_mask_cvtph_ps(src, 0b00001111, a);
49375 let e = _mm_setr_ps(1., 1., 1., 1.);
49376 assert_eq_m128(r, e);
49377 }
49378
49379 #[simd_test(enable = "avx512f,avx512vl")]
49380 unsafe fn test_mm_maskz_cvtph_ps() {
49381 let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
49382 let r = _mm_maskz_cvtph_ps(0, a);
49383 assert_eq_m128(r, _mm_setzero_ps());
49384 let r = _mm_maskz_cvtph_ps(0b00001111, a);
49385 let e = _mm_setr_ps(1., 1., 1., 1.);
49386 assert_eq_m128(r, e);
49387 }
49388
49389 #[simd_test(enable = "avx512f")]
49390 unsafe fn test_mm512_cvtt_roundps_epi32() {
49391 let a = _mm512_setr_ps(
49392 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49393 );
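        // cvtt converts with truncation toward zero: -1.5 -> -1, 9.5 -> 9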
49394 let r = _mm512_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(a);
49395 let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 8, 9, 10, 11, 12, 13, 14, 15);
49396 assert_eq_m512i(r, e);
49397 }
49398
49399 #[simd_test(enable = "avx512f")]
49400 unsafe fn test_mm512_mask_cvtt_roundps_epi32() {
49401 let a = _mm512_setr_ps(
49402 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49403 );
49404 let src = _mm512_set1_epi32(0);
49405 let r = _mm512_mask_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(src, 0, a);
49406 assert_eq_m512i(r, src);
49407 let r = _mm512_mask_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
49408 let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
49409 assert_eq_m512i(r, e);
49410 }
49411
49412 #[simd_test(enable = "avx512f")]
49413 unsafe fn test_mm512_maskz_cvtt_roundps_epi32() {
49414 let a = _mm512_setr_ps(
49415 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49416 );
49417 let r = _mm512_maskz_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(0, a);
49418 assert_eq_m512i(r, _mm512_setzero_si512());
49419 let r = _mm512_maskz_cvtt_roundps_epi32::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
49420 let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
49421 assert_eq_m512i(r, e);
49422 }
49423
49424 #[simd_test(enable = "avx512f")]
49425 unsafe fn test_mm512_cvtt_roundps_epu32() {
49426 let a = _mm512_setr_ps(
49427 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49428 );
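        // Negative inputs cannot be represented as u32; those lanes convert to 0xFFFFFFFF (shown as -1 below)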
49429 let r = _mm512_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(a);
49430 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 9, 10, 11, 12, 13, 14, 15);
49431 assert_eq_m512i(r, e);
49432 }
49433
49434 #[simd_test(enable = "avx512f")]
49435 unsafe fn test_mm512_mask_cvtt_roundps_epu32() {
49436 let a = _mm512_setr_ps(
49437 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49438 );
49439 let src = _mm512_set1_epi32(0);
49440 let r = _mm512_mask_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(src, 0, a);
49441 assert_eq_m512i(r, src);
49442 let r = _mm512_mask_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(src, 0b00000000_11111111, a);
49443 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
49444 assert_eq_m512i(r, e);
49445 }
49446
49447 #[simd_test(enable = "avx512f")]
49448 unsafe fn test_mm512_maskz_cvtt_roundps_epu32() {
49449 let a = _mm512_setr_ps(
49450 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49451 );
49452 let r = _mm512_maskz_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(0, a);
49453 assert_eq_m512i(r, _mm512_setzero_si512());
49454 let r = _mm512_maskz_cvtt_roundps_epu32::<_MM_FROUND_NO_EXC>(0b00000000_11111111, a);
49455 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
49456 assert_eq_m512i(r, e);
49457 }
49458
49459 #[simd_test(enable = "avx512f")]
49460 unsafe fn test_mm512_cvttps_epi32() {
49461 let a = _mm512_setr_ps(
49462 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49463 );
49464 let r = _mm512_cvttps_epi32(a);
49465 let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 8, 9, 10, 11, 12, 13, 14, 15);
49466 assert_eq_m512i(r, e);
49467 }
49468
49469 #[simd_test(enable = "avx512f")]
49470 unsafe fn test_mm512_mask_cvttps_epi32() {
49471 let a = _mm512_setr_ps(
49472 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49473 );
49474 let src = _mm512_set1_epi32(0);
49475 let r = _mm512_mask_cvttps_epi32(src, 0, a);
49476 assert_eq_m512i(r, src);
49477 let r = _mm512_mask_cvttps_epi32(src, 0b00000000_11111111, a);
49478 let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
49479 assert_eq_m512i(r, e);
49480 }
49481
49482 #[simd_test(enable = "avx512f")]
49483 unsafe fn test_mm512_maskz_cvttps_epi32() {
49484 let a = _mm512_setr_ps(
49485 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49486 );
49487 let r = _mm512_maskz_cvttps_epi32(0, a);
49488 assert_eq_m512i(r, _mm512_setzero_si512());
49489 let r = _mm512_maskz_cvttps_epi32(0b00000000_11111111, a);
49490 let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
49491 assert_eq_m512i(r, e);
49492 }
49493
49494 #[simd_test(enable = "avx512f,avx512vl")]
49495 unsafe fn test_mm256_mask_cvttps_epi32() {
49496 let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
49497 let src = _mm256_set1_epi32(0);
49498 let r = _mm256_mask_cvttps_epi32(src, 0, a);
49499 assert_eq_m256i(r, src);
49500 let r = _mm256_mask_cvttps_epi32(src, 0b11111111, a);
49501 let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
49502 assert_eq_m256i(r, e);
49503 }
49504
49505 #[simd_test(enable = "avx512f,avx512vl")]
49506 unsafe fn test_mm256_maskz_cvttps_epi32() {
49507 let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
49508 let r = _mm256_maskz_cvttps_epi32(0, a);
49509 assert_eq_m256i(r, _mm256_setzero_si256());
49510 let r = _mm256_maskz_cvttps_epi32(0b11111111, a);
49511 let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
49512 assert_eq_m256i(r, e);
49513 }
49514
49515 #[simd_test(enable = "avx512f,avx512vl")]
49516 unsafe fn test_mm_mask_cvttps_epi32() {
49517 let a = _mm_set_ps(12., 13.5, 14., 15.5);
49518 let src = _mm_set1_epi32(0);
49519 let r = _mm_mask_cvttps_epi32(src, 0, a);
49520 assert_eq_m128i(r, src);
49521 let r = _mm_mask_cvttps_epi32(src, 0b00001111, a);
49522 let e = _mm_set_epi32(12, 13, 14, 15);
49523 assert_eq_m128i(r, e);
49524 }
49525
49526 #[simd_test(enable = "avx512f,avx512vl")]
49527 unsafe fn test_mm_maskz_cvttps_epi32() {
49528 let a = _mm_set_ps(12., 13.5, 14., 15.5);
49529 let r = _mm_maskz_cvttps_epi32(0, a);
49530 assert_eq_m128i(r, _mm_setzero_si128());
49531 let r = _mm_maskz_cvttps_epi32(0b00001111, a);
49532 let e = _mm_set_epi32(12, 13, 14, 15);
49533 assert_eq_m128i(r, e);
49534 }
49535
49536 #[simd_test(enable = "avx512f")]
49537 unsafe fn test_mm512_cvttps_epu32() {
49538 let a = _mm512_setr_ps(
49539 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49540 );
49541 let r = _mm512_cvttps_epu32(a);
49542 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 9, 10, 11, 12, 13, 14, 15);
49543 assert_eq_m512i(r, e);
49544 }
49545
49546 #[simd_test(enable = "avx512f")]
49547 unsafe fn test_mm512_mask_cvttps_epu32() {
49548 let a = _mm512_setr_ps(
49549 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49550 );
49551 let src = _mm512_set1_epi32(0);
49552 let r = _mm512_mask_cvttps_epu32(src, 0, a);
49553 assert_eq_m512i(r, src);
49554 let r = _mm512_mask_cvttps_epu32(src, 0b00000000_11111111, a);
49555 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
49556 assert_eq_m512i(r, e);
49557 }
49558
49559 #[simd_test(enable = "avx512f")]
49560 unsafe fn test_mm512_maskz_cvttps_epu32() {
49561 let a = _mm512_setr_ps(
49562 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
49563 );
49564 let r = _mm512_maskz_cvttps_epu32(0, a);
49565 assert_eq_m512i(r, _mm512_setzero_si512());
49566 let r = _mm512_maskz_cvttps_epu32(0b00000000_11111111, a);
49567 let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
49568 assert_eq_m512i(r, e);
49569 }
49570
49571 #[simd_test(enable = "avx512f,avx512vl")]
49572 unsafe fn test_mm256_cvttps_epu32() {
49573 let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
49574 let r = _mm256_cvttps_epu32(a);
49575 let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
49576 assert_eq_m256i(r, e);
49577 }
49578
49579 #[simd_test(enable = "avx512f,avx512vl")]
49580 unsafe fn test_mm256_mask_cvttps_epu32() {
49581 let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
49582 let src = _mm256_set1_epi32(0);
49583 let r = _mm256_mask_cvttps_epu32(src, 0, a);
49584 assert_eq_m256i(r, src);
49585 let r = _mm256_mask_cvttps_epu32(src, 0b11111111, a);
49586 let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
49587 assert_eq_m256i(r, e);
49588 }
49589
49590 #[simd_test(enable = "avx512f,avx512vl")]
49591 unsafe fn test_mm256_maskz_cvttps_epu32() {
49592 let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
49593 let r = _mm256_maskz_cvttps_epu32(0, a);
49594 assert_eq_m256i(r, _mm256_setzero_si256());
49595 let r = _mm256_maskz_cvttps_epu32(0b11111111, a);
49596 let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
49597 assert_eq_m256i(r, e);
49598 }
49599
49600 #[simd_test(enable = "avx512f,avx512vl")]
49601 unsafe fn test_mm_cvttps_epu32() {
49602 let a = _mm_set_ps(12., 13.5, 14., 15.5);
49603 let r = _mm_cvttps_epu32(a);
49604 let e = _mm_set_epi32(12, 13, 14, 15);
49605 assert_eq_m128i(r, e);
49606 }
49607
49608 #[simd_test(enable = "avx512f,avx512vl")]
49609 unsafe fn test_mm_mask_cvttps_epu32() {
49610 let a = _mm_set_ps(12., 13.5, 14., 15.5);
49611 let src = _mm_set1_epi32(0);
49612 let r = _mm_mask_cvttps_epu32(src, 0, a);
49613 assert_eq_m128i(r, src);
49614 let r = _mm_mask_cvttps_epu32(src, 0b00001111, a);
49615 let e = _mm_set_epi32(12, 13, 14, 15);
49616 assert_eq_m128i(r, e);
49617 }
49618
49619 #[simd_test(enable = "avx512f,avx512vl")]
49620 unsafe fn test_mm_maskz_cvttps_epu32() {
49621 let a = _mm_set_ps(12., 13.5, 14., 15.5);
49622 let r = _mm_maskz_cvttps_epu32(0, a);
49623 assert_eq_m128i(r, _mm_setzero_si128());
49624 let r = _mm_maskz_cvttps_epu32(0b00001111, a);
49625 let e = _mm_set_epi32(12, 13, 14, 15);
49626 assert_eq_m128i(r, e);
49627 }
49628
49629 #[simd_test(enable = "avx512f")]
49630 unsafe fn test_mm512_i32gather_ps() {
49631 let arr: [f32; 256] = core::array::from_fn(|i| i as f32);
        // A scale of 4 makes each 32-bit index an element offset (4 bytes per element) rather than a byte offset
49633 #[rustfmt::skip]
49634 let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49635 120, 128, 136, 144, 152, 160, 168, 176);
49636 let r = _mm512_i32gather_ps::<4>(index, arr.as_ptr() as *const u8);
49637 #[rustfmt::skip]
49638 assert_eq_m512(r, _mm512_setr_ps(0., 16., 32., 48., 64., 80., 96., 112.,
49639 120., 128., 136., 144., 152., 160., 168., 176.));
49640 }
49641
49642 #[simd_test(enable = "avx512f")]
49643 unsafe fn test_mm512_mask_i32gather_ps() {
49644 let arr: [f32; 256] = core::array::from_fn(|i| i as f32);
49645 let src = _mm512_set1_ps(2.);
49646 let mask = 0b10101010_10101010;
49647 #[rustfmt::skip]
49648 let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49649 120, 128, 136, 144, 152, 160, 168, 176);
        // A scale of 4 makes each 32-bit index an element offset (4 bytes per element) rather than a byte offset
49651 let r = _mm512_mask_i32gather_ps::<4>(src, mask, index, arr.as_ptr() as *const u8);
49652 #[rustfmt::skip]
49653 assert_eq_m512(r, _mm512_setr_ps(2., 16., 2., 48., 2., 80., 2., 112.,
49654 2., 128., 2., 144., 2., 160., 2., 176.));
49655 }
49656
49657 #[simd_test(enable = "avx512f")]
49658 unsafe fn test_mm512_i32gather_epi32() {
49659 let arr: [i32; 256] = core::array::from_fn(|i| i as i32);
        // A scale of 4 makes each 32-bit index an element offset (4 bytes per element) rather than a byte offset
49661 #[rustfmt::skip]
49662 let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49663 120, 128, 136, 144, 152, 160, 168, 176);
49664 let r = _mm512_i32gather_epi32::<4>(index, arr.as_ptr() as *const u8);
49665 #[rustfmt::skip]
49666 assert_eq_m512i(r, _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49667 120, 128, 136, 144, 152, 160, 168, 176));
49668 }
49669
49670 #[simd_test(enable = "avx512f")]
49671 unsafe fn test_mm512_mask_i32gather_epi32() {
49672 let arr: [i32; 256] = core::array::from_fn(|i| i as i32);
49673 let src = _mm512_set1_epi32(2);
49674 let mask = 0b10101010_10101010;
49675 let index = _mm512_setr_epi32(
49676 0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240,
49677 );
        // A scale of 4 makes each 32-bit index an element offset (4 bytes per element) rather than a byte offset
49679 let r = _mm512_mask_i32gather_epi32::<4>(src, mask, index, arr.as_ptr() as *const u8);
49680 assert_eq_m512i(
49681 r,
49682 _mm512_setr_epi32(2, 16, 2, 48, 2, 80, 2, 112, 2, 144, 2, 176, 2, 208, 2, 240),
49683 );
49684 }
49685
49686 #[simd_test(enable = "avx512f")]
49687 unsafe fn test_mm512_i32scatter_ps() {
49688 let mut arr = [0f32; 256];
49689 #[rustfmt::skip]
49690 let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49691 128, 144, 160, 176, 192, 208, 224, 240);
49692 let src = _mm512_setr_ps(
49693 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
49694 );
        // A scale of 4 makes each 32-bit index an element offset (4 bytes per element) rather than a byte offset
49696 _mm512_i32scatter_ps::<4>(arr.as_mut_ptr() as *mut u8, index, src);
49697 let mut expected = [0f32; 256];
49698 for i in 0..16 {
49699 expected[i * 16] = (i + 1) as f32;
49700 }
        assert_eq!(&arr[..], &expected[..]);
49702 }
49703
49704 #[simd_test(enable = "avx512f")]
49705 unsafe fn test_mm512_mask_i32scatter_ps() {
49706 let mut arr = [0f32; 256];
49707 let mask = 0b10101010_10101010;
49708 #[rustfmt::skip]
49709 let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49710 128, 144, 160, 176, 192, 208, 224, 240);
49711 let src = _mm512_setr_ps(
49712 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
49713 );
        // A scale of 4 makes each 32-bit index an element offset (4 bytes per element) rather than a byte offset
49715 _mm512_mask_i32scatter_ps::<4>(arr.as_mut_ptr() as *mut u8, mask, index, src);
49716 let mut expected = [0f32; 256];
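        // Only the odd mask bits are set, so every second source value (2., 4., ..., 16.)
        // is written, landing 32 elements apart starting at index 16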
49717 for i in 0..8 {
49718 expected[i * 32 + 16] = 2. * (i + 1) as f32;
49719 }
        assert_eq!(&arr[..], &expected[..]);
49721 }
49722
49723 #[simd_test(enable = "avx512f")]
49724 unsafe fn test_mm512_i32scatter_epi32() {
49725 let mut arr = [0i32; 256];
49726 #[rustfmt::skip]
49728 let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49729 128, 144, 160, 176, 192, 208, 224, 240);
49730 let src = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        // A scale of 4 makes each 32-bit index an element offset (4 bytes per element) rather than a byte offset
49732 _mm512_i32scatter_epi32::<4>(arr.as_mut_ptr() as *mut u8, index, src);
49733 let mut expected = [0i32; 256];
49734 for i in 0..16 {
49735 expected[i * 16] = (i + 1) as i32;
49736 }
        assert_eq!(&arr[..], &expected[..]);
49738 }
49739
49740 #[simd_test(enable = "avx512f")]
49741 unsafe fn test_mm512_mask_i32scatter_epi32() {
49742 let mut arr = [0i32; 256];
49743 let mask = 0b10101010_10101010;
49744 #[rustfmt::skip]
49745 let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
49746 128, 144, 160, 176, 192, 208, 224, 240);
49747 let src = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
        // A scale of 4 makes each 32-bit index an element offset (4 bytes per element) rather than a byte offset
49749 _mm512_mask_i32scatter_epi32::<4>(arr.as_mut_ptr() as *mut u8, mask, index, src);
49750 let mut expected = [0i32; 256];
49751 for i in 0..8 {
49752 expected[i * 32 + 16] = 2 * (i + 1) as i32;
49753 }
        assert_eq!(&arr[..], &expected[..]);
49755 }
49756
49757 #[simd_test(enable = "avx512f")]
49758 unsafe fn test_mm512_cmplt_ps_mask() {
49759 #[rustfmt::skip]
49760 let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49761 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49762 let b = _mm512_set1_ps(-1.);
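        // NaN lanes never satisfy an ordered less-than, so only the f32::MIN and -100. lanes set their bits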
49763 let m = _mm512_cmplt_ps_mask(a, b);
49764 assert_eq!(m, 0b00000101_00000101);
49765 }
49766
49767 #[simd_test(enable = "avx512f")]
49768 unsafe fn test_mm512_mask_cmplt_ps_mask() {
49769 #[rustfmt::skip]
49770 let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49771 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49772 let b = _mm512_set1_ps(-1.);
49773 let mask = 0b01100110_01100110;
49774 let r = _mm512_mask_cmplt_ps_mask(mask, a, b);
49775 assert_eq!(r, 0b00000100_00000100);
49776 }
49777
49778 #[simd_test(enable = "avx512f")]
49779 unsafe fn test_mm512_cmpnlt_ps_mask() {
49780 #[rustfmt::skip]
49781 let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49782 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49783 let b = _mm512_set1_ps(-1.);
49784 assert_eq!(_mm512_cmpnlt_ps_mask(a, b), !_mm512_cmplt_ps_mask(a, b));
49785 }
49786
49787 #[simd_test(enable = "avx512f")]
49788 unsafe fn test_mm512_mask_cmpnlt_ps_mask() {
49789 #[rustfmt::skip]
49790 let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49791 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49792 let b = _mm512_set1_ps(-1.);
49793 let mask = 0b01111010_01111010;
49794 assert_eq!(_mm512_mask_cmpnlt_ps_mask(mask, a, b), 0b01111010_01111010);
49795 }
49796
49797 #[simd_test(enable = "avx512f")]
49798 unsafe fn test_mm512_cmpnle_ps_mask() {
49799 #[rustfmt::skip]
49800 let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49801 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49802 let b = _mm512_set1_ps(-1.);
49803 let m = _mm512_cmpnle_ps_mask(b, a);
49804 assert_eq!(m, 0b00001101_00001101);
49805 }
49806
49807 #[simd_test(enable = "avx512f")]
49808 unsafe fn test_mm512_mask_cmpnle_ps_mask() {
49809 #[rustfmt::skip]
49810 let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49811 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49812 let b = _mm512_set1_ps(-1.);
49813 let mask = 0b01100110_01100110;
49814 let r = _mm512_mask_cmpnle_ps_mask(mask, b, a);
49815 assert_eq!(r, 0b00000100_00000100);
49816 }
49817
49818 #[simd_test(enable = "avx512f")]
49819 unsafe fn test_mm512_cmple_ps_mask() {
49820 #[rustfmt::skip]
49821 let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49822 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49823 let b = _mm512_set1_ps(-1.);
49824 assert_eq!(_mm512_cmple_ps_mask(a, b), 0b00100101_00100101);
49825 }
49826
49827 #[simd_test(enable = "avx512f")]
49828 unsafe fn test_mm512_mask_cmple_ps_mask() {
49829 #[rustfmt::skip]
49830 let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
49831 0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
49832 let b = _mm512_set1_ps(-1.);
49833 let mask = 0b01111010_01111010;
49834 assert_eq!(_mm512_mask_cmple_ps_mask(mask, a, b), 0b00100000_00100000);
49835 }
49836
49837 #[simd_test(enable = "avx512f")]
49838 unsafe fn test_mm512_cmpeq_ps_mask() {
49839 #[rustfmt::skip]
49840 let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
49841 0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
49842 #[rustfmt::skip]
49843 let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
49844 0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
49845 let m = _mm512_cmpeq_ps_mask(b, a);
49846 assert_eq!(m, 0b11001101_11001101);
49847 }
49848
49849 #[simd_test(enable = "avx512f")]
49850 unsafe fn test_mm512_mask_cmpeq_ps_mask() {
49851 #[rustfmt::skip]
49852 let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
49853 0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
49854 #[rustfmt::skip]
49855 let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
49856 0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
49857 let mask = 0b01111010_01111010;
49858 let r = _mm512_mask_cmpeq_ps_mask(mask, b, a);
49859 assert_eq!(r, 0b01001000_01001000);
49860 }
49861
49862 #[simd_test(enable = "avx512f")]
49863 unsafe fn test_mm512_cmpneq_ps_mask() {
49864 #[rustfmt::skip]
49865 let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
49866 0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
49867 #[rustfmt::skip]
49868 let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
49869 0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
49870 let m = _mm512_cmpneq_ps_mask(b, a);
49871 assert_eq!(m, 0b00110010_00110010);
49872 }
49873
49874 #[simd_test(enable = "avx512f")]
49875 unsafe fn test_mm512_mask_cmpneq_ps_mask() {
49876 #[rustfmt::skip]
49877 let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
49878 0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
49879 #[rustfmt::skip]
49880 let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
49881 0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
49882 let mask = 0b01111010_01111010;
49883 let r = _mm512_mask_cmpneq_ps_mask(mask, b, a);
49884 assert_eq!(r, 0b00110010_00110010)
49885 }
49886
49887 #[simd_test(enable = "avx512f")]
49888 unsafe fn test_mm512_cmp_ps_mask() {
49889 #[rustfmt::skip]
49890 let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
49891 0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
49892 let b = _mm512_set1_ps(-1.);
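        // _CMP_LT_OQ is an ordered, non-signaling (quiet) less-than predicate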
49893 let m = _mm512_cmp_ps_mask::<_CMP_LT_OQ>(a, b);
49894 assert_eq!(m, 0b00000101_00000101);
49895 }
49896
49897 #[simd_test(enable = "avx512f")]
49898 unsafe fn test_mm512_mask_cmp_ps_mask() {
49899 #[rustfmt::skip]
49900 let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
49901 0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
49902 let b = _mm512_set1_ps(-1.);
49903 let mask = 0b01100110_01100110;
49904 let r = _mm512_mask_cmp_ps_mask::<_CMP_LT_OQ>(mask, a, b);
49905 assert_eq!(r, 0b00000100_00000100);
49906 }
49907
49908 #[simd_test(enable = "avx512f,avx512vl")]
49909 unsafe fn test_mm256_cmp_ps_mask() {
49910 let a = _mm256_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
49911 let b = _mm256_set1_ps(-1.);
49912 let m = _mm256_cmp_ps_mask::<_CMP_LT_OQ>(a, b);
49913 assert_eq!(m, 0b00000101);
49914 }
49915
49916 #[simd_test(enable = "avx512f,avx512vl")]
49917 unsafe fn test_mm256_mask_cmp_ps_mask() {
49918 let a = _mm256_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
49919 let b = _mm256_set1_ps(-1.);
49920 let mask = 0b01100110;
49921 let r = _mm256_mask_cmp_ps_mask::<_CMP_LT_OQ>(mask, a, b);
49922 assert_eq!(r, 0b00000100);
49923 }
49924
49925 #[simd_test(enable = "avx512f,avx512vl")]
49926 unsafe fn test_mm_cmp_ps_mask() {
49927 let a = _mm_set_ps(0., 1., -1., 13.);
49928 let b = _mm_set1_ps(1.);
49929 let m = _mm_cmp_ps_mask::<_CMP_LT_OQ>(a, b);
49930 assert_eq!(m, 0b00001010);
49931 }
49932
49933 #[simd_test(enable = "avx512f,avx512vl")]
49934 unsafe fn test_mm_mask_cmp_ps_mask() {
49935 let a = _mm_set_ps(0., 1., -1., 13.);
49936 let b = _mm_set1_ps(1.);
49937 let mask = 0b11111111;
49938 let r = _mm_mask_cmp_ps_mask::<_CMP_LT_OQ>(mask, a, b);
49939 assert_eq!(r, 0b00001010);
49940 }
49941
49942 #[simd_test(enable = "avx512f")]
49943 unsafe fn test_mm512_cmp_round_ps_mask() {
49944 #[rustfmt::skip]
49945 let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
49946 0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
49947 let b = _mm512_set1_ps(-1.);
49948 let m = _mm512_cmp_round_ps_mask::<_CMP_LT_OQ, _MM_FROUND_CUR_DIRECTION>(a, b);
49949 assert_eq!(m, 0b00000101_00000101);
49950 }
49951
49952 #[simd_test(enable = "avx512f")]
49953 unsafe fn test_mm512_mask_cmp_round_ps_mask() {
49954 #[rustfmt::skip]
49955 let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
49956 0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
49957 let b = _mm512_set1_ps(-1.);
49958 let mask = 0b01100110_01100110;
49959 let r = _mm512_mask_cmp_round_ps_mask::<_CMP_LT_OQ, _MM_FROUND_CUR_DIRECTION>(mask, a, b);
49960 assert_eq!(r, 0b00000100_00000100);
49961 }
49962
49963 #[simd_test(enable = "avx512f")]
49964 unsafe fn test_mm512_cmpord_ps_mask() {
49965 #[rustfmt::skip]
49966 let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
49967 f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
49968 #[rustfmt::skip]
49969 let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
49970 f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
49971 let m = _mm512_cmpord_ps_mask(a, b);
49972 assert_eq!(m, 0b00000101_00000101);
49973 }
49974
49975 #[simd_test(enable = "avx512f")]
49976 unsafe fn test_mm512_mask_cmpord_ps_mask() {
49977 #[rustfmt::skip]
49978 let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
49979 f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
49980 #[rustfmt::skip]
49981 let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
49982 f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
49983 let mask = 0b11000011_11000011;
49984 let m = _mm512_mask_cmpord_ps_mask(mask, a, b);
49985 assert_eq!(m, 0b00000001_00000001);
49986 }
49987
49988 #[simd_test(enable = "avx512f")]
49989 unsafe fn test_mm512_cmpunord_ps_mask() {
49990 #[rustfmt::skip]
49991 let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
49992 f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
49993 #[rustfmt::skip]
49994 let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
49995 f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
49996 let m = _mm512_cmpunord_ps_mask(a, b);
49997
49998 assert_eq!(m, 0b11111010_11111010);
49999 }
50000
50001 #[simd_test(enable = "avx512f")]
50002 unsafe fn test_mm512_mask_cmpunord_ps_mask() {
50003 #[rustfmt::skip]
50004 let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
50005 f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
50006 #[rustfmt::skip]
50007 let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
50008 f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
50009 let mask = 0b00001111_00001111;
50010 let m = _mm512_mask_cmpunord_ps_mask(mask, a, b);
        assert_eq!(m, 0b00001010_00001010);
50012 }
50013
50014 #[simd_test(enable = "avx512f")]
50015 unsafe fn test_mm_cmp_ss_mask() {
50016 let a = _mm_setr_ps(2., 1., 1., 1.);
50017 let b = _mm_setr_ps(1., 2., 2., 2.);
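        // Scalar (ss) compares only test lane 0, so the resulting mask is either 0 or 1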
50018 let m = _mm_cmp_ss_mask::<_CMP_GE_OS>(a, b);
50019 assert_eq!(m, 1);
50020 }
50021
50022 #[simd_test(enable = "avx512f")]
50023 unsafe fn test_mm_mask_cmp_ss_mask() {
50024 let a = _mm_setr_ps(2., 1., 1., 1.);
50025 let b = _mm_setr_ps(1., 2., 2., 2.);
50026 let m = _mm_mask_cmp_ss_mask::<_CMP_GE_OS>(0b10, a, b);
50027 assert_eq!(m, 0);
50028 let m = _mm_mask_cmp_ss_mask::<_CMP_GE_OS>(0b1, a, b);
50029 assert_eq!(m, 1);
50030 }
50031
50032 #[simd_test(enable = "avx512f")]
50033 unsafe fn test_mm_cmp_round_ss_mask() {
50034 let a = _mm_setr_ps(2., 1., 1., 1.);
50035 let b = _mm_setr_ps(1., 2., 2., 2.);
50036 let m = _mm_cmp_round_ss_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(a, b);
50037 assert_eq!(m, 1);
50038 }
50039
50040 #[simd_test(enable = "avx512f")]
50041 unsafe fn test_mm_mask_cmp_round_ss_mask() {
50042 let a = _mm_setr_ps(2., 1., 1., 1.);
50043 let b = _mm_setr_ps(1., 2., 2., 2.);
50044 let m = _mm_mask_cmp_round_ss_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(0b10, a, b);
50045 assert_eq!(m, 0);
50046 let m = _mm_mask_cmp_round_ss_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(0b1, a, b);
50047 assert_eq!(m, 1);
50048 }
50049
50050 #[simd_test(enable = "avx512f")]
50051 unsafe fn test_mm_cmp_sd_mask() {
50052 let a = _mm_setr_pd(2., 1.);
50053 let b = _mm_setr_pd(1., 2.);
50054 let m = _mm_cmp_sd_mask::<_CMP_GE_OS>(a, b);
50055 assert_eq!(m, 1);
50056 }
50057
50058 #[simd_test(enable = "avx512f")]
50059 unsafe fn test_mm_mask_cmp_sd_mask() {
50060 let a = _mm_setr_pd(2., 1.);
50061 let b = _mm_setr_pd(1., 2.);
50062 let m = _mm_mask_cmp_sd_mask::<_CMP_GE_OS>(0b10, a, b);
50063 assert_eq!(m, 0);
50064 let m = _mm_mask_cmp_sd_mask::<_CMP_GE_OS>(0b1, a, b);
50065 assert_eq!(m, 1);
50066 }
50067
50068 #[simd_test(enable = "avx512f")]
50069 unsafe fn test_mm_cmp_round_sd_mask() {
50070 let a = _mm_setr_pd(2., 1.);
50071 let b = _mm_setr_pd(1., 2.);
50072 let m = _mm_cmp_round_sd_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(a, b);
50073 assert_eq!(m, 1);
50074 }
50075
50076 #[simd_test(enable = "avx512f")]
50077 unsafe fn test_mm_mask_cmp_round_sd_mask() {
50078 let a = _mm_setr_pd(2., 1.);
50079 let b = _mm_setr_pd(1., 2.);
50080 let m = _mm_mask_cmp_round_sd_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(0b10, a, b);
50081 assert_eq!(m, 0);
50082 let m = _mm_mask_cmp_round_sd_mask::<_CMP_GE_OS, _MM_FROUND_CUR_DIRECTION>(0b1, a, b);
50083 assert_eq!(m, 1);
50084 }
50085
50086 #[simd_test(enable = "avx512f")]
50087 unsafe fn test_mm512_cmplt_epu32_mask() {
50088 #[rustfmt::skip]
50089 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50090 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50091 let b = _mm512_set1_epi32(-1);
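        // Compared as unsigned, b (-1) is u32::MAX, so every lane is less except those holding -1 / u32::MAX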
50092 let m = _mm512_cmplt_epu32_mask(a, b);
50093 assert_eq!(m, 0b11001111_11001111);
50094 }
50095
50096 #[simd_test(enable = "avx512f")]
50097 unsafe fn test_mm512_mask_cmplt_epu32_mask() {
50098 #[rustfmt::skip]
50099 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50100 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50101 let b = _mm512_set1_epi32(-1);
50102 let mask = 0b01111010_01111010;
50103 let r = _mm512_mask_cmplt_epu32_mask(mask, a, b);
50104 assert_eq!(r, 0b01001010_01001010);
50105 }
50106
50107 #[simd_test(enable = "avx512f,avx512vl")]
50108 unsafe fn test_mm256_cmplt_epu32_mask() {
50109 let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 101, 100, 99);
50110 let b = _mm256_set1_epi32(1);
50111 let r = _mm256_cmplt_epu32_mask(a, b);
50112 assert_eq!(r, 0b10000000);
50113 }
50114
50115 #[simd_test(enable = "avx512f,avx512vl")]
50116 unsafe fn test_mm256_mask_cmplt_epu32_mask() {
50117 let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 101, 100, 99);
50118 let b = _mm256_set1_epi32(1);
50119 let mask = 0b11111111;
50120 let r = _mm256_mask_cmplt_epu32_mask(mask, a, b);
50121 assert_eq!(r, 0b10000000);
50122 }
50123
50124 #[simd_test(enable = "avx512f,avx512vl")]
50125 unsafe fn test_mm_cmplt_epu32_mask() {
50126 let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50127 let b = _mm_set1_epi32(1);
50128 let r = _mm_cmplt_epu32_mask(a, b);
50129 assert_eq!(r, 0b00001000);
50130 }
50131
50132 #[simd_test(enable = "avx512f,avx512vl")]
50133 unsafe fn test_mm_mask_cmplt_epu32_mask() {
50134 let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50135 let b = _mm_set1_epi32(1);
50136 let mask = 0b11111111;
50137 let r = _mm_mask_cmplt_epu32_mask(mask, a, b);
50138 assert_eq!(r, 0b00001000);
50139 }
50140
50141 #[simd_test(enable = "avx512f")]
50142 unsafe fn test_mm512_cmpgt_epu32_mask() {
50143 #[rustfmt::skip]
50144 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50145 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50146 let b = _mm512_set1_epi32(-1);
50147 let m = _mm512_cmpgt_epu32_mask(b, a);
50148 assert_eq!(m, 0b11001111_11001111);
50149 }
50150
50151 #[simd_test(enable = "avx512f")]
50152 unsafe fn test_mm512_mask_cmpgt_epu32_mask() {
50153 #[rustfmt::skip]
50154 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50155 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50156 let b = _mm512_set1_epi32(-1);
50157 let mask = 0b01111010_01111010;
50158 let r = _mm512_mask_cmpgt_epu32_mask(mask, b, a);
50159 assert_eq!(r, 0b01001010_01001010);
50160 }
50161
50162 #[simd_test(enable = "avx512f,avx512vl")]
50163 unsafe fn test_mm256_cmpgt_epu32_mask() {
50164 let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 99, 100, 101);
50165 let b = _mm256_set1_epi32(1);
50166 let r = _mm256_cmpgt_epu32_mask(a, b);
50167 assert_eq!(r, 0b00111111);
50168 }
50169
50170 #[simd_test(enable = "avx512f,avx512vl")]
50171 unsafe fn test_mm256_mask_cmpgt_epu32_mask() {
50172 let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 99, 100, 101);
50173 let b = _mm256_set1_epi32(1);
50174 let mask = 0b11111111;
50175 let r = _mm256_mask_cmpgt_epu32_mask(mask, a, b);
50176 assert_eq!(r, 0b00111111);
50177 }
50178
50179 #[simd_test(enable = "avx512f,avx512vl")]
50180 unsafe fn test_mm_cmpgt_epu32_mask() {
50181 let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50182 let b = _mm_set1_epi32(1);
50183 let r = _mm_cmpgt_epu32_mask(a, b);
50184 assert_eq!(r, 0b00000011);
50185 }
50186
50187 #[simd_test(enable = "avx512f,avx512vl")]
50188 unsafe fn test_mm_mask_cmpgt_epu32_mask() {
50189 let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50190 let b = _mm_set1_epi32(1);
50191 let mask = 0b11111111;
50192 let r = _mm_mask_cmpgt_epu32_mask(mask, a, b);
50193 assert_eq!(r, 0b00000011);
50194 }
50195
50196 #[simd_test(enable = "avx512f")]
50197 unsafe fn test_mm512_cmple_epu32_mask() {
50198 #[rustfmt::skip]
50199 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50200 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50201 let b = _mm512_set1_epi32(-1);
50202 assert_eq!(
50203 _mm512_cmple_epu32_mask(a, b),
50204 !_mm512_cmpgt_epu32_mask(a, b)
50205 )
50206 }
50207
50208 #[simd_test(enable = "avx512f")]
50209 unsafe fn test_mm512_mask_cmple_epu32_mask() {
50210 #[rustfmt::skip]
50211 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50212 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50213 let b = _mm512_set1_epi32(-1);
50214 let mask = 0b01111010_01111010;
50215 assert_eq!(
50216 _mm512_mask_cmple_epu32_mask(mask, a, b),
50217 0b01111010_01111010
50218 );
50219 }
50220
50221 #[simd_test(enable = "avx512f,avx512vl")]
50222 unsafe fn test_mm256_cmple_epu32_mask() {
50223 let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 200, 100, 101);
50224 let b = _mm256_set1_epi32(1);
50225 let r = _mm256_cmple_epu32_mask(a, b);
50226 assert_eq!(r, 0b11000000)
50227 }
50228
50229 #[simd_test(enable = "avx512f,avx512vl")]
50230 unsafe fn test_mm256_mask_cmple_epu32_mask() {
50231 let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 200, 100, 101);
50232 let b = _mm256_set1_epi32(1);
50233 let mask = 0b11111111;
50234 let r = _mm256_mask_cmple_epu32_mask(mask, a, b);
50235 assert_eq!(r, 0b11000000)
50236 }
50237
50238 #[simd_test(enable = "avx512f,avx512vl")]
50239 unsafe fn test_mm_cmple_epu32_mask() {
50240 let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50241 let b = _mm_set1_epi32(1);
50242 let r = _mm_cmple_epu32_mask(a, b);
50243 assert_eq!(r, 0b00001100)
50244 }
50245
50246 #[simd_test(enable = "avx512f,avx512vl")]
50247 unsafe fn test_mm_mask_cmple_epu32_mask() {
50248 let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50249 let b = _mm_set1_epi32(1);
50250 let mask = 0b11111111;
50251 let r = _mm_mask_cmple_epu32_mask(mask, a, b);
50252 assert_eq!(r, 0b00001100)
50253 }
50254
50255 #[simd_test(enable = "avx512f")]
50256 unsafe fn test_mm512_cmpge_epu32_mask() {
50257 #[rustfmt::skip]
50258 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50259 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50260 let b = _mm512_set1_epi32(-1);
50261 assert_eq!(
50262 _mm512_cmpge_epu32_mask(a, b),
50263 !_mm512_cmplt_epu32_mask(a, b)
50264 )
50265 }
50266
50267 #[simd_test(enable = "avx512f")]
50268 unsafe fn test_mm512_mask_cmpge_epu32_mask() {
50269 #[rustfmt::skip]
50270 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50271 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50272 let b = _mm512_set1_epi32(-1);
50273 let mask = 0b01111010_01111010;
        assert_eq!(_mm512_mask_cmpge_epu32_mask(mask, a, b), 0b00110000_00110000);
50275 }
50276
50277 #[simd_test(enable = "avx512f,avx512vl")]
50278 unsafe fn test_mm256_cmpge_epu32_mask() {
50279 let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 300, 100, 200);
50280 let b = _mm256_set1_epi32(1);
50281 let r = _mm256_cmpge_epu32_mask(a, b);
50282 assert_eq!(r, 0b01111111)
50283 }
50284
50285 #[simd_test(enable = "avx512f,avx512vl")]
50286 unsafe fn test_mm256_mask_cmpge_epu32_mask() {
50287 let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 300, 100, 200);
50288 let b = _mm256_set1_epi32(1);
50289 let mask = 0b11111111;
50290 let r = _mm256_mask_cmpge_epu32_mask(mask, a, b);
50291 assert_eq!(r, 0b01111111)
50292 }
50293
50294 #[simd_test(enable = "avx512f,avx512vl")]
50295 unsafe fn test_mm_cmpge_epu32_mask() {
50296 let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50297 let b = _mm_set1_epi32(1);
50298 let r = _mm_cmpge_epu32_mask(a, b);
50299 assert_eq!(r, 0b00000111)
50300 }
50301
50302 #[simd_test(enable = "avx512f,avx512vl")]
50303 unsafe fn test_mm_mask_cmpge_epu32_mask() {
50304 let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
50305 let b = _mm_set1_epi32(1);
50306 let mask = 0b11111111;
50307 let r = _mm_mask_cmpge_epu32_mask(mask, a, b);
50308 assert_eq!(r, 0b00000111)
50309 }
50310
50311 #[simd_test(enable = "avx512f")]
50312 unsafe fn test_mm512_cmpeq_epu32_mask() {
50313 #[rustfmt::skip]
50314 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50315 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50316 #[rustfmt::skip]
50317 let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50318 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50319 let m = _mm512_cmpeq_epu32_mask(b, a);
50320 assert_eq!(m, 0b11001111_11001111);
50321 }
50322
50323 #[simd_test(enable = "avx512f")]
50324 unsafe fn test_mm512_mask_cmpeq_epu32_mask() {
50325 #[rustfmt::skip]
50326 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50327 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50328 #[rustfmt::skip]
50329 let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50330 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50331 let mask = 0b01111010_01111010;
50332 let r = _mm512_mask_cmpeq_epu32_mask(mask, b, a);
50333 assert_eq!(r, 0b01001010_01001010);
50334 }
50335
50336 #[simd_test(enable = "avx512f,avx512vl")]
50337 unsafe fn test_mm256_cmpeq_epu32_mask() {
50338 let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50339 let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50340 let m = _mm256_cmpeq_epu32_mask(b, a);
50341 assert_eq!(m, 0b11001111);
50342 }
50343
50344 #[simd_test(enable = "avx512f,avx512vl")]
50345 unsafe fn test_mm256_mask_cmpeq_epu32_mask() {
50346 let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50347 let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50348 let mask = 0b01111010;
50349 let r = _mm256_mask_cmpeq_epu32_mask(mask, b, a);
50350 assert_eq!(r, 0b01001010);
50351 }
50352
50353 #[simd_test(enable = "avx512f,avx512vl")]
50354 unsafe fn test_mm_cmpeq_epu32_mask() {
50355 let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
50356 let b = _mm_set_epi32(0, 1, 13, 42);
50357 let m = _mm_cmpeq_epu32_mask(b, a);
50358 assert_eq!(m, 0b00001100);
50359 }
50360
50361 #[simd_test(enable = "avx512f,avx512vl")]
50362 unsafe fn test_mm_mask_cmpeq_epu32_mask() {
50363 let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
50364 let b = _mm_set_epi32(0, 1, 13, 42);
50365 let mask = 0b11111111;
50366 let r = _mm_mask_cmpeq_epu32_mask(mask, b, a);
50367 assert_eq!(r, 0b00001100);
50368 }
50369
50370 #[simd_test(enable = "avx512f")]
50371 unsafe fn test_mm512_cmpneq_epu32_mask() {
50372 #[rustfmt::skip]
50373 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50374 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50375 #[rustfmt::skip]
50376 let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50377 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50378 let m = _mm512_cmpneq_epu32_mask(b, a);
50379 assert_eq!(m, !_mm512_cmpeq_epu32_mask(b, a));
50380 }
50381
50382 #[simd_test(enable = "avx512f")]
50383 unsafe fn test_mm512_mask_cmpneq_epu32_mask() {
50384 #[rustfmt::skip]
50385 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100,
50386 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100);
50387 #[rustfmt::skip]
50388 let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50389 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50390 let mask = 0b01111010_01111010;
50391 let r = _mm512_mask_cmpneq_epu32_mask(mask, b, a);
50392 assert_eq!(r, 0b00110010_00110010);
50393 }
50394
50395 #[simd_test(enable = "avx512f,avx512vl")]
50396 unsafe fn test_mm256_cmpneq_epu32_mask() {
50397 let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100);
50398 let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, -100, 100);
50399 let r = _mm256_cmpneq_epu32_mask(b, a);
50400 assert_eq!(r, 0b00110000);
50401 }
50402
50403 #[simd_test(enable = "avx512f,avx512vl")]
50404 unsafe fn test_mm256_mask_cmpneq_epu32_mask() {
50405 let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100);
50406 let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, -100, 100);
50407 let mask = 0b11111111;
50408 let r = _mm256_mask_cmpneq_epu32_mask(mask, b, a);
50409 assert_eq!(r, 0b00110000);
50410 }
50411
50412 #[simd_test(enable = "avx512f,avx512vl")]
50413 unsafe fn test_mm_cmpneq_epu32_mask() {
50414 let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
50415 let b = _mm_set_epi32(0, 1, 13, 42);
50416 let r = _mm_cmpneq_epu32_mask(b, a);
50417 assert_eq!(r, 0b00000011);
50418 }
50419
50420 #[simd_test(enable = "avx512f,avx512vl")]
50421 unsafe fn test_mm_mask_cmpneq_epu32_mask() {
50422 let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
50423 let b = _mm_set_epi32(0, 1, 13, 42);
50424 let mask = 0b11111111;
50425 let r = _mm_mask_cmpneq_epu32_mask(mask, b, a);
50426 assert_eq!(r, 0b00000011);
50427 }
50428
50429 #[simd_test(enable = "avx512f")]
50430 unsafe fn test_mm512_cmp_epu32_mask() {
50431 #[rustfmt::skip]
50432 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50433 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50434 let b = _mm512_set1_epi32(-1);
50435 let m = _mm512_cmp_epu32_mask::<_MM_CMPINT_LT>(a, b);
50436 assert_eq!(m, 0b11001111_11001111);
50437 }
50438
50439 #[simd_test(enable = "avx512f")]
50440 unsafe fn test_mm512_mask_cmp_epu32_mask() {
50441 #[rustfmt::skip]
50442 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50443 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50444 let b = _mm512_set1_epi32(-1);
50445 let mask = 0b01111010_01111010;
50446 let r = _mm512_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(mask, a, b);
50447 assert_eq!(r, 0b01001010_01001010);
50448 }
50449
50450 #[simd_test(enable = "avx512f,avx512vl")]
50451 unsafe fn test_mm256_cmp_epu32_mask() {
50452 let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50453 let b = _mm256_set1_epi32(-1);
50454 let m = _mm256_cmp_epu32_mask::<_MM_CMPINT_LT>(a, b);
50455 assert_eq!(m, 0b11001111);
50456 }
50457
50458 #[simd_test(enable = "avx512f,avx512vl")]
50459 unsafe fn test_mm256_mask_cmp_epu32_mask() {
50460 let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50461 let b = _mm256_set1_epi32(-1);
50462 let mask = 0b11111111;
50463 let r = _mm256_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(mask, a, b);
50464 assert_eq!(r, 0b11001111);
50465 }
50466
50467 #[simd_test(enable = "avx512f,avx512vl")]
50468 unsafe fn test_mm_cmp_epu32_mask() {
50469 let a = _mm_set_epi32(0, 1, -1, i32::MAX);
50470 let b = _mm_set1_epi32(1);
50471 let m = _mm_cmp_epu32_mask::<_MM_CMPINT_LT>(a, b);
50472 assert_eq!(m, 0b00001000);
50473 }
50474
50475 #[simd_test(enable = "avx512f,avx512vl")]
50476 unsafe fn test_mm_mask_cmp_epu32_mask() {
50477 let a = _mm_set_epi32(0, 1, -1, i32::MAX);
50478 let b = _mm_set1_epi32(1);
50479 let mask = 0b11111111;
50480 let r = _mm_mask_cmp_epu32_mask::<_MM_CMPINT_LT>(mask, a, b);
50481 assert_eq!(r, 0b00001000);
50482 }
50483
50484 #[simd_test(enable = "avx512f")]
50485 unsafe fn test_mm512_cmplt_epi32_mask() {
50486 #[rustfmt::skip]
50487 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50488 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50489 let b = _mm512_set1_epi32(-1);
50490 let m = _mm512_cmplt_epi32_mask(a, b);
50491 assert_eq!(m, 0b00000101_00000101);
50492 }
50493
50494 #[simd_test(enable = "avx512f")]
50495 unsafe fn test_mm512_mask_cmplt_epi32_mask() {
50496 #[rustfmt::skip]
50497 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50498 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50499 let b = _mm512_set1_epi32(-1);
50500 let mask = 0b01100110_01100110;
50501 let r = _mm512_mask_cmplt_epi32_mask(mask, a, b);
50502 assert_eq!(r, 0b00000100_00000100);
50503 }
50504
50505 #[simd_test(enable = "avx512f,avx512vl")]
50506 unsafe fn test_mm256_cmplt_epi32_mask() {
50507 let a = _mm256_set_epi32(0, 1, -1, 101, i32::MAX, i32::MIN, 100, -100);
50508 let b = _mm256_set1_epi32(-1);
50509 let r = _mm256_cmplt_epi32_mask(a, b);
50510 assert_eq!(r, 0b00000101);
50511 }
50512
50513 #[simd_test(enable = "avx512f,avx512vl")]
50514 unsafe fn test_mm256_mask_cmplt_epi32_mask() {
50515 let a = _mm256_set_epi32(0, 1, -1, 101, i32::MAX, i32::MIN, 100, -100);
50516 let b = _mm256_set1_epi32(-1);
50517 let mask = 0b11111111;
50518 let r = _mm256_mask_cmplt_epi32_mask(mask, a, b);
50519 assert_eq!(r, 0b00000101);
50520 }
50521
50522 #[simd_test(enable = "avx512f,avx512vl")]
50523 unsafe fn test_mm_cmplt_epi32_mask() {
50524 let a = _mm_set_epi32(i32::MAX, i32::MIN, 100, -100);
50525 let b = _mm_set1_epi32(-1);
50526 let r = _mm_cmplt_epi32_mask(a, b);
50527 assert_eq!(r, 0b00000101);
50528 }
50529
50530 #[simd_test(enable = "avx512f,avx512vl")]
50531 unsafe fn test_mm_mask_cmplt_epi32_mask() {
50532 let a = _mm_set_epi32(i32::MAX, i32::MIN, 100, -100);
50533 let b = _mm_set1_epi32(-1);
50534 let mask = 0b11111111;
50535 let r = _mm_mask_cmplt_epi32_mask(mask, a, b);
50536 assert_eq!(r, 0b00000101);
50537 }
50538
50539 #[simd_test(enable = "avx512f")]
50540 unsafe fn test_mm512_cmpgt_epi32_mask() {
50541 #[rustfmt::skip]
50542 let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
50543 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50544 let b = _mm512_set1_epi32(-1);
50545 let m = _mm512_cmpgt_epi32_mask(b, a);
50546 assert_eq!(m, 0b00000101_00000101);
50547 }
50548
50549 #[simd_test(enable = "avx512f")]
50550 unsafe fn test_mm512_mask_cmpgt_epi32_mask() {
50551 #[rustfmt::skip]
50552 let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
50553 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50554 let b = _mm512_set1_epi32(-1);
50555 let mask = 0b01100110_01100110;
50556 let r = _mm512_mask_cmpgt_epi32_mask(mask, b, a);
50557 assert_eq!(r, 0b00000100_00000100);
50558 }
50559
50560 #[simd_test(enable = "avx512f,avx512vl")]
50561 unsafe fn test_mm256_cmpgt_epi32_mask() {
50562 let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50563 let b = _mm256_set1_epi32(-1);
50564 let r = _mm256_cmpgt_epi32_mask(a, b);
50565 assert_eq!(r, 0b11011010);
50566 }
50567
50568 #[simd_test(enable = "avx512f,avx512vl")]
50569 unsafe fn test_mm256_mask_cmpgt_epi32_mask() {
50570 let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50571 let b = _mm256_set1_epi32(-1);
50572 let mask = 0b11111111;
50573 let r = _mm256_mask_cmpgt_epi32_mask(mask, a, b);
50574 assert_eq!(r, 0b11011010);
50575 }
50576
50577 #[simd_test(enable = "avx512f,avx512vl")]
50578 unsafe fn test_mm_cmpgt_epi32_mask() {
50579 let a = _mm_set_epi32(0, 1, -1, 13);
50580 let b = _mm_set1_epi32(-1);
50581 let r = _mm_cmpgt_epi32_mask(a, b);
50582 assert_eq!(r, 0b00001101);
50583 }
50584
50585 #[simd_test(enable = "avx512f,avx512vl")]
50586 unsafe fn test_mm_mask_cmpgt_epi32_mask() {
50587 let a = _mm_set_epi32(0, 1, -1, 13);
50588 let b = _mm_set1_epi32(-1);
50589 let mask = 0b11111111;
50590 let r = _mm_mask_cmpgt_epi32_mask(mask, a, b);
50591 assert_eq!(r, 0b00001101);
50592 }
50593
50594 #[simd_test(enable = "avx512f")]
50595 unsafe fn test_mm512_cmple_epi32_mask() {
50596 #[rustfmt::skip]
50597 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50598 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50599 let b = _mm512_set1_epi32(-1);
50600 assert_eq!(
50601 _mm512_cmple_epi32_mask(a, b),
50602 !_mm512_cmpgt_epi32_mask(a, b)
50603 )
50604 }
50605
50606 #[simd_test(enable = "avx512f")]
50607 unsafe fn test_mm512_mask_cmple_epi32_mask() {
50608 #[rustfmt::skip]
50609 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50610 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50611 let b = _mm512_set1_epi32(-1);
50612 let mask = 0b01111010_01111010;
        assert_eq!(_mm512_mask_cmple_epi32_mask(mask, a, b), 0b00110000_00110000);
50614 }
50615
50616 #[simd_test(enable = "avx512f,avx512vl")]
50617 unsafe fn test_mm256_cmple_epi32_mask() {
50618 let a = _mm256_set_epi32(0, 1, -1, 200, i32::MAX, i32::MIN, 100, -100);
50619 let b = _mm256_set1_epi32(-1);
50620 let r = _mm256_cmple_epi32_mask(a, b);
50621 assert_eq!(r, 0b00100101)
50622 }
50623
50624 #[simd_test(enable = "avx512f,avx512vl")]
50625 unsafe fn test_mm256_mask_cmple_epi32_mask() {
50626 let a = _mm256_set_epi32(0, 1, -1, 200, i32::MAX, i32::MIN, 100, -100);
50627 let b = _mm256_set1_epi32(-1);
50628 let mask = 0b11111111;
50629 let r = _mm256_mask_cmple_epi32_mask(mask, a, b);
50630 assert_eq!(r, 0b00100101)
50631 }
50632
50633 #[simd_test(enable = "avx512f,avx512vl")]
50634 unsafe fn test_mm_cmple_epi32_mask() {
50635 let a = _mm_set_epi32(0, 1, -1, 200);
50636 let b = _mm_set1_epi32(-1);
50637 let r = _mm_cmple_epi32_mask(a, b);
50638 assert_eq!(r, 0b00000010)
50639 }
50640
50641 #[simd_test(enable = "avx512f,avx512vl")]
50642 unsafe fn test_mm_mask_cmple_epi32_mask() {
50643 let a = _mm_set_epi32(0, 1, -1, 200);
50644 let b = _mm_set1_epi32(-1);
50645 let mask = 0b11111111;
50646 let r = _mm_mask_cmple_epi32_mask(mask, a, b);
50647 assert_eq!(r, 0b00000010)
50648 }
50649
50650 #[simd_test(enable = "avx512f")]
50651 unsafe fn test_mm512_cmpge_epi32_mask() {
50652 #[rustfmt::skip]
50653 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50654 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50655 let b = _mm512_set1_epi32(-1);
50656 assert_eq!(
50657 _mm512_cmpge_epi32_mask(a, b),
50658 !_mm512_cmplt_epi32_mask(a, b)
50659 )
50660 }
50661
50662 #[simd_test(enable = "avx512f")]
50663 unsafe fn test_mm512_mask_cmpge_epi32_mask() {
50664 #[rustfmt::skip]
50665 let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
50666 0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50667 let b = _mm512_set1_epi32(-1);
50668 let mask = 0b01111010_01111010;
50669 assert_eq!(
50670 _mm512_mask_cmpge_epi32_mask(mask, a, b),
50671 0b01111010_01111010
50672 );
50673 }
50674
50675 #[simd_test(enable = "avx512f,avx512vl")]
50676 unsafe fn test_mm256_cmpge_epi32_mask() {
50677 let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50678 let b = _mm256_set1_epi32(-1);
50679 let r = _mm256_cmpge_epi32_mask(a, b);
50680 assert_eq!(r, 0b11111010)
50681 }
50682
50683 #[simd_test(enable = "avx512f,avx512vl")]
50684 unsafe fn test_mm256_mask_cmpge_epi32_mask() {
50685 let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
50686 let b = _mm256_set1_epi32(-1);
50687 let mask = 0b11111111;
50688 let r = _mm256_mask_cmpge_epi32_mask(mask, a, b);
50689 assert_eq!(r, 0b11111010)
50690 }
50691
50692 #[simd_test(enable = "avx512f,avx512vl")]
50693 unsafe fn test_mm_cmpge_epi32_mask() {
50694 let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
50695 let b = _mm_set1_epi32(-1);
50696 let r = _mm_cmpge_epi32_mask(a, b);
50697 assert_eq!(r, 0b00001111)
50698 }
50699
50700 #[simd_test(enable = "avx512f,avx512vl")]
50701 unsafe fn test_mm_mask_cmpge_epi32_mask() {
50702 let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
50703 let b = _mm_set1_epi32(-1);
50704 let mask = 0b11111111;
50705 let r = _mm_mask_cmpge_epi32_mask(mask, a, b);
50706 assert_eq!(r, 0b00001111)
50707 }
50708
50709 #[simd_test(enable = "avx512f")]
50710 unsafe fn test_mm512_cmpeq_epi32_mask() {
50711 #[rustfmt::skip]
50712 let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
50713 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50714 #[rustfmt::skip]
50715 let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50716 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50717 let m = _mm512_cmpeq_epi32_mask(b, a);
50718 assert_eq!(m, 0b11001111_11001111);
50719 }
50720
50721 #[simd_test(enable = "avx512f")]
50722 unsafe fn test_mm512_mask_cmpeq_epi32_mask() {
50723 #[rustfmt::skip]
50724 let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
50725 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50726 #[rustfmt::skip]
50727 let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50728 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50729 let mask = 0b01111010_01111010;
50730 let r = _mm512_mask_cmpeq_epi32_mask(mask, b, a);
50731 assert_eq!(r, 0b01001010_01001010);
50732 }
50733
50734 #[simd_test(enable = "avx512f,avx512vl")]
50735 unsafe fn test_mm256_cmpeq_epi32_mask() {
50736 let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50737 let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50738 let m = _mm256_cmpeq_epi32_mask(b, a);
50739 assert_eq!(m, 0b11001111);
50740 }
50741
50742 #[simd_test(enable = "avx512f,avx512vl")]
50743 unsafe fn test_mm256_mask_cmpeq_epi32_mask() {
50744 let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50745 let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50746 let mask = 0b01111010;
50747 let r = _mm256_mask_cmpeq_epi32_mask(mask, b, a);
50748 assert_eq!(r, 0b01001010);
50749 }
50750
50751 #[simd_test(enable = "avx512f,avx512vl")]
50752 unsafe fn test_mm_cmpeq_epi32_mask() {
50753 let a = _mm_set_epi32(0, 1, -1, 13);
50754 let b = _mm_set_epi32(0, 1, 13, 42);
50755 let m = _mm_cmpeq_epi32_mask(b, a);
50756 assert_eq!(m, 0b00001100);
50757 }
50758
50759 #[simd_test(enable = "avx512f,avx512vl")]
50760 unsafe fn test_mm_mask_cmpeq_epi32_mask() {
50761 let a = _mm_set_epi32(0, 1, -1, 13);
50762 let b = _mm_set_epi32(0, 1, 13, 42);
50763 let mask = 0b11111111;
50764 let r = _mm_mask_cmpeq_epi32_mask(mask, b, a);
50765 assert_eq!(r, 0b00001100);
50766 }
50767
50768 #[simd_test(enable = "avx512f")]
50769 unsafe fn test_mm512_cmpneq_epi32_mask() {
50770 #[rustfmt::skip]
50771 let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
50772 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50773 #[rustfmt::skip]
50774 let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50775 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50776 let m = _mm512_cmpneq_epi32_mask(b, a);
50777 assert_eq!(m, !_mm512_cmpeq_epi32_mask(b, a));
50778 }
50779
50780 #[simd_test(enable = "avx512f")]
50781 unsafe fn test_mm512_mask_cmpneq_epi32_mask() {
50782 #[rustfmt::skip]
50783 let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100,
50784 0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100);
50785 #[rustfmt::skip]
50786 let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
50787 0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50788 let mask = 0b01111010_01111010;
50789 let r = _mm512_mask_cmpneq_epi32_mask(mask, b, a);
50790 assert_eq!(r, 0b00110010_00110010)
50791 }
50792
50793 #[simd_test(enable = "avx512f,avx512vl")]
50794 unsafe fn test_mm256_cmpneq_epi32_mask() {
50795 let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50796 let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50797 let m = _mm256_cmpneq_epi32_mask(b, a);
50798 assert_eq!(m, !_mm256_cmpeq_epi32_mask(b, a));
50799 }
50800
50801 #[simd_test(enable = "avx512f,avx512vl")]
50802 unsafe fn test_mm256_mask_cmpneq_epi32_mask() {
50803 let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100);
50804 let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
50805 let mask = 0b11111111;
50806 let r = _mm256_mask_cmpneq_epi32_mask(mask, b, a);
50807 assert_eq!(r, 0b00110011)
50808 }
50809
50810 #[simd_test(enable = "avx512f,avx512vl")]
50811 unsafe fn test_mm_cmpneq_epi32_mask() {
50812 let a = _mm_set_epi32(0, 1, -1, 13);
50813 let b = _mm_set_epi32(0, 1, 13, 42);
50814 let r = _mm_cmpneq_epi32_mask(b, a);
50815 assert_eq!(r, 0b00000011)
50816 }
50817
50818 #[simd_test(enable = "avx512f,avx512vl")]
50819 unsafe fn test_mm_mask_cmpneq_epi32_mask() {
50820 let a = _mm_set_epi32(0, 1, -1, 13);
50821 let b = _mm_set_epi32(0, 1, 13, 42);
50822 let mask = 0b11111111;
50823 let r = _mm_mask_cmpneq_epi32_mask(mask, b, a);
50824 assert_eq!(r, 0b00000011)
50825 }
50826
50827 #[simd_test(enable = "avx512f")]
50828 unsafe fn test_mm512_cmp_epi32_mask() {
50829 #[rustfmt::skip]
50830 let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
50831 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50832 let b = _mm512_set1_epi32(-1);
50833 let m = _mm512_cmp_epi32_mask::<_MM_CMPINT_LT>(a, b);
50834 assert_eq!(m, 0b00000101_00000101);
50835 }
50836
50837 #[simd_test(enable = "avx512f")]
50838 unsafe fn test_mm512_mask_cmp_epi32_mask() {
50839 #[rustfmt::skip]
50840 let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
50841 0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50842 let b = _mm512_set1_epi32(-1);
50843 let mask = 0b01100110_01100110;
50844 let r = _mm512_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(mask, a, b);
50845 assert_eq!(r, 0b00000100_00000100);
50846 }
50847
50848 #[simd_test(enable = "avx512f,avx512vl")]
50849 unsafe fn test_mm256_cmp_epi32_mask() {
50850 let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50851 let b = _mm256_set1_epi32(-1);
50852 let m = _mm256_cmp_epi32_mask::<_MM_CMPINT_LT>(a, b);
50853 assert_eq!(m, 0b00000101);
50854 }
50855
50856 #[simd_test(enable = "avx512f,avx512vl")]
50857 unsafe fn test_mm256_mask_cmp_epi32_mask() {
50858 let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
50859 let b = _mm256_set1_epi32(-1);
50860 let mask = 0b01100110;
50861 let r = _mm256_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(mask, a, b);
50862 assert_eq!(r, 0b00000100);
50863 }
50864
50865 #[simd_test(enable = "avx512f,avx512vl")]
50866 unsafe fn test_mm_cmp_epi32_mask() {
50867 let a = _mm_set_epi32(0, 1, -1, 13);
50868 let b = _mm_set1_epi32(1);
50869 let m = _mm_cmp_epi32_mask::<_MM_CMPINT_LT>(a, b);
50870 assert_eq!(m, 0b00001010);
50871 }
50872
50873 #[simd_test(enable = "avx512f,avx512vl")]
50874 unsafe fn test_mm_mask_cmp_epi32_mask() {
50875 let a = _mm_set_epi32(0, 1, -1, 13);
50876 let b = _mm_set1_epi32(1);
50877 let mask = 0b11111111;
50878 let r = _mm_mask_cmp_epi32_mask::<_MM_CMPINT_LT>(mask, a, b);
50879 assert_eq!(r, 0b00001010);
50880 }
50881
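    // `_mm512_set_*` takes its arguments from the highest lane down to the lowest,
    // while `_mm512_setr_*` takes them in lowest-to-highest (memory) order; the
    // tests below cross-check the two orders against each other and against `set1`.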
50882 #[simd_test(enable = "avx512f")]
50883 unsafe fn test_mm512_set_epi8() {
50884 let r = _mm512_set1_epi8(2);
50885 assert_eq_m512i(
50886 r,
50887 _mm512_set_epi8(
50888 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
50889 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
50890 2, 2, 2, 2, 2, 2, 2, 2,
50891 ),
50892 )
50893 }
50894
50895 #[simd_test(enable = "avx512f")]
50896 unsafe fn test_mm512_set_epi16() {
50897 let r = _mm512_set1_epi16(2);
50898 assert_eq_m512i(
50899 r,
50900 _mm512_set_epi16(
50901 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
50902 2, 2, 2, 2,
50903 ),
50904 )
50905 }
50906
50907 #[simd_test(enable = "avx512f")]
50908 unsafe fn test_mm512_set_epi32() {
50909 let r = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
50910 assert_eq_m512i(
50911 r,
50912 _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),
50913 )
50914 }
50915
50916 #[simd_test(enable = "avx512f")]
50917 unsafe fn test_mm512_setr_epi32() {
50918 let r = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
50919 assert_eq_m512i(
50920 r,
50921 _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),
50922 )
50923 }
50924
50925 #[simd_test(enable = "avx512f")]
50926 unsafe fn test_mm512_set1_epi8() {
50927 let r = _mm512_set_epi8(
50928 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
50929 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
50930 2, 2, 2, 2, 2, 2,
50931 );
50932 assert_eq_m512i(r, _mm512_set1_epi8(2));
50933 }
50934
50935 #[simd_test(enable = "avx512f")]
50936 unsafe fn test_mm512_set1_epi16() {
50937 let r = _mm512_set_epi16(
50938 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
50939 2, 2, 2,
50940 );
50941 assert_eq_m512i(r, _mm512_set1_epi16(2));
50942 }
50943
50944 #[simd_test(enable = "avx512f")]
50945 unsafe fn test_mm512_set1_epi32() {
50946 let r = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
50947 assert_eq_m512i(r, _mm512_set1_epi32(2));
50948 }
50949
50950 #[simd_test(enable = "avx512f")]
50951 unsafe fn test_mm512_setzero_si512() {
50952 assert_eq_m512i(_mm512_set1_epi32(0), _mm512_setzero_si512());
50953 }
50954
50955 #[simd_test(enable = "avx512f")]
50956 unsafe fn test_mm512_setzero_epi32() {
50957 assert_eq_m512i(_mm512_set1_epi32(0), _mm512_setzero_epi32());
50958 }
50959
50960 #[simd_test(enable = "avx512f")]
50961 unsafe fn test_mm512_set_ps() {
50962 let r = _mm512_setr_ps(
50963 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
50964 );
50965 assert_eq_m512(
50966 r,
50967 _mm512_set_ps(
50968 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
50969 ),
50970 )
50971 }
50972
50973 #[simd_test(enable = "avx512f")]
50974 unsafe fn test_mm512_setr_ps() {
50975 let r = _mm512_set_ps(
50976 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
50977 );
50978 assert_eq_m512(
50979 r,
50980 _mm512_setr_ps(
50981 15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
50982 ),
50983 )
50984 }
50985
50986 #[simd_test(enable = "avx512f")]
50987 unsafe fn test_mm512_set1_ps() {
50988 #[rustfmt::skip]
50989 let expected = _mm512_set_ps(2., 2., 2., 2., 2., 2., 2., 2.,
50990 2., 2., 2., 2., 2., 2., 2., 2.);
50991 assert_eq_m512(expected, _mm512_set1_ps(2.));
50992 }
50993
50994 #[simd_test(enable = "avx512f")]
50995 unsafe fn test_mm512_set4_epi32() {
50996 let r = _mm512_set_epi32(4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1);
50997 assert_eq_m512i(r, _mm512_set4_epi32(4, 3, 2, 1));
50998 }
50999
51000 #[simd_test(enable = "avx512f")]
51001 unsafe fn test_mm512_set4_ps() {
51002 let r = _mm512_set_ps(
51003 4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1.,
51004 );
51005 assert_eq_m512(r, _mm512_set4_ps(4., 3., 2., 1.));
51006 }
51007
51008 #[simd_test(enable = "avx512f")]
51009 unsafe fn test_mm512_setr4_epi32() {
51010 let r = _mm512_set_epi32(4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1);
51011 assert_eq_m512i(r, _mm512_setr4_epi32(1, 2, 3, 4));
51012 }
51013
51014 #[simd_test(enable = "avx512f")]
51015 unsafe fn test_mm512_setr4_ps() {
51016 let r = _mm512_set_ps(
51017 4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1.,
51018 );
51019 assert_eq_m512(r, _mm512_setr4_ps(1., 2., 3., 4.));
51020 }
51021
51022 #[simd_test(enable = "avx512f")]
51023 unsafe fn test_mm512_setzero_ps() {
51024 assert_eq_m512(_mm512_setzero_ps(), _mm512_set1_ps(0.));
51025 }
51026
51027 #[simd_test(enable = "avx512f")]
51028 unsafe fn test_mm512_setzero() {
51029 assert_eq_m512(_mm512_setzero(), _mm512_set1_ps(0.));
51030 }
51031
51032 #[simd_test(enable = "avx512f")]
51033 unsafe fn test_mm512_loadu_pd() {
51034 let a = &[4., 3., 2., 5., 8., 9., 64., 50.];
51035 let p = a.as_ptr();
51036 let r = _mm512_loadu_pd(black_box(p));
51037 let e = _mm512_setr_pd(4., 3., 2., 5., 8., 9., 64., 50.);
51038 assert_eq_m512d(r, e);
51039 }
51040
51041 #[simd_test(enable = "avx512f")]
51042 unsafe fn test_mm512_storeu_pd() {
51043 let a = _mm512_set1_pd(9.);
51044 let mut r = _mm512_undefined_pd();
51045 _mm512_storeu_pd(&mut r as *mut _ as *mut f64, a);
51046 assert_eq_m512d(r, a);
51047 }
51048
51049 #[simd_test(enable = "avx512f")]
51050 unsafe fn test_mm512_loadu_ps() {
51051 let a = &[
51052 4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
51053 ];
51054 let p = a.as_ptr();
51055 let r = _mm512_loadu_ps(black_box(p));
51056 let e = _mm512_setr_ps(
51057 4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
51058 );
51059 assert_eq_m512(r, e);
51060 }
51061
51062 #[simd_test(enable = "avx512f")]
51063 unsafe fn test_mm512_storeu_ps() {
51064 let a = _mm512_set1_ps(9.);
51065 let mut r = _mm512_undefined_ps();
51066 _mm512_storeu_ps(&mut r as *mut _ as *mut f32, a);
51067 assert_eq_m512(r, a);
51068 }
51069
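    // For the masked load/store tests below: mask bit 0 governs the lowest element.
    // A clear bit keeps the corresponding `src` element (mask variants), yields zero
    // (maskz variants), or leaves the destination memory untouched (masked stores).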
51070 #[simd_test(enable = "avx512f")]
51071 unsafe fn test_mm512_mask_loadu_epi32() {
51072 let src = _mm512_set1_epi32(42);
51073 let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
51074 let p = a.as_ptr();
51075 let m = 0b11101000_11001010;
51076 let r = _mm512_mask_loadu_epi32(src, m, black_box(p));
51077 let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16);
51078 assert_eq_m512i(r, e);
51079 }
51080
51081 #[simd_test(enable = "avx512f")]
51082 unsafe fn test_mm512_maskz_loadu_epi32() {
51083 let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
51084 let p = a.as_ptr();
51085 let m = 0b11101000_11001010;
51086 let r = _mm512_maskz_loadu_epi32(m, black_box(p));
51087 let e = _mm512_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16);
51088 assert_eq_m512i(r, e);
51089 }
51090
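    // The non-`u` (aligned) load/store intrinsics require the pointer to be aligned
    // to the full vector width, hence the `#[repr(align(64))]` wrapper structs below
    // (align(32)/align(16) for the 256-bit and 128-bit variants).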
51091 #[simd_test(enable = "avx512f")]
51092 unsafe fn test_mm512_mask_load_epi32() {
51093 #[repr(align(64))]
51094 struct Align {
51095 data: [i32; 16], // 64 bytes
51096 }
51097 let src = _mm512_set1_epi32(42);
51098 let a = Align {
51099 data: [1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
51100 };
51101 let p = a.data.as_ptr();
51102 let m = 0b11101000_11001010;
51103 let r = _mm512_mask_load_epi32(src, m, black_box(p));
51104 let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16);
51105 assert_eq_m512i(r, e);
51106 }
51107
51108 #[simd_test(enable = "avx512f")]
51109 unsafe fn test_mm512_maskz_load_epi32() {
51110 #[repr(align(64))]
51111 struct Align {
51112 data: [i32; 16], // 64 bytes
51113 }
51114 let a = Align {
51115 data: [1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
51116 };
51117 let p = a.data.as_ptr();
51118 let m = 0b11101000_11001010;
51119 let r = _mm512_maskz_load_epi32(m, black_box(p));
51120 let e = _mm512_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8, 0, 0, 0, 12, 0, 14, 15, 16);
51121 assert_eq_m512i(r, e);
51122 }
51123
51124 #[simd_test(enable = "avx512f")]
51125 unsafe fn test_mm512_mask_storeu_epi32() {
51126 let mut r = [42_i32; 16];
51127 let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
51128 let m = 0b11101000_11001010;
51129 _mm512_mask_storeu_epi32(r.as_mut_ptr(), m, a);
51130 let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16);
51131 assert_eq_m512i(_mm512_loadu_epi32(r.as_ptr()), e);
51132 }
51133
51134 #[simd_test(enable = "avx512f")]
51135 unsafe fn test_mm512_mask_store_epi32() {
51136 #[repr(align(64))]
51137 struct Align {
51138 data: [i32; 16],
51139 }
51140 let mut r = Align { data: [42; 16] };
51141 let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
51142 let m = 0b11101000_11001010;
51143 _mm512_mask_store_epi32(r.data.as_mut_ptr(), m, a);
51144 let e = _mm512_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8, 42, 42, 42, 12, 42, 14, 15, 16);
51145 assert_eq_m512i(_mm512_load_epi32(r.data.as_ptr()), e);
51146 }
51147
51148 #[simd_test(enable = "avx512f")]
51149 unsafe fn test_mm512_mask_loadu_epi64() {
51150 let src = _mm512_set1_epi64(42);
51151 let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8];
51152 let p = a.as_ptr();
51153 let m = 0b11001010;
51154 let r = _mm512_mask_loadu_epi64(src, m, black_box(p));
51155 let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8);
51156 assert_eq_m512i(r, e);
51157 }
51158
51159 #[simd_test(enable = "avx512f")]
51160 unsafe fn test_mm512_maskz_loadu_epi64() {
51161 let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8];
51162 let p = a.as_ptr();
51163 let m = 0b11001010;
51164 let r = _mm512_maskz_loadu_epi64(m, black_box(p));
51165 let e = _mm512_setr_epi64(0, 2, 0, 4, 0, 0, 7, 8);
51166 assert_eq_m512i(r, e);
51167 }
51168
51169 #[simd_test(enable = "avx512f")]
51170 unsafe fn test_mm512_mask_load_epi64() {
51171 #[repr(align(64))]
51172 struct Align {
51173 data: [i64; 8], // 64 bytes
51174 }
51175 let src = _mm512_set1_epi64(42);
51176 let a = Align {
51177 data: [1_i64, 2, 3, 4, 5, 6, 7, 8],
51178 };
51179 let p = a.data.as_ptr();
51180 let m = 0b11001010;
51181 let r = _mm512_mask_load_epi64(src, m, black_box(p));
51182 let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8);
51183 assert_eq_m512i(r, e);
51184 }
51185
51186 #[simd_test(enable = "avx512f")]
51187 unsafe fn test_mm512_maskz_load_epi64() {
51188 #[repr(align(64))]
51189 struct Align {
51190 data: [i64; 8], // 64 bytes
51191 }
51192 let a = Align {
51193 data: [1_i64, 2, 3, 4, 5, 6, 7, 8],
51194 };
51195 let p = a.data.as_ptr();
51196 let m = 0b11001010;
51197 let r = _mm512_maskz_load_epi64(m, black_box(p));
51198 let e = _mm512_setr_epi64(0, 2, 0, 4, 0, 0, 7, 8);
51199 assert_eq_m512i(r, e);
51200 }
51201
51202 #[simd_test(enable = "avx512f")]
51203 unsafe fn test_mm512_mask_storeu_epi64() {
51204 let mut r = [42_i64; 8];
51205 let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
51206 let m = 0b11001010;
51207 _mm512_mask_storeu_epi64(r.as_mut_ptr(), m, a);
51208 let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8);
51209 assert_eq_m512i(_mm512_loadu_epi64(r.as_ptr()), e);
51210 }
51211
51212 #[simd_test(enable = "avx512f")]
51213 unsafe fn test_mm512_mask_store_epi64() {
51214 #[repr(align(64))]
51215 struct Align {
51216 data: [i64; 8],
51217 }
51218 let mut r = Align { data: [42; 8] };
51219 let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
51220 let m = 0b11001010;
51221 let p = r.data.as_mut_ptr();
51222 _mm512_mask_store_epi64(p, m, a);
51223 let e = _mm512_setr_epi64(42, 2, 42, 4, 42, 42, 7, 8);
51224 assert_eq_m512i(_mm512_load_epi64(r.data.as_ptr()), e);
51225 }
51226
51227 #[simd_test(enable = "avx512f")]
51228 unsafe fn test_mm512_mask_loadu_ps() {
51229 let src = _mm512_set1_ps(42.0);
51230 let a = &[
51231 1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
51232 16.0,
51233 ];
51234 let p = a.as_ptr();
51235 let m = 0b11101000_11001010;
51236 let r = _mm512_mask_loadu_ps(src, m, black_box(p));
51237 let e = _mm512_setr_ps(
51238 42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0,
51239 16.0,
51240 );
51241 assert_eq_m512(r, e);
51242 }
51243
51244 #[simd_test(enable = "avx512f")]
51245 unsafe fn test_mm512_maskz_loadu_ps() {
51246 let a = &[
51247 1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0,
51248 16.0,
51249 ];
51250 let p = a.as_ptr();
51251 let m = 0b11101000_11001010;
51252 let r = _mm512_maskz_loadu_ps(m, black_box(p));
51253 let e = _mm512_setr_ps(
51254 0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0, 0.0, 0.0, 0.0, 12.0, 0.0, 14.0, 15.0, 16.0,
51255 );
51256 assert_eq_m512(r, e);
51257 }
51258
51259 #[simd_test(enable = "avx512f")]
51260 unsafe fn test_mm512_mask_load_ps() {
51261 #[repr(align(64))]
51262 struct Align {
51263 data: [f32; 16], // 64 bytes
51264 }
51265 let src = _mm512_set1_ps(42.0);
51266 let a = Align {
51267 data: [
51268 1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0,
51269 15.0, 16.0,
51270 ],
51271 };
51272 let p = a.data.as_ptr();
51273 let m = 0b11101000_11001010;
51274 let r = _mm512_mask_load_ps(src, m, black_box(p));
51275 let e = _mm512_setr_ps(
51276 42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0,
51277 16.0,
51278 );
51279 assert_eq_m512(r, e);
51280 }
51281
51282 #[simd_test(enable = "avx512f")]
51283 unsafe fn test_mm512_maskz_load_ps() {
51284 #[repr(align(64))]
51285 struct Align {
51286 data: [f32; 16], // 64 bytes
51287 }
51288 let a = Align {
51289 data: [
51290 1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0,
51291 15.0, 16.0,
51292 ],
51293 };
51294 let p = a.data.as_ptr();
51295 let m = 0b11101000_11001010;
51296 let r = _mm512_maskz_load_ps(m, black_box(p));
51297 let e = _mm512_setr_ps(
51298 0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0, 0.0, 0.0, 0.0, 12.0, 0.0, 14.0, 15.0, 16.0,
51299 );
51300 assert_eq_m512(r, e);
51301 }
51302
51303 #[simd_test(enable = "avx512f")]
51304 unsafe fn test_mm512_mask_storeu_ps() {
51305 let mut r = [42_f32; 16];
51306 let a = _mm512_setr_ps(
51307 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
51308 );
51309 let m = 0b11101000_11001010;
51310 _mm512_mask_storeu_ps(r.as_mut_ptr(), m, a);
51311 let e = _mm512_setr_ps(
51312 42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0,
51313 16.0,
51314 );
51315 assert_eq_m512(_mm512_loadu_ps(r.as_ptr()), e);
51316 }
51317
51318 #[simd_test(enable = "avx512f")]
51319 unsafe fn test_mm512_mask_store_ps() {
51320 #[repr(align(64))]
51321 struct Align {
51322 data: [f32; 16],
51323 }
51324 let mut r = Align { data: [42.0; 16] };
51325 let a = _mm512_setr_ps(
51326 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0,
51327 );
51328 let m = 0b11101000_11001010;
51329 _mm512_mask_store_ps(r.data.as_mut_ptr(), m, a);
51330 let e = _mm512_setr_ps(
51331 42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0, 42.0, 42.0, 42.0, 12.0, 42.0, 14.0, 15.0,
51332 16.0,
51333 );
51334 assert_eq_m512(_mm512_load_ps(r.data.as_ptr()), e);
51335 }
51336
51337 #[simd_test(enable = "avx512f")]
51338 unsafe fn test_mm512_mask_loadu_pd() {
51339 let src = _mm512_set1_pd(42.0);
51340 let a = &[1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
51341 let p = a.as_ptr();
51342 let m = 0b11001010;
51343 let r = _mm512_mask_loadu_pd(src, m, black_box(p));
51344 let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
51345 assert_eq_m512d(r, e);
51346 }
51347
51348 #[simd_test(enable = "avx512f")]
51349 unsafe fn test_mm512_maskz_loadu_pd() {
51350 let a = &[1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
51351 let p = a.as_ptr();
51352 let m = 0b11001010;
51353 let r = _mm512_maskz_loadu_pd(m, black_box(p));
51354 let e = _mm512_setr_pd(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0);
51355 assert_eq_m512d(r, e);
51356 }
51357
51358 #[simd_test(enable = "avx512f")]
51359 unsafe fn test_mm512_mask_load_pd() {
51360 #[repr(align(64))]
51361 struct Align {
51362 data: [f64; 8], // 64 bytes
51363 }
51364 let src = _mm512_set1_pd(42.0);
51365 let a = Align {
51366 data: [1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
51367 };
51368 let p = a.data.as_ptr();
51369 let m = 0b11001010;
51370 let r = _mm512_mask_load_pd(src, m, black_box(p));
51371 let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
51372 assert_eq_m512d(r, e);
51373 }
51374
51375 #[simd_test(enable = "avx512f")]
51376 unsafe fn test_mm512_maskz_load_pd() {
51377 #[repr(align(64))]
51378 struct Align {
51379 data: [f64; 8], // 64 bytes
51380 }
51381 let a = Align {
51382 data: [1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
51383 };
51384 let p = a.data.as_ptr();
51385 let m = 0b11001010;
51386 let r = _mm512_maskz_load_pd(m, black_box(p));
51387 let e = _mm512_setr_pd(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0);
51388 assert_eq_m512d(r, e);
51389 }
51390
51391 #[simd_test(enable = "avx512f")]
51392 unsafe fn test_mm512_mask_storeu_pd() {
51393 let mut r = [42_f64; 8];
51394 let a = _mm512_setr_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
51395 let m = 0b11001010;
51396 _mm512_mask_storeu_pd(r.as_mut_ptr(), m, a);
51397 let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
51398 assert_eq_m512d(_mm512_loadu_pd(r.as_ptr()), e);
51399 }
51400
51401 #[simd_test(enable = "avx512f")]
51402 unsafe fn test_mm512_mask_store_pd() {
51403 #[repr(align(64))]
51404 struct Align {
51405 data: [f64; 8],
51406 }
51407 let mut r = Align { data: [42.0; 8] };
51408 let a = _mm512_setr_pd(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
51409 let m = 0b11001010;
51410 _mm512_mask_store_pd(r.data.as_mut_ptr(), m, a);
51411 let e = _mm512_setr_pd(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
51412 assert_eq_m512d(_mm512_load_pd(r.data.as_ptr()), e);
51413 }
51414
51415 #[simd_test(enable = "avx512f,avx512vl")]
51416 unsafe fn test_mm256_mask_loadu_epi32() {
51417 let src = _mm256_set1_epi32(42);
51418 let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8];
51419 let p = a.as_ptr();
51420 let m = 0b11001010;
51421 let r = _mm256_mask_loadu_epi32(src, m, black_box(p));
51422 let e = _mm256_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8);
51423 assert_eq_m256i(r, e);
51424 }
51425
51426 #[simd_test(enable = "avx512f,avx512vl")]
51427 unsafe fn test_mm256_maskz_loadu_epi32() {
51428 let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8];
51429 let p = a.as_ptr();
51430 let m = 0b11001010;
51431 let r = _mm256_maskz_loadu_epi32(m, black_box(p));
51432 let e = _mm256_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8);
51433 assert_eq_m256i(r, e);
51434 }
51435
51436 #[simd_test(enable = "avx512f,avx512vl")]
51437 unsafe fn test_mm256_mask_load_epi32() {
51438 #[repr(align(32))]
51439 struct Align {
51440 data: [i32; 8], // 32 bytes
51441 }
51442 let src = _mm256_set1_epi32(42);
51443 let a = Align {
51444 data: [1_i32, 2, 3, 4, 5, 6, 7, 8],
51445 };
51446 let p = a.data.as_ptr();
51447 let m = 0b11001010;
51448 let r = _mm256_mask_load_epi32(src, m, black_box(p));
51449 let e = _mm256_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8);
51450 assert_eq_m256i(r, e);
51451 }
51452
51453 #[simd_test(enable = "avx512f,avx512vl")]
51454 unsafe fn test_mm256_maskz_load_epi32() {
51455 #[repr(align(32))]
51456 struct Align {
51457 data: [i32; 8], // 32 bytes
51458 }
51459 let a = Align {
51460 data: [1_i32, 2, 3, 4, 5, 6, 7, 8],
51461 };
51462 let p = a.data.as_ptr();
51463 let m = 0b11001010;
51464 let r = _mm256_maskz_load_epi32(m, black_box(p));
51465 let e = _mm256_setr_epi32(0, 2, 0, 4, 0, 0, 7, 8);
51466 assert_eq_m256i(r, e);
51467 }
51468
51469 #[simd_test(enable = "avx512f,avx512vl")]
51470 unsafe fn test_mm256_mask_storeu_epi32() {
51471 let mut r = [42_i32; 8];
51472 let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
51473 let m = 0b11001010;
51474 _mm256_mask_storeu_epi32(r.as_mut_ptr(), m, a);
51475 let e = _mm256_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8);
51476 assert_eq_m256i(_mm256_loadu_epi32(r.as_ptr()), e);
51477 }
51478
51479 #[simd_test(enable = "avx512f,avx512vl")]
51480 unsafe fn test_mm256_mask_store_epi32() {
51481 #[repr(align(64))]
51482 struct Align {
51483 data: [i32; 8],
51484 }
51485 let mut r = Align { data: [42; 8] };
51486 let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
51487 let m = 0b11001010;
51488 _mm256_mask_store_epi32(r.data.as_mut_ptr(), m, a);
51489 let e = _mm256_setr_epi32(42, 2, 42, 4, 42, 42, 7, 8);
51490 assert_eq_m256i(_mm256_load_epi32(r.data.as_ptr()), e);
51491 }
51492
51493 #[simd_test(enable = "avx512f,avx512vl")]
51494 unsafe fn test_mm256_mask_loadu_epi64() {
51495 let src = _mm256_set1_epi64x(42);
51496 let a = &[1_i64, 2, 3, 4];
51497 let p = a.as_ptr();
51498 let m = 0b1010;
51499 let r = _mm256_mask_loadu_epi64(src, m, black_box(p));
51500 let e = _mm256_setr_epi64x(42, 2, 42, 4);
51501 assert_eq_m256i(r, e);
51502 }
51503
51504 #[simd_test(enable = "avx512f,avx512vl")]
51505 unsafe fn test_mm256_maskz_loadu_epi64() {
51506 let a = &[1_i64, 2, 3, 4];
51507 let p = a.as_ptr();
51508 let m = 0b1010;
51509 let r = _mm256_maskz_loadu_epi64(m, black_box(p));
51510 let e = _mm256_setr_epi64x(0, 2, 0, 4);
51511 assert_eq_m256i(r, e);
51512 }
51513
51514 #[simd_test(enable = "avx512f,avx512vl")]
51515 unsafe fn test_mm256_mask_load_epi64() {
51516 #[repr(align(32))]
51517 struct Align {
51518 data: [i64; 4], // 32 bytes
51519 }
51520 let src = _mm256_set1_epi64x(42);
51521 let a = Align {
51522 data: [1_i64, 2, 3, 4],
51523 };
51524 let p = a.data.as_ptr();
51525 let m = 0b1010;
51526 let r = _mm256_mask_load_epi64(src, m, black_box(p));
51527 let e = _mm256_setr_epi64x(42, 2, 42, 4);
51528 assert_eq_m256i(r, e);
51529 }
51530
51531 #[simd_test(enable = "avx512f,avx512vl")]
51532 unsafe fn test_mm256_maskz_load_epi64() {
51533 #[repr(align(32))]
51534 struct Align {
51535 data: [i64; 4], // 32 bytes
51536 }
51537 let a = Align {
51538 data: [1_i64, 2, 3, 4],
51539 };
51540 let p = a.data.as_ptr();
51541 let m = 0b1010;
51542 let r = _mm256_maskz_load_epi64(m, black_box(p));
51543 let e = _mm256_setr_epi64x(0, 2, 0, 4);
51544 assert_eq_m256i(r, e);
51545 }
51546
51547 #[simd_test(enable = "avx512f,avx512vl")]
51548 unsafe fn test_mm256_mask_storeu_epi64() {
51549 let mut r = [42_i64; 4];
51550 let a = _mm256_setr_epi64x(1, 2, 3, 4);
51551 let m = 0b1010;
51552 _mm256_mask_storeu_epi64(r.as_mut_ptr(), m, a);
51553 let e = _mm256_setr_epi64x(42, 2, 42, 4);
51554 assert_eq_m256i(_mm256_loadu_epi64(r.as_ptr()), e);
51555 }
51556
51557 #[simd_test(enable = "avx512f,avx512vl")]
51558 unsafe fn test_mm256_mask_store_epi64() {
51559 #[repr(align(32))]
51560 struct Align {
51561 data: [i64; 4],
51562 }
51563 let mut r = Align { data: [42; 4] };
51564 let a = _mm256_setr_epi64x(1, 2, 3, 4);
51565 let m = 0b1010;
51566 _mm256_mask_store_epi64(r.data.as_mut_ptr(), m, a);
51567 let e = _mm256_setr_epi64x(42, 2, 42, 4);
51568 assert_eq_m256i(_mm256_load_epi64(r.data.as_ptr()), e);
51569 }
51570
51571 #[simd_test(enable = "avx512f,avx512vl")]
51572 unsafe fn test_mm256_mask_loadu_ps() {
51573 let src = _mm256_set1_ps(42.0);
51574 let a = &[1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
51575 let p = a.as_ptr();
51576 let m = 0b11001010;
51577 let r = _mm256_mask_loadu_ps(src, m, black_box(p));
51578 let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
51579 assert_eq_m256(r, e);
51580 }
51581
51582 #[simd_test(enable = "avx512f,avx512vl")]
51583 unsafe fn test_mm256_maskz_loadu_ps() {
51584 let a = &[1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
51585 let p = a.as_ptr();
51586 let m = 0b11001010;
51587 let r = _mm256_maskz_loadu_ps(m, black_box(p));
51588 let e = _mm256_setr_ps(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0);
51589 assert_eq_m256(r, e);
51590 }
51591
51592 #[simd_test(enable = "avx512f,avx512vl")]
51593 unsafe fn test_mm256_mask_load_ps() {
51594 #[repr(align(32))]
51595 struct Align {
51596 data: [f32; 8], // 32 bytes
51597 }
51598 let src = _mm256_set1_ps(42.0);
51599 let a = Align {
51600 data: [1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
51601 };
51602 let p = a.data.as_ptr();
51603 let m = 0b11001010;
51604 let r = _mm256_mask_load_ps(src, m, black_box(p));
51605 let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
51606 assert_eq_m256(r, e);
51607 }
51608
51609 #[simd_test(enable = "avx512f,avx512vl")]
51610 unsafe fn test_mm256_maskz_load_ps() {
51611 #[repr(align(32))]
51612 struct Align {
51613 data: [f32; 8], // 32 bytes
51614 }
51615 let a = Align {
51616 data: [1.0_f32, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0],
51617 };
51618 let p = a.data.as_ptr();
51619 let m = 0b11001010;
51620 let r = _mm256_maskz_load_ps(m, black_box(p));
51621 let e = _mm256_setr_ps(0.0, 2.0, 0.0, 4.0, 0.0, 0.0, 7.0, 8.0);
51622 assert_eq_m256(r, e);
51623 }
51624
51625 #[simd_test(enable = "avx512f,avx512vl")]
51626 unsafe fn test_mm256_mask_storeu_ps() {
51627 let mut r = [42_f32; 8];
51628 let a = _mm256_setr_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
51629 let m = 0b11001010;
51630 _mm256_mask_storeu_ps(r.as_mut_ptr(), m, a);
51631 let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
51632 assert_eq_m256(_mm256_loadu_ps(r.as_ptr()), e);
51633 }
51634
51635 #[simd_test(enable = "avx512f,avx512vl")]
51636 unsafe fn test_mm256_mask_store_ps() {
51637 #[repr(align(32))]
51638 struct Align {
51639 data: [f32; 8],
51640 }
51641 let mut r = Align { data: [42.0; 8] };
51642 let a = _mm256_setr_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
51643 let m = 0b11001010;
51644 _mm256_mask_store_ps(r.data.as_mut_ptr(), m, a);
51645 let e = _mm256_setr_ps(42.0, 2.0, 42.0, 4.0, 42.0, 42.0, 7.0, 8.0);
51646 assert_eq_m256(_mm256_load_ps(r.data.as_ptr()), e);
51647 }
51648
51649 #[simd_test(enable = "avx512f,avx512vl")]
51650 unsafe fn test_mm256_mask_loadu_pd() {
51651 let src = _mm256_set1_pd(42.0);
51652 let a = &[1.0_f64, 2.0, 3.0, 4.0];
51653 let p = a.as_ptr();
51654 let m = 0b1010;
51655 let r = _mm256_mask_loadu_pd(src, m, black_box(p));
51656 let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0);
51657 assert_eq_m256d(r, e);
51658 }
51659
51660 #[simd_test(enable = "avx512f,avx512vl")]
51661 unsafe fn test_mm256_maskz_loadu_pd() {
51662 let a = &[1.0_f64, 2.0, 3.0, 4.0];
51663 let p = a.as_ptr();
51664 let m = 0b1010;
51665 let r = _mm256_maskz_loadu_pd(m, black_box(p));
51666 let e = _mm256_setr_pd(0.0, 2.0, 0.0, 4.0);
51667 assert_eq_m256d(r, e);
51668 }
51669
51670 #[simd_test(enable = "avx512f,avx512vl")]
51671 unsafe fn test_mm256_mask_load_pd() {
51672 #[repr(align(32))]
51673 struct Align {
51674 data: [f64; 4], // 32 bytes
51675 }
51676 let src = _mm256_set1_pd(42.0);
51677 let a = Align {
51678 data: [1.0_f64, 2.0, 3.0, 4.0],
51679 };
51680 let p = a.data.as_ptr();
51681 let m = 0b1010;
51682 let r = _mm256_mask_load_pd(src, m, black_box(p));
51683 let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0);
51684 assert_eq_m256d(r, e);
51685 }
51686
51687 #[simd_test(enable = "avx512f,avx512vl")]
51688 unsafe fn test_mm256_maskz_load_pd() {
51689 #[repr(align(32))]
51690 struct Align {
51691 data: [f64; 4], // 32 bytes
51692 }
51693 let a = Align {
51694 data: [1.0_f64, 2.0, 3.0, 4.0],
51695 };
51696 let p = a.data.as_ptr();
51697 let m = 0b1010;
51698 let r = _mm256_maskz_load_pd(m, black_box(p));
51699 let e = _mm256_setr_pd(0.0, 2.0, 0.0, 4.0);
51700 assert_eq_m256d(r, e);
51701 }
51702
51703 #[simd_test(enable = "avx512f,avx512vl")]
51704 unsafe fn test_mm256_mask_storeu_pd() {
51705 let mut r = [42_f64; 4];
51706 let a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
51707 let m = 0b1010;
51708 _mm256_mask_storeu_pd(r.as_mut_ptr(), m, a);
51709 let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0);
51710 assert_eq_m256d(_mm256_loadu_pd(r.as_ptr()), e);
51711 }
51712
51713 #[simd_test(enable = "avx512f,avx512vl")]
51714 unsafe fn test_mm256_mask_store_pd() {
51715 #[repr(align(32))]
51716 struct Align {
51717 data: [f64; 4],
51718 }
51719 let mut r = Align { data: [42.0; 4] };
51720 let a = _mm256_setr_pd(1.0, 2.0, 3.0, 4.0);
51721 let m = 0b1010;
51722 _mm256_mask_store_pd(r.data.as_mut_ptr(), m, a);
51723 let e = _mm256_setr_pd(42.0, 2.0, 42.0, 4.0);
51724 assert_eq_m256d(_mm256_load_pd(r.data.as_ptr()), e);
51725 }
51726
51727 #[simd_test(enable = "avx512f,avx512vl")]
51728 unsafe fn test_mm_mask_loadu_epi32() {
51729 let src = _mm_set1_epi32(42);
51730 let a = &[1_i32, 2, 3, 4];
51731 let p = a.as_ptr();
51732 let m = 0b1010;
51733 let r = _mm_mask_loadu_epi32(src, m, black_box(p));
51734 let e = _mm_setr_epi32(42, 2, 42, 4);
51735 assert_eq_m128i(r, e);
51736 }
51737
51738 #[simd_test(enable = "avx512f,avx512vl")]
51739 unsafe fn test_mm_maskz_loadu_epi32() {
51740 let a = &[1_i32, 2, 3, 4];
51741 let p = a.as_ptr();
51742 let m = 0b1010;
51743 let r = _mm_maskz_loadu_epi32(m, black_box(p));
51744 let e = _mm_setr_epi32(0, 2, 0, 4);
51745 assert_eq_m128i(r, e);
51746 }
51747
51748 #[simd_test(enable = "avx512f,avx512vl")]
51749 unsafe fn test_mm_mask_load_epi32() {
51750 #[repr(align(16))]
51751 struct Align {
51752 data: [i32; 4], // 16 bytes
51753 }
51754 let src = _mm_set1_epi32(42);
51755 let a = Align {
51756 data: [1_i32, 2, 3, 4],
51757 };
51758 let p = a.data.as_ptr();
51759 let m = 0b1010;
51760 let r = _mm_mask_load_epi32(src, m, black_box(p));
51761 let e = _mm_setr_epi32(42, 2, 42, 4);
51762 assert_eq_m128i(r, e);
51763 }
51764
51765 #[simd_test(enable = "avx512f,avx512vl")]
51766 unsafe fn test_mm_maskz_load_epi32() {
51767 #[repr(align(16))]
51768 struct Align {
51769 data: [i32; 4], // 16 bytes
51770 }
51771 let a = Align {
51772 data: [1_i32, 2, 3, 4],
51773 };
51774 let p = a.data.as_ptr();
51775 let m = 0b1010;
51776 let r = _mm_maskz_load_epi32(m, black_box(p));
51777 let e = _mm_setr_epi32(0, 2, 0, 4);
51778 assert_eq_m128i(r, e);
51779 }
51780
51781 #[simd_test(enable = "avx512f,avx512vl")]
51782 unsafe fn test_mm_mask_storeu_epi32() {
51783 let mut r = [42_i32; 4];
51784 let a = _mm_setr_epi32(1, 2, 3, 4);
51785 let m = 0b1010;
51786 _mm_mask_storeu_epi32(r.as_mut_ptr(), m, a);
51787 let e = _mm_setr_epi32(42, 2, 42, 4);
51788 assert_eq_m128i(_mm_loadu_epi32(r.as_ptr()), e);
51789 }
51790
51791 #[simd_test(enable = "avx512f,avx512vl")]
51792 unsafe fn test_mm_mask_store_epi32() {
51793 #[repr(align(16))]
51794 struct Align {
51795 data: [i32; 4], // 16 bytes
51796 }
51797 let mut r = Align { data: [42; 4] };
51798 let a = _mm_setr_epi32(1, 2, 3, 4);
51799 let m = 0b1010;
51800 _mm_mask_store_epi32(r.data.as_mut_ptr(), m, a);
51801 let e = _mm_setr_epi32(42, 2, 42, 4);
51802 assert_eq_m128i(_mm_load_epi32(r.data.as_ptr()), e);
51803 }
51804
51805 #[simd_test(enable = "avx512f,avx512vl")]
51806 unsafe fn test_mm_mask_loadu_epi64() {
51807 let src = _mm_set1_epi64x(42);
51808 let a = &[1_i64, 2];
51809 let p = a.as_ptr();
51810 let m = 0b10;
51811 let r = _mm_mask_loadu_epi64(src, m, black_box(p));
51812 let e = _mm_setr_epi64x(42, 2);
51813 assert_eq_m128i(r, e);
51814 }
51815
51816 #[simd_test(enable = "avx512f,avx512vl")]
51817 unsafe fn test_mm_maskz_loadu_epi64() {
51818 let a = &[1_i64, 2];
51819 let p = a.as_ptr();
51820 let m = 0b10;
51821 let r = _mm_maskz_loadu_epi64(m, black_box(p));
51822 let e = _mm_setr_epi64x(0, 2);
51823 assert_eq_m128i(r, e);
51824 }
51825
51826 #[simd_test(enable = "avx512f,avx512vl")]
51827 unsafe fn test_mm_mask_load_epi64() {
51828 #[repr(align(16))]
51829 struct Align {
51830 data: [i64; 2], // 16 bytes
51831 }
51832 let src = _mm_set1_epi64x(42);
51833 let a = Align { data: [1_i64, 2] };
51834 let p = a.data.as_ptr();
51835 let m = 0b10;
51836 let r = _mm_mask_load_epi64(src, m, black_box(p));
51837 let e = _mm_setr_epi64x(42, 2);
51838 assert_eq_m128i(r, e);
51839 }
51840
51841 #[simd_test(enable = "avx512f,avx512vl")]
51842 unsafe fn test_mm_maskz_load_epi64() {
51843 #[repr(align(16))]
51844 struct Align {
51845 data: [i64; 2], // 16 bytes
51846 }
51847 let a = Align { data: [1_i64, 2] };
51848 let p = a.data.as_ptr();
51849 let m = 0b10;
51850 let r = _mm_maskz_load_epi64(m, black_box(p));
51851 let e = _mm_setr_epi64x(0, 2);
51852 assert_eq_m128i(r, e);
51853 }
51854
51855 #[simd_test(enable = "avx512f,avx512vl")]
51856 unsafe fn test_mm_mask_storeu_epi64() {
51857 let mut r = [42_i64; 2];
51858 let a = _mm_setr_epi64x(1, 2);
51859 let m = 0b10;
51860 _mm_mask_storeu_epi64(r.as_mut_ptr(), m, a);
51861 let e = _mm_setr_epi64x(42, 2);
51862 assert_eq_m128i(_mm_loadu_epi64(r.as_ptr()), e);
51863 }
51864
51865 #[simd_test(enable = "avx512f,avx512vl")]
51866 unsafe fn test_mm_mask_store_epi64() {
51867 #[repr(align(16))]
51868 struct Align {
51869 data: [i64; 2], // 16 bytes
51870 }
51871 let mut r = Align { data: [42; 2] };
51872 let a = _mm_setr_epi64x(1, 2);
51873 let m = 0b10;
51874 _mm_mask_store_epi64(r.data.as_mut_ptr(), m, a);
51875 let e = _mm_setr_epi64x(42, 2);
51876 assert_eq_m128i(_mm_load_epi64(r.data.as_ptr()), e);
51877 }
51878
51879 #[simd_test(enable = "avx512f,avx512vl")]
51880 unsafe fn test_mm_mask_loadu_ps() {
51881 let src = _mm_set1_ps(42.0);
51882 let a = &[1.0_f32, 2.0, 3.0, 4.0];
51883 let p = a.as_ptr();
51884 let m = 0b1010;
51885 let r = _mm_mask_loadu_ps(src, m, black_box(p));
51886 let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0);
51887 assert_eq_m128(r, e);
51888 }
51889
51890 #[simd_test(enable = "avx512f,avx512vl")]
51891 unsafe fn test_mm_maskz_loadu_ps() {
51892 let a = &[1.0_f32, 2.0, 3.0, 4.0];
51893 let p = a.as_ptr();
51894 let m = 0b1010;
51895 let r = _mm_maskz_loadu_ps(m, black_box(p));
51896 let e = _mm_setr_ps(0.0, 2.0, 0.0, 4.0);
51897 assert_eq_m128(r, e);
51898 }
51899
51900 #[simd_test(enable = "avx512f,avx512vl")]
51901 unsafe fn test_mm_mask_load_ps() {
51902 #[repr(align(16))]
51903 struct Align {
51904 data: [f32; 4], // 16 bytes
51905 }
51906 let src = _mm_set1_ps(42.0);
51907 let a = Align {
51908 data: [1.0_f32, 2.0, 3.0, 4.0],
51909 };
51910 let p = a.data.as_ptr();
51911 let m = 0b1010;
51912 let r = _mm_mask_load_ps(src, m, black_box(p));
51913 let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0);
51914 assert_eq_m128(r, e);
51915 }
51916
51917 #[simd_test(enable = "avx512f,avx512vl")]
51918 unsafe fn test_mm_maskz_load_ps() {
51919 #[repr(align(16))]
51920 struct Align {
51921 data: [f32; 4], // 16 bytes
51922 }
51923 let a = Align {
51924 data: [1.0_f32, 2.0, 3.0, 4.0],
51925 };
51926 let p = a.data.as_ptr();
51927 let m = 0b1010;
51928 let r = _mm_maskz_load_ps(m, black_box(p));
51929 let e = _mm_setr_ps(0.0, 2.0, 0.0, 4.0);
51930 assert_eq_m128(r, e);
51931 }
51932
51933 #[simd_test(enable = "avx512f,avx512vl")]
51934 unsafe fn test_mm_mask_storeu_ps() {
51935 let mut r = [42_f32; 4];
51936 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
51937 let m = 0b1010;
51938 _mm_mask_storeu_ps(r.as_mut_ptr(), m, a);
51939 let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0);
51940 assert_eq_m128(_mm_loadu_ps(r.as_ptr()), e);
51941 }
51942
51943 #[simd_test(enable = "avx512f,avx512vl")]
51944 unsafe fn test_mm_mask_store_ps() {
51945 #[repr(align(16))]
51946 struct Align {
51947 data: [f32; 4], // 16 bytes
51948 }
51949 let mut r = Align { data: [42.0; 4] };
51950 let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0);
51951 let m = 0b1010;
51952 _mm_mask_store_ps(r.data.as_mut_ptr(), m, a);
51953 let e = _mm_setr_ps(42.0, 2.0, 42.0, 4.0);
51954 assert_eq_m128(_mm_load_ps(r.data.as_ptr()), e);
51955 }
51956
51957 #[simd_test(enable = "avx512f,avx512vl")]
51958 unsafe fn test_mm_mask_loadu_pd() {
51959 let src = _mm_set1_pd(42.0);
51960 let a = &[1.0_f64, 2.0];
51961 let p = a.as_ptr();
51962 let m = 0b10;
51963 let r = _mm_mask_loadu_pd(src, m, black_box(p));
51964 let e = _mm_setr_pd(42.0, 2.0);
51965 assert_eq_m128d(r, e);
51966 }
51967
51968 #[simd_test(enable = "avx512f,avx512vl")]
51969 unsafe fn test_mm_maskz_loadu_pd() {
51970 let a = &[1.0_f64, 2.0];
51971 let p = a.as_ptr();
51972 let m = 0b10;
51973 let r = _mm_maskz_loadu_pd(m, black_box(p));
51974 let e = _mm_setr_pd(0.0, 2.0);
51975 assert_eq_m128d(r, e);
51976 }
51977
51978 #[simd_test(enable = "avx512f,avx512vl")]
51979 unsafe fn test_mm_mask_load_pd() {
51980 #[repr(align(16))]
51981 struct Align {
51982 data: [f64; 2], // 16 bytes
51983 }
51984 let src = _mm_set1_pd(42.0);
51985 let a = Align {
51986 data: [1.0_f64, 2.0],
51987 };
51988 let p = a.data.as_ptr();
51989 let m = 0b10;
51990 let r = _mm_mask_load_pd(src, m, black_box(p));
51991 let e = _mm_setr_pd(42.0, 2.0);
51992 assert_eq_m128d(r, e);
51993 }
51994
51995 #[simd_test(enable = "avx512f,avx512vl")]
51996 unsafe fn test_mm_maskz_load_pd() {
51997 #[repr(align(16))]
51998 struct Align {
51999 data: [f64; 2], // 16 bytes
52000 }
52001 let a = Align {
52002 data: [1.0_f64, 2.0],
52003 };
52004 let p = a.data.as_ptr();
52005 let m = 0b10;
52006 let r = _mm_maskz_load_pd(m, black_box(p));
52007 let e = _mm_setr_pd(0.0, 2.0);
52008 assert_eq_m128d(r, e);
52009 }
52010
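    // `_mm_mask_load_ss`/`_mm_mask_load_sd` only mask the lowest element: bit 0 of
    // the mask selects between memory and `src` for that element, and the upper
    // elements of the result are zeroed.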
52011 #[simd_test(enable = "avx512f")]
52012 unsafe fn test_mm_mask_load_ss() {
52013 #[repr(align(16))]
52014 struct Align {
52015 data: f32,
52016 }
52017 let src = _mm_set_ss(2.0);
52018 let mem = Align { data: 1.0 };
52019 let r = _mm_mask_load_ss(src, 0b1, &mem.data);
52020 assert_eq_m128(r, _mm_set_ss(1.0));
52021 let r = _mm_mask_load_ss(src, 0b0, &mem.data);
52022 assert_eq_m128(r, _mm_set_ss(2.0));
52023 }
52024
52025 #[simd_test(enable = "avx512f")]
52026 unsafe fn test_mm_maskz_load_ss() {
52027 #[repr(align(16))]
52028 struct Align {
52029 data: f32,
52030 }
52031 let mem = Align { data: 1.0 };
52032 let r = _mm_maskz_load_ss(0b1, &mem.data);
52033 assert_eq_m128(r, _mm_set_ss(1.0));
52034 let r = _mm_maskz_load_ss(0b0, &mem.data);
52035 assert_eq_m128(r, _mm_set_ss(0.0));
52036 }
52037
52038 #[simd_test(enable = "avx512f")]
52039 unsafe fn test_mm_mask_load_sd() {
52040 #[repr(align(16))]
52041 struct Align {
52042 data: f64,
52043 }
52044 let src = _mm_set_sd(2.0);
52045 let mem = Align { data: 1.0 };
52046 let r = _mm_mask_load_sd(src, 0b1, &mem.data);
52047 assert_eq_m128d(r, _mm_set_sd(1.0));
52048 let r = _mm_mask_load_sd(src, 0b0, &mem.data);
52049 assert_eq_m128d(r, _mm_set_sd(2.0));
52050 }
52051
52052 #[simd_test(enable = "avx512f")]
52053 unsafe fn test_mm_maskz_load_sd() {
52054 #[repr(align(16))]
52055 struct Align {
52056 data: f64,
52057 }
52058 let mem = Align { data: 1.0 };
52059 let r = _mm_maskz_load_sd(0b1, &mem.data);
52060 assert_eq_m128d(r, _mm_set_sd(1.0));
52061 let r = _mm_maskz_load_sd(0b0, &mem.data);
52062 assert_eq_m128d(r, _mm_set_sd(0.0));
52063 }
52064
52065 #[simd_test(enable = "avx512f,avx512vl")]
52066 unsafe fn test_mm_mask_storeu_pd() {
52067 let mut r = [42_f64; 2];
52068 let a = _mm_setr_pd(1.0, 2.0);
52069 let m = 0b10;
52070 _mm_mask_storeu_pd(r.as_mut_ptr(), m, a);
52071 let e = _mm_setr_pd(42.0, 2.0);
52072 assert_eq_m128d(_mm_loadu_pd(r.as_ptr()), e);
52073 }
52074
52075 #[simd_test(enable = "avx512f,avx512vl")]
52076 unsafe fn test_mm_mask_store_pd() {
52077 #[repr(align(16))]
52078 struct Align {
52079 data: [f64; 2], // 16 bytes
52080 }
52081 let mut r = Align { data: [42.0; 2] };
52082 let a = _mm_setr_pd(1.0, 2.0);
52083 let m = 0b10;
52084 _mm_mask_store_pd(r.data.as_mut_ptr(), m, a);
52085 let e = _mm_setr_pd(42.0, 2.0);
52086 assert_eq_m128d(_mm_load_pd(r.data.as_ptr()), e);
52087 }
52088
52089 #[simd_test(enable = "avx512f")]
52090 unsafe fn test_mm_mask_store_ss() {
52091 #[repr(align(16))]
52092 struct Align {
52093 data: f32,
52094 }
52095 let a = _mm_set_ss(2.0);
52096 let mut mem = Align { data: 1.0 };
52097 _mm_mask_store_ss(&mut mem.data, 0b1, a);
52098 assert_eq!(mem.data, 2.0);
52099 _mm_mask_store_ss(&mut mem.data, 0b0, a);
52100 assert_eq!(mem.data, 2.0);
52101 }
52102
52103 #[simd_test(enable = "avx512f")]
52104 unsafe fn test_mm_mask_store_sd() {
52105 #[repr(align(16))]
52106 struct Align {
52107 data: f64,
52108 }
52109 let a = _mm_set_sd(2.0);
52110 let mut mem = Align { data: 1.0 };
52111 _mm_mask_store_sd(&mut mem.data, 0b1, a);
52112 assert_eq!(mem.data, 2.0);
52113 _mm_mask_store_sd(&mut mem.data, 0b0, a);
52114 assert_eq!(mem.data, 2.0);
52115 }
52116
52117 #[simd_test(enable = "avx512f")]
52118 unsafe fn test_mm512_setr_pd() {
52119 let r = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
52120 assert_eq_m512d(r, _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.));
52121 }
52122
52123 #[simd_test(enable = "avx512f")]
52124 unsafe fn test_mm512_set_pd() {
52125 let r = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
52126 assert_eq_m512d(r, _mm512_set_pd(7., 6., 5., 4., 3., 2., 1., 0.));
52127 }
52128
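    // `rol`/`ror` rotate each 32-bit lane, so bits shifted out of one end re-enter
    // at the other: rotating `1 << 31` left by one yields `1 << 0`, and vice versa.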
52129 #[simd_test(enable = "avx512f")]
52130 unsafe fn test_mm512_rol_epi32() {
52131 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52132 let r = _mm512_rol_epi32::<1>(a);
52133 let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52134 assert_eq_m512i(r, e);
52135 }
52136
52137 #[simd_test(enable = "avx512f")]
52138 unsafe fn test_mm512_mask_rol_epi32() {
52139 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52140 let r = _mm512_mask_rol_epi32::<1>(a, 0, a);
52141 assert_eq_m512i(r, a);
52142 let r = _mm512_mask_rol_epi32::<1>(a, 0b11111111_11111111, a);
52143 let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52144 assert_eq_m512i(r, e);
52145 }
52146
52147 #[simd_test(enable = "avx512f")]
52148 unsafe fn test_mm512_maskz_rol_epi32() {
52149 let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
52150 let r = _mm512_maskz_rol_epi32::<1>(0, a);
52151 assert_eq_m512i(r, _mm512_setzero_si512());
52152 let r = _mm512_maskz_rol_epi32::<1>(0b00000000_11111111, a);
52153 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
52154 assert_eq_m512i(r, e);
52155 }
52156
52157 #[simd_test(enable = "avx512f,avx512vl")]
52158 unsafe fn test_mm256_rol_epi32() {
52159 let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52160 let r = _mm256_rol_epi32::<1>(a);
52161 let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52162 assert_eq_m256i(r, e);
52163 }
52164
52165 #[simd_test(enable = "avx512f,avx512vl")]
52166 unsafe fn test_mm256_mask_rol_epi32() {
52167 let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52168 let r = _mm256_mask_rol_epi32::<1>(a, 0, a);
52169 assert_eq_m256i(r, a);
52170 let r = _mm256_mask_rol_epi32::<1>(a, 0b11111111, a);
52171 let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52172 assert_eq_m256i(r, e);
52173 }
52174
52175 #[simd_test(enable = "avx512f,avx512vl")]
52176 unsafe fn test_mm256_maskz_rol_epi32() {
52177 let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52178 let r = _mm256_maskz_rol_epi32::<1>(0, a);
52179 assert_eq_m256i(r, _mm256_setzero_si256());
52180 let r = _mm256_maskz_rol_epi32::<1>(0b11111111, a);
52181 let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52182 assert_eq_m256i(r, e);
52183 }
52184
52185 #[simd_test(enable = "avx512f,avx512vl")]
52186 unsafe fn test_mm_rol_epi32() {
52187 let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52188 let r = _mm_rol_epi32::<1>(a);
52189 let e = _mm_set_epi32(1 << 0, 2, 2, 2);
52190 assert_eq_m128i(r, e);
52191 }
52192
52193 #[simd_test(enable = "avx512f,avx512vl")]
52194 unsafe fn test_mm_mask_rol_epi32() {
52195 let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52196 let r = _mm_mask_rol_epi32::<1>(a, 0, a);
52197 assert_eq_m128i(r, a);
52198 let r = _mm_mask_rol_epi32::<1>(a, 0b00001111, a);
52199 let e = _mm_set_epi32(1 << 0, 2, 2, 2);
52200 assert_eq_m128i(r, e);
52201 }
52202
52203 #[simd_test(enable = "avx512f,avx512vl")]
52204 unsafe fn test_mm_maskz_rol_epi32() {
52205 let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52206 let r = _mm_maskz_rol_epi32::<1>(0, a);
52207 assert_eq_m128i(r, _mm_setzero_si128());
52208 let r = _mm_maskz_rol_epi32::<1>(0b00001111, a);
52209 let e = _mm_set_epi32(1 << 0, 2, 2, 2);
52210 assert_eq_m128i(r, e);
52211 }
52212
52213 #[simd_test(enable = "avx512f")]
52214 unsafe fn test_mm512_ror_epi32() {
52215 let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52216 let r = _mm512_ror_epi32::<1>(a);
52217 let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52218 assert_eq_m512i(r, e);
52219 }
52220
52221 #[simd_test(enable = "avx512f")]
52222 unsafe fn test_mm512_mask_ror_epi32() {
52223 let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52224 let r = _mm512_mask_ror_epi32::<1>(a, 0, a);
52225 assert_eq_m512i(r, a);
52226 let r = _mm512_mask_ror_epi32::<1>(a, 0b11111111_11111111, a);
52227 let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52228 assert_eq_m512i(r, e);
52229 }
52230
52231 #[simd_test(enable = "avx512f")]
52232 unsafe fn test_mm512_maskz_ror_epi32() {
52233 let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
52234 let r = _mm512_maskz_ror_epi32::<1>(0, a);
52235 assert_eq_m512i(r, _mm512_setzero_si512());
52236 let r = _mm512_maskz_ror_epi32::<1>(0b00000000_11111111, a);
52237 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
52238 assert_eq_m512i(r, e);
52239 }
52240
52241 #[simd_test(enable = "avx512f,avx512vl")]
52242 unsafe fn test_mm256_ror_epi32() {
52243 let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52244 let r = _mm256_ror_epi32::<1>(a);
52245 let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52246 assert_eq_m256i(r, e);
52247 }
52248
52249 #[simd_test(enable = "avx512f,avx512vl")]
52250 unsafe fn test_mm256_mask_ror_epi32() {
52251 let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52252 let r = _mm256_mask_ror_epi32::<1>(a, 0, a);
52253 assert_eq_m256i(r, a);
52254 let r = _mm256_mask_ror_epi32::<1>(a, 0b11111111, a);
52255 let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52256 assert_eq_m256i(r, e);
52257 }
52258
52259 #[simd_test(enable = "avx512f,avx512vl")]
52260 unsafe fn test_mm256_maskz_ror_epi32() {
52261 let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52262 let r = _mm256_maskz_ror_epi32::<1>(0, a);
52263 assert_eq_m256i(r, _mm256_setzero_si256());
52264 let r = _mm256_maskz_ror_epi32::<1>(0b11111111, a);
52265 let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52266 assert_eq_m256i(r, e);
52267 }
52268
52269 #[simd_test(enable = "avx512f,avx512vl")]
52270 unsafe fn test_mm_ror_epi32() {
52271 let a = _mm_set_epi32(1 << 0, 2, 2, 2);
52272 let r = _mm_ror_epi32::<1>(a);
52273 let e = _mm_set_epi32(1 << 31, 1, 1, 1);
52274 assert_eq_m128i(r, e);
52275 }
52276
52277 #[simd_test(enable = "avx512f,avx512vl")]
52278 unsafe fn test_mm_mask_ror_epi32() {
52279 let a = _mm_set_epi32(1 << 0, 2, 2, 2);
52280 let r = _mm_mask_ror_epi32::<1>(a, 0, a);
52281 assert_eq_m128i(r, a);
52282 let r = _mm_mask_ror_epi32::<1>(a, 0b00001111, a);
52283 let e = _mm_set_epi32(1 << 31, 1, 1, 1);
52284 assert_eq_m128i(r, e);
52285 }
52286
52287 #[simd_test(enable = "avx512f,avx512vl")]
52288 unsafe fn test_mm_maskz_ror_epi32() {
52289 let a = _mm_set_epi32(1 << 0, 2, 2, 2);
52290 let r = _mm_maskz_ror_epi32::<1>(0, a);
52291 assert_eq_m128i(r, _mm_setzero_si128());
52292 let r = _mm_maskz_ror_epi32::<1>(0b00001111, a);
52293 let e = _mm_set_epi32(1 << 31, 1, 1, 1);
52294 assert_eq_m128i(r, e);
52295 }
52296
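    // Unlike the rotates above, `slli`/`srli` are logical shifts: bits shifted out
    // are discarded, so the `1 << 31` lane becomes 0 after a left shift by one.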
52297 #[simd_test(enable = "avx512f")]
52298 unsafe fn test_mm512_slli_epi32() {
52299 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52300 let r = _mm512_slli_epi32::<1>(a);
52301 let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52302 assert_eq_m512i(r, e);
52303 }
52304
52305 #[simd_test(enable = "avx512f")]
52306 unsafe fn test_mm512_mask_slli_epi32() {
52307 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52308 let r = _mm512_mask_slli_epi32::<1>(a, 0, a);
52309 assert_eq_m512i(r, a);
52310 let r = _mm512_mask_slli_epi32::<1>(a, 0b11111111_11111111, a);
52311 let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52312 assert_eq_m512i(r, e);
52313 }
52314
52315 #[simd_test(enable = "avx512f")]
52316 unsafe fn test_mm512_maskz_slli_epi32() {
52317 let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
52318 let r = _mm512_maskz_slli_epi32::<1>(0, a);
52319 assert_eq_m512i(r, _mm512_setzero_si512());
52320 let r = _mm512_maskz_slli_epi32::<1>(0b00000000_11111111, a);
52321 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0);
52322 assert_eq_m512i(r, e);
52323 }
52324
52325 #[simd_test(enable = "avx512f,avx512vl")]
52326 unsafe fn test_mm256_mask_slli_epi32() {
52327 let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52328 let r = _mm256_mask_slli_epi32::<1>(a, 0, a);
52329 assert_eq_m256i(r, a);
52330 let r = _mm256_mask_slli_epi32::<1>(a, 0b11111111, a);
52331 let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2);
52332 assert_eq_m256i(r, e);
52333 }
52334
52335 #[simd_test(enable = "avx512f,avx512vl")]
52336 unsafe fn test_mm256_maskz_slli_epi32() {
52337 let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52338 let r = _mm256_maskz_slli_epi32::<1>(0, a);
52339 assert_eq_m256i(r, _mm256_setzero_si256());
52340 let r = _mm256_maskz_slli_epi32::<1>(0b11111111, a);
52341 let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2);
52342 assert_eq_m256i(r, e);
52343 }
52344
52345 #[simd_test(enable = "avx512f,avx512vl")]
52346 unsafe fn test_mm_mask_slli_epi32() {
52347 let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52348 let r = _mm_mask_slli_epi32::<1>(a, 0, a);
52349 assert_eq_m128i(r, a);
52350 let r = _mm_mask_slli_epi32::<1>(a, 0b00001111, a);
52351 let e = _mm_set_epi32(0, 2, 2, 2);
52352 assert_eq_m128i(r, e);
52353 }
52354
52355 #[simd_test(enable = "avx512f,avx512vl")]
52356 unsafe fn test_mm_maskz_slli_epi32() {
52357 let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52358 let r = _mm_maskz_slli_epi32::<1>(0, a);
52359 assert_eq_m128i(r, _mm_setzero_si128());
52360 let r = _mm_maskz_slli_epi32::<1>(0b00001111, a);
52361 let e = _mm_set_epi32(0, 2, 2, 2);
52362 assert_eq_m128i(r, e);
52363 }
52364
52365 #[simd_test(enable = "avx512f")]
52366 unsafe fn test_mm512_srli_epi32() {
52367 let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52368 let r = _mm512_srli_epi32::<1>(a);
52369 let e = _mm512_set_epi32(0 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52370 assert_eq_m512i(r, e);
52371 }
52372
52373 #[simd_test(enable = "avx512f")]
52374 unsafe fn test_mm512_mask_srli_epi32() {
52375 let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52376 let r = _mm512_mask_srli_epi32::<1>(a, 0, a);
52377 assert_eq_m512i(r, a);
52378 let r = _mm512_mask_srli_epi32::<1>(a, 0b11111111_11111111, a);
52379 let e = _mm512_set_epi32(0 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52380 assert_eq_m512i(r, e);
52381 }
52382
52383 #[simd_test(enable = "avx512f")]
52384 unsafe fn test_mm512_maskz_srli_epi32() {
52385 let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0);
52386 let r = _mm512_maskz_srli_epi32::<1>(0, a);
52387 assert_eq_m512i(r, _mm512_setzero_si512());
52388 let r = _mm512_maskz_srli_epi32::<1>(0b00000000_11111111, a);
52389 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0 << 31);
52390 assert_eq_m512i(r, e);
52391 }
52392
52393 #[simd_test(enable = "avx512f,avx512vl")]
52394 unsafe fn test_mm256_mask_srli_epi32() {
52395 let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
52396 let r = _mm256_mask_srli_epi32::<1>(a, 0, a);
52397 assert_eq_m256i(r, a);
52398 let r = _mm256_mask_srli_epi32::<1>(a, 0b11111111, a);
52399 let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
52400 assert_eq_m256i(r, e);
52401 }
52402
52403 #[simd_test(enable = "avx512f,avx512vl")]
52404 unsafe fn test_mm256_maskz_srli_epi32() {
52405 let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
52406 let r = _mm256_maskz_srli_epi32::<1>(0, a);
52407 assert_eq_m256i(r, _mm256_setzero_si256());
52408 let r = _mm256_maskz_srli_epi32::<1>(0b11111111, a);
52409 let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
52410 assert_eq_m256i(r, e);
52411 }
52412
52413 #[simd_test(enable = "avx512f,avx512vl")]
52414 unsafe fn test_mm_mask_srli_epi32() {
52415 let a = _mm_set_epi32(1 << 5, 0, 0, 0);
52416 let r = _mm_mask_srli_epi32::<1>(a, 0, a);
52417 assert_eq_m128i(r, a);
52418 let r = _mm_mask_srli_epi32::<1>(a, 0b00001111, a);
52419 let e = _mm_set_epi32(1 << 4, 0, 0, 0);
52420 assert_eq_m128i(r, e);
52421 }
52422
52423 #[simd_test(enable = "avx512f,avx512vl")]
52424 unsafe fn test_mm_maskz_srli_epi32() {
52425 let a = _mm_set_epi32(1 << 5, 0, 0, 0);
52426 let r = _mm_maskz_srli_epi32::<1>(0, a);
52427 assert_eq_m128i(r, _mm_setzero_si128());
52428 let r = _mm_maskz_srli_epi32::<1>(0b00001111, a);
52429 let e = _mm_set_epi32(1 << 4, 0, 0, 0);
52430 assert_eq_m128i(r, e);
52431 }
52432
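    // The `rolv`/`rorv` variants take per-lane rotate counts from the second vector
    // operand `b` instead of a compile-time immediate.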
52433 #[simd_test(enable = "avx512f")]
52434 unsafe fn test_mm512_rolv_epi32() {
52435 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52436 let b = _mm512_set1_epi32(1);
52437 let r = _mm512_rolv_epi32(a, b);
52438 let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52439 assert_eq_m512i(r, e);
52440 }
52441
52442 #[simd_test(enable = "avx512f")]
52443 unsafe fn test_mm512_mask_rolv_epi32() {
52444 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52445 let b = _mm512_set1_epi32(1);
52446 let r = _mm512_mask_rolv_epi32(a, 0, a, b);
52447 assert_eq_m512i(r, a);
52448 let r = _mm512_mask_rolv_epi32(a, 0b11111111_11111111, a, b);
52449 let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52450 assert_eq_m512i(r, e);
52451 }
52452
52453 #[simd_test(enable = "avx512f")]
52454 unsafe fn test_mm512_maskz_rolv_epi32() {
52455 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
52456 let b = _mm512_set1_epi32(1);
52457 let r = _mm512_maskz_rolv_epi32(0, a, b);
52458 assert_eq_m512i(r, _mm512_setzero_si512());
52459 let r = _mm512_maskz_rolv_epi32(0b00000000_11111111, a, b);
52460 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
52461 assert_eq_m512i(r, e);
52462 }
52463
52464 #[simd_test(enable = "avx512f,avx512vl")]
52465 unsafe fn test_mm256_rolv_epi32() {
52466 let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52467 let b = _mm256_set1_epi32(1);
52468 let r = _mm256_rolv_epi32(a, b);
52469 let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52470 assert_eq_m256i(r, e);
52471 }
52472
52473 #[simd_test(enable = "avx512f,avx512vl")]
52474 unsafe fn test_mm256_mask_rolv_epi32() {
52475 let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52476 let b = _mm256_set1_epi32(1);
52477 let r = _mm256_mask_rolv_epi32(a, 0, a, b);
52478 assert_eq_m256i(r, a);
52479 let r = _mm256_mask_rolv_epi32(a, 0b11111111, a, b);
52480 let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52481 assert_eq_m256i(r, e);
52482 }
52483
52484 #[simd_test(enable = "avx512f,avx512vl")]
52485 unsafe fn test_mm256_maskz_rolv_epi32() {
52486 let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52487 let b = _mm256_set1_epi32(1);
52488 let r = _mm256_maskz_rolv_epi32(0, a, b);
52489 assert_eq_m256i(r, _mm256_setzero_si256());
52490 let r = _mm256_maskz_rolv_epi32(0b11111111, a, b);
52491 let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52492 assert_eq_m256i(r, e);
52493 }
52494
52495 #[simd_test(enable = "avx512f,avx512vl")]
52496 unsafe fn test_mm_rolv_epi32() {
52497 let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52498 let b = _mm_set1_epi32(1);
52499 let r = _mm_rolv_epi32(a, b);
52500 let e = _mm_set_epi32(1 << 0, 2, 2, 2);
52501 assert_eq_m128i(r, e);
52502 }
52503
52504 #[simd_test(enable = "avx512f,avx512vl")]
52505 unsafe fn test_mm_mask_rolv_epi32() {
52506 let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52507 let b = _mm_set1_epi32(1);
52508 let r = _mm_mask_rolv_epi32(a, 0, a, b);
52509 assert_eq_m128i(r, a);
52510 let r = _mm_mask_rolv_epi32(a, 0b00001111, a, b);
52511 let e = _mm_set_epi32(1 << 0, 2, 2, 2);
52512 assert_eq_m128i(r, e);
52513 }
52514
52515 #[simd_test(enable = "avx512f,avx512vl")]
52516 unsafe fn test_mm_maskz_rolv_epi32() {
52517 let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52518 let b = _mm_set1_epi32(1);
52519 let r = _mm_maskz_rolv_epi32(0, a, b);
52520 assert_eq_m128i(r, _mm_setzero_si128());
52521 let r = _mm_maskz_rolv_epi32(0b00001111, a, b);
52522 let e = _mm_set_epi32(1 << 0, 2, 2, 2);
52523 assert_eq_m128i(r, e);
52524 }
52525
52526 #[simd_test(enable = "avx512f")]
52527 unsafe fn test_mm512_rorv_epi32() {
52528 let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52529 let b = _mm512_set1_epi32(1);
52530 let r = _mm512_rorv_epi32(a, b);
52531 let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52532 assert_eq_m512i(r, e);
52533 }
52534
52535 #[simd_test(enable = "avx512f")]
52536 unsafe fn test_mm512_mask_rorv_epi32() {
52537 let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52538 let b = _mm512_set1_epi32(1);
52539 let r = _mm512_mask_rorv_epi32(a, 0, a, b);
52540 assert_eq_m512i(r, a);
52541 let r = _mm512_mask_rorv_epi32(a, 0b11111111_11111111, a, b);
52542 let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52543 assert_eq_m512i(r, e);
52544 }
52545
52546 #[simd_test(enable = "avx512f")]
52547 unsafe fn test_mm512_maskz_rorv_epi32() {
52548 let a = _mm512_set_epi32(3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
52549 let b = _mm512_set1_epi32(1);
52550 let r = _mm512_maskz_rorv_epi32(0, a, b);
52551 assert_eq_m512i(r, _mm512_setzero_si512());
52552 let r = _mm512_maskz_rorv_epi32(0b00000000_11111111, a, b);
52553 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
52554 assert_eq_m512i(r, e);
52555 }
52556
52557 #[simd_test(enable = "avx512f,avx512vl")]
52558 unsafe fn test_mm256_rorv_epi32() {
52559 let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52560 let b = _mm256_set1_epi32(1);
52561 let r = _mm256_rorv_epi32(a, b);
52562 let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52563 assert_eq_m256i(r, e);
52564 }
52565
52566 #[simd_test(enable = "avx512f,avx512vl")]
52567 unsafe fn test_mm256_mask_rorv_epi32() {
52568 let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52569 let b = _mm256_set1_epi32(1);
52570 let r = _mm256_mask_rorv_epi32(a, 0, a, b);
52571 assert_eq_m256i(r, a);
52572 let r = _mm256_mask_rorv_epi32(a, 0b11111111, a, b);
52573 let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52574 assert_eq_m256i(r, e);
52575 }
52576
52577 #[simd_test(enable = "avx512f,avx512vl")]
52578 unsafe fn test_mm256_maskz_rorv_epi32() {
52579 let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
52580 let b = _mm256_set1_epi32(1);
52581 let r = _mm256_maskz_rorv_epi32(0, a, b);
52582 assert_eq_m256i(r, _mm256_setzero_si256());
52583 let r = _mm256_maskz_rorv_epi32(0b11111111, a, b);
52584 let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52585 assert_eq_m256i(r, e);
52586 }
52587
52588 #[simd_test(enable = "avx512f,avx512vl")]
52589 unsafe fn test_mm_rorv_epi32() {
52590 let a = _mm_set_epi32(1 << 0, 2, 2, 2);
52591 let b = _mm_set1_epi32(1);
52592 let r = _mm_rorv_epi32(a, b);
52593 let e = _mm_set_epi32(1 << 31, 1, 1, 1);
52594 assert_eq_m128i(r, e);
52595 }
52596
52597 #[simd_test(enable = "avx512f,avx512vl")]
52598 unsafe fn test_mm_mask_rorv_epi32() {
52599 let a = _mm_set_epi32(1 << 0, 2, 2, 2);
52600 let b = _mm_set1_epi32(1);
52601 let r = _mm_mask_rorv_epi32(a, 0, a, b);
52602 assert_eq_m128i(r, a);
52603 let r = _mm_mask_rorv_epi32(a, 0b00001111, a, b);
52604 let e = _mm_set_epi32(1 << 31, 1, 1, 1);
52605 assert_eq_m128i(r, e);
52606 }
52607
52608 #[simd_test(enable = "avx512f,avx512vl")]
52609 unsafe fn test_mm_maskz_rorv_epi32() {
52610 let a = _mm_set_epi32(1 << 0, 2, 2, 2);
52611 let b = _mm_set1_epi32(1);
52612 let r = _mm_maskz_rorv_epi32(0, a, b);
52613 assert_eq_m128i(r, _mm_setzero_si128());
52614 let r = _mm_maskz_rorv_epi32(0b00001111, a, b);
52615 let e = _mm_set_epi32(1 << 31, 1, 1, 1);
52616 assert_eq_m128i(r, e);
52617 }
52618
52619 #[simd_test(enable = "avx512f")]
52620 unsafe fn test_mm512_sllv_epi32() {
52621 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52622 let count = _mm512_set1_epi32(1);
52623 let r = _mm512_sllv_epi32(a, count);
52624 let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52625 assert_eq_m512i(r, e);
52626 }
52627
52628 #[simd_test(enable = "avx512f")]
52629 unsafe fn test_mm512_mask_sllv_epi32() {
52630 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52631 let count = _mm512_set1_epi32(1);
52632 let r = _mm512_mask_sllv_epi32(a, 0, a, count);
52633 assert_eq_m512i(r, a);
52634 let r = _mm512_mask_sllv_epi32(a, 0b11111111_11111111, a, count);
52635 let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52636 assert_eq_m512i(r, e);
52637 }
52638
52639 #[simd_test(enable = "avx512f")]
52640 unsafe fn test_mm512_maskz_sllv_epi32() {
52641 let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
52642 let count = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52643 let r = _mm512_maskz_sllv_epi32(0, a, count);
52644 assert_eq_m512i(r, _mm512_setzero_si512());
52645 let r = _mm512_maskz_sllv_epi32(0b00000000_11111111, a, count);
52646 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0);
52647 assert_eq_m512i(r, e);
52648 }
52649
52650 #[simd_test(enable = "avx512f,avx512vl")]
52651 unsafe fn test_mm256_mask_sllv_epi32() {
52652 let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52653 let count = _mm256_set1_epi32(1);
52654 let r = _mm256_mask_sllv_epi32(a, 0, a, count);
52655 assert_eq_m256i(r, a);
52656 let r = _mm256_mask_sllv_epi32(a, 0b11111111, a, count);
52657 let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2);
52658 assert_eq_m256i(r, e);
52659 }
52660
52661 #[simd_test(enable = "avx512f,avx512vl")]
52662 unsafe fn test_mm256_maskz_sllv_epi32() {
52663 let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
52664 let count = _mm256_set1_epi32(1);
52665 let r = _mm256_maskz_sllv_epi32(0, a, count);
52666 assert_eq_m256i(r, _mm256_setzero_si256());
52667 let r = _mm256_maskz_sllv_epi32(0b11111111, a, count);
52668 let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2);
52669 assert_eq_m256i(r, e);
52670 }
52671
52672 #[simd_test(enable = "avx512f,avx512vl")]
52673 unsafe fn test_mm_mask_sllv_epi32() {
52674 let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52675 let count = _mm_set1_epi32(1);
52676 let r = _mm_mask_sllv_epi32(a, 0, a, count);
52677 assert_eq_m128i(r, a);
52678 let r = _mm_mask_sllv_epi32(a, 0b00001111, a, count);
52679 let e = _mm_set_epi32(0, 2, 2, 2);
52680 assert_eq_m128i(r, e);
52681 }
52682
52683 #[simd_test(enable = "avx512f,avx512vl")]
52684 unsafe fn test_mm_maskz_sllv_epi32() {
52685 let a = _mm_set_epi32(1 << 31, 1, 1, 1);
52686 let count = _mm_set1_epi32(1);
52687 let r = _mm_maskz_sllv_epi32(0, a, count);
52688 assert_eq_m128i(r, _mm_setzero_si128());
52689 let r = _mm_maskz_sllv_epi32(0b00001111, a, count);
52690 let e = _mm_set_epi32(0, 2, 2, 2);
52691 assert_eq_m128i(r, e);
52692 }
52693
52694 #[simd_test(enable = "avx512f")]
52695 unsafe fn test_mm512_srlv_epi32() {
52696 let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52697 let count = _mm512_set1_epi32(1);
52698 let r = _mm512_srlv_epi32(a, count);
52699 let e = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52700 assert_eq_m512i(r, e);
52701 }
52702
52703 #[simd_test(enable = "avx512f")]
52704 unsafe fn test_mm512_mask_srlv_epi32() {
52705 let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
52706 let count = _mm512_set1_epi32(1);
52707 let r = _mm512_mask_srlv_epi32(a, 0, a, count);
52708 assert_eq_m512i(r, a);
52709 let r = _mm512_mask_srlv_epi32(a, 0b11111111_11111111, a, count);
52710 let e = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52711 assert_eq_m512i(r, e);
52712 }
52713
52714 #[simd_test(enable = "avx512f")]
52715 unsafe fn test_mm512_maskz_srlv_epi32() {
52716 let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0);
52717 let count = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
52718 let r = _mm512_maskz_srlv_epi32(0, a, count);
52719 assert_eq_m512i(r, _mm512_setzero_si512());
52720 let r = _mm512_maskz_srlv_epi32(0b00000000_11111111, a, count);
52721 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0);
52722 assert_eq_m512i(r, e);
52723 }
52724
52725 #[simd_test(enable = "avx512f,avx512vl")]
52726 unsafe fn test_mm256_mask_srlv_epi32() {
52727 let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
52728 let count = _mm256_set1_epi32(1);
52729 let r = _mm256_mask_srlv_epi32(a, 0, a, count);
52730 assert_eq_m256i(r, a);
52731 let r = _mm256_mask_srlv_epi32(a, 0b11111111, a, count);
52732 let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
52733 assert_eq_m256i(r, e);
52734 }
52735
52736 #[simd_test(enable = "avx512f,avx512vl")]
52737 unsafe fn test_mm256_maskz_srlv_epi32() {
52738 let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
52739 let count = _mm256_set1_epi32(1);
52740 let r = _mm256_maskz_srlv_epi32(0, a, count);
52741 assert_eq_m256i(r, _mm256_setzero_si256());
52742 let r = _mm256_maskz_srlv_epi32(0b11111111, a, count);
52743 let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
52744 assert_eq_m256i(r, e);
52745 }
52746
52747 #[simd_test(enable = "avx512f,avx512vl")]
52748 unsafe fn test_mm_mask_srlv_epi32() {
52749 let a = _mm_set_epi32(1 << 5, 0, 0, 0);
52750 let count = _mm_set1_epi32(1);
52751 let r = _mm_mask_srlv_epi32(a, 0, a, count);
52752 assert_eq_m128i(r, a);
52753 let r = _mm_mask_srlv_epi32(a, 0b00001111, a, count);
52754 let e = _mm_set_epi32(1 << 4, 0, 0, 0);
52755 assert_eq_m128i(r, e);
52756 }
52757
52758 #[simd_test(enable = "avx512f,avx512vl")]
52759 unsafe fn test_mm_maskz_srlv_epi32() {
52760 let a = _mm_set_epi32(1 << 5, 0, 0, 0);
52761 let count = _mm_set1_epi32(1);
52762 let r = _mm_maskz_srlv_epi32(0, a, count);
52763 assert_eq_m128i(r, _mm_setzero_si128());
52764 let r = _mm_maskz_srlv_epi32(0b00001111, a, count);
52765 let e = _mm_set_epi32(1 << 4, 0, 0, 0);
52766 assert_eq_m128i(r, e);
52767 }
52768
52769 #[simd_test(enable = "avx512f")]
52770 unsafe fn test_mm512_sll_epi32() {
52771 #[rustfmt::skip]
52772 let a = _mm512_set_epi32(
52773 1 << 31, 1 << 0, 1 << 1, 1 << 2,
52774 0, 0, 0, 0,
52775 0, 0, 0, 0,
52776 0, 0, 0, 0,
52777 );
52778 let count = _mm_set_epi32(0, 0, 0, 2);
52779 let r = _mm512_sll_epi32(a, count);
52780 #[rustfmt::skip]
52781 let e = _mm512_set_epi32(
52782 0, 1 << 2, 1 << 3, 1 << 4,
52783 0, 0, 0, 0,
52784 0, 0, 0, 0,
52785 0, 0, 0, 0,
52786 );
52787 assert_eq_m512i(r, e);
52788 }
52789
52790 #[simd_test(enable = "avx512f")]
52791 unsafe fn test_mm512_mask_sll_epi32() {
52792 #[rustfmt::skip]
52793 let a = _mm512_set_epi32(
52794 1 << 31, 1 << 0, 1 << 1, 1 << 2,
52795 0, 0, 0, 0,
52796 0, 0, 0, 0,
52797 0, 0, 0, 0,
52798 );
52799 let count = _mm_set_epi32(0, 0, 0, 2);
52800 let r = _mm512_mask_sll_epi32(a, 0, a, count);
52801 assert_eq_m512i(r, a);
52802 let r = _mm512_mask_sll_epi32(a, 0b11111111_11111111, a, count);
52803 #[rustfmt::skip]
52804 let e = _mm512_set_epi32(
52805 0, 1 << 2, 1 << 3, 1 << 4,
52806 0, 0, 0, 0,
52807 0, 0, 0, 0,
52808 0, 0, 0, 0,
52809 );
52810 assert_eq_m512i(r, e);
52811 }
52812
52813 #[simd_test(enable = "avx512f")]
52814 unsafe fn test_mm512_maskz_sll_epi32() {
52815 #[rustfmt::skip]
52816 let a = _mm512_set_epi32(
52817 1 << 31, 1 << 0, 1 << 1, 1 << 2,
52818 0, 0, 0, 0,
52819 0, 0, 0, 0,
52820 0, 0, 0, 1 << 31,
52821 );
52822 let count = _mm_set_epi32(2, 0, 0, 2);
52823 let r = _mm512_maskz_sll_epi32(0, a, count);
52824 assert_eq_m512i(r, _mm512_setzero_si512());
52825 let r = _mm512_maskz_sll_epi32(0b00000000_11111111, a, count);
52826 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
52827 assert_eq_m512i(r, e);
52828 }
52829
52830 #[simd_test(enable = "avx512f,avx512vl")]
52831 unsafe fn test_mm256_mask_sll_epi32() {
52832 let a = _mm256_set_epi32(1 << 13, 0, 0, 0, 0, 0, 0, 0);
52833 let count = _mm_set_epi32(0, 0, 0, 1);
52834 let r = _mm256_mask_sll_epi32(a, 0, a, count);
52835 assert_eq_m256i(r, a);
52836 let r = _mm256_mask_sll_epi32(a, 0b11111111, a, count);
52837 let e = _mm256_set_epi32(1 << 14, 0, 0, 0, 0, 0, 0, 0);
52838 assert_eq_m256i(r, e);
52839 }
52840
52841 #[simd_test(enable = "avx512f,avx512vl")]
52842 unsafe fn test_mm256_maskz_sll_epi32() {
52843 let a = _mm256_set_epi32(1 << 13, 0, 0, 0, 0, 0, 0, 0);
52844 let count = _mm_set_epi32(0, 0, 0, 1);
52845 let r = _mm256_maskz_sll_epi32(0, a, count);
52846 assert_eq_m256i(r, _mm256_setzero_si256());
52847 let r = _mm256_maskz_sll_epi32(0b11111111, a, count);
52848 let e = _mm256_set_epi32(1 << 14, 0, 0, 0, 0, 0, 0, 0);
52849 assert_eq_m256i(r, e);
52850 }
52851
52852 #[simd_test(enable = "avx512f,avx512vl")]
52853 unsafe fn test_mm_mask_sll_epi32() {
52854 let a = _mm_set_epi32(1 << 13, 0, 0, 0);
52855 let count = _mm_set_epi32(0, 0, 0, 1);
52856 let r = _mm_mask_sll_epi32(a, 0, a, count);
52857 assert_eq_m128i(r, a);
52858 let r = _mm_mask_sll_epi32(a, 0b00001111, a, count);
52859 let e = _mm_set_epi32(1 << 14, 0, 0, 0);
52860 assert_eq_m128i(r, e);
52861 }
52862
52863 #[simd_test(enable = "avx512f,avx512vl")]
52864 unsafe fn test_mm_maskz_sll_epi32() {
52865 let a = _mm_set_epi32(1 << 13, 0, 0, 0);
52866 let count = _mm_set_epi32(0, 0, 0, 1);
52867 let r = _mm_maskz_sll_epi32(0, a, count);
52868 assert_eq_m128i(r, _mm_setzero_si128());
52869 let r = _mm_maskz_sll_epi32(0b00001111, a, count);
52870 let e = _mm_set_epi32(1 << 14, 0, 0, 0);
52871 assert_eq_m128i(r, e);
52872 }
52873
52874 #[simd_test(enable = "avx512f")]
52875 unsafe fn test_mm512_srl_epi32() {
52876 #[rustfmt::skip]
52877 let a = _mm512_set_epi32(
52878 1 << 31, 1 << 0, 1 << 1, 1 << 2,
52879 0, 0, 0, 0,
52880 0, 0, 0, 0,
52881 0, 0, 0, 0,
52882 );
52883 let count = _mm_set_epi32(0, 0, 0, 2);
52884 let r = _mm512_srl_epi32(a, count);
52885 let e = _mm512_set_epi32(1 << 29, 0, 0, 1 << 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
52886 assert_eq_m512i(r, e);
52887 }
52888
52889 #[simd_test(enable = "avx512f")]
52890 unsafe fn test_mm512_mask_srl_epi32() {
52891 #[rustfmt::skip]
52892 let a = _mm512_set_epi32(
52893 1 << 31, 1 << 0, 1 << 1, 1 << 2,
52894 0, 0, 0, 0,
52895 0, 0, 0, 0,
52896 0, 0, 0, 0,
52897 );
52898 let count = _mm_set_epi32(0, 0, 0, 2);
52899 let r = _mm512_mask_srl_epi32(a, 0, a, count);
52900 assert_eq_m512i(r, a);
52901 let r = _mm512_mask_srl_epi32(a, 0b11111111_11111111, a, count);
52902 let e = _mm512_set_epi32(1 << 29, 0, 0, 1 << 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
52903 assert_eq_m512i(r, e);
52904 }
52905
52906 #[simd_test(enable = "avx512f")]
52907 unsafe fn test_mm512_maskz_srl_epi32() {
52908 #[rustfmt::skip]
52909 let a = _mm512_set_epi32(
52910 1 << 31, 1 << 0, 1 << 1, 1 << 2,
52911 0, 0, 0, 0,
52912 0, 0, 0, 0,
52913 0, 0, 0, 1 << 31,
52914 );
52915 let count = _mm_set_epi32(2, 0, 0, 2);
52916 let r = _mm512_maskz_srl_epi32(0, a, count);
52917 assert_eq_m512i(r, _mm512_setzero_si512());
52918 let r = _mm512_maskz_srl_epi32(0b00000000_11111111, a, count);
52919 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 29);
52920 assert_eq_m512i(r, e);
52921 }
52922
52923 #[simd_test(enable = "avx512f,avx512vl")]
52924 unsafe fn test_mm256_mask_srl_epi32() {
52925 let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
52926 let count = _mm_set_epi32(0, 0, 0, 1);
52927 let r = _mm256_mask_srl_epi32(a, 0, a, count);
52928 assert_eq_m256i(r, a);
52929 let r = _mm256_mask_srl_epi32(a, 0b11111111, a, count);
52930 let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
52931 assert_eq_m256i(r, e);
52932 }
52933
52934 #[simd_test(enable = "avx512f,avx512vl")]
52935 unsafe fn test_mm256_maskz_srl_epi32() {
52936 let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
52937 let count = _mm_set_epi32(0, 0, 0, 1);
52938 let r = _mm256_maskz_srl_epi32(0, a, count);
52939 assert_eq_m256i(r, _mm256_setzero_si256());
52940 let r = _mm256_maskz_srl_epi32(0b11111111, a, count);
52941 let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
52942 assert_eq_m256i(r, e);
52943 }
52944
52945 #[simd_test(enable = "avx512f,avx512vl")]
52946 unsafe fn test_mm_mask_srl_epi32() {
52947 let a = _mm_set_epi32(1 << 5, 0, 0, 0);
52948 let count = _mm_set_epi32(0, 0, 0, 1);
52949 let r = _mm_mask_srl_epi32(a, 0, a, count);
52950 assert_eq_m128i(r, a);
52951 let r = _mm_mask_srl_epi32(a, 0b00001111, a, count);
52952 let e = _mm_set_epi32(1 << 4, 0, 0, 0);
52953 assert_eq_m128i(r, e);
52954 }
52955
52956 #[simd_test(enable = "avx512f,avx512vl")]
52957 unsafe fn test_mm_maskz_srl_epi32() {
52958 let a = _mm_set_epi32(1 << 5, 0, 0, 0);
52959 let count = _mm_set_epi32(0, 0, 0, 1);
52960 let r = _mm_maskz_srl_epi32(0, a, count);
52961 assert_eq_m128i(r, _mm_setzero_si128());
52962 let r = _mm_maskz_srl_epi32(0b00001111, a, count);
52963 let e = _mm_set_epi32(1 << 4, 0, 0, 0);
52964 assert_eq_m128i(r, e);
52965 }
52966
52967 #[simd_test(enable = "avx512f")]
52968 unsafe fn test_mm512_sra_epi32() {
52969 let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
52970 let count = _mm_set_epi32(1, 0, 0, 2);
52971 let r = _mm512_sra_epi32(a, count);
52972 let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
52973 assert_eq_m512i(r, e);
52974 }
52975
52976 #[simd_test(enable = "avx512f")]
52977 unsafe fn test_mm512_mask_sra_epi32() {
52978 let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16);
52979 let count = _mm_set_epi32(0, 0, 0, 2);
52980 let r = _mm512_mask_sra_epi32(a, 0, a, count);
52981 assert_eq_m512i(r, a);
52982 let r = _mm512_mask_sra_epi32(a, 0b11111111_11111111, a, count);
52983 let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4);
52984 assert_eq_m512i(r, e);
52985 }
52986
52987 #[simd_test(enable = "avx512f")]
52988 unsafe fn test_mm512_maskz_sra_epi32() {
52989 let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -15, -14);
52990 let count = _mm_set_epi32(2, 0, 0, 2);
52991 let r = _mm512_maskz_sra_epi32(0, a, count);
52992 assert_eq_m512i(r, _mm512_setzero_si512());
52993 let r = _mm512_maskz_sra_epi32(0b00000000_11111111, a, count);
52994 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -4);
52995 assert_eq_m512i(r, e);
52996 }
52997
52998 #[simd_test(enable = "avx512f,avx512vl")]
52999 unsafe fn test_mm256_mask_sra_epi32() {
53000 let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
53001 let count = _mm_set_epi32(0, 0, 0, 1);
53002 let r = _mm256_mask_sra_epi32(a, 0, a, count);
53003 assert_eq_m256i(r, a);
53004 let r = _mm256_mask_sra_epi32(a, 0b11111111, a, count);
53005 let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
53006 assert_eq_m256i(r, e);
53007 }
53008
53009 #[simd_test(enable = "avx512f,avx512vl")]
53010 unsafe fn test_mm256_maskz_sra_epi32() {
53011 let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
53012 let count = _mm_set_epi32(0, 0, 0, 1);
53013 let r = _mm256_maskz_sra_epi32(0, a, count);
53014 assert_eq_m256i(r, _mm256_setzero_si256());
53015 let r = _mm256_maskz_sra_epi32(0b11111111, a, count);
53016 let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
53017 assert_eq_m256i(r, e);
53018 }
53019
53020 #[simd_test(enable = "avx512f,avx512vl")]
53021 unsafe fn test_mm_mask_sra_epi32() {
53022 let a = _mm_set_epi32(1 << 5, 0, 0, 0);
53023 let count = _mm_set_epi32(0, 0, 0, 1);
53024 let r = _mm_mask_sra_epi32(a, 0, a, count);
53025 assert_eq_m128i(r, a);
53026 let r = _mm_mask_sra_epi32(a, 0b00001111, a, count);
53027 let e = _mm_set_epi32(1 << 4, 0, 0, 0);
53028 assert_eq_m128i(r, e);
53029 }
53030
53031 #[simd_test(enable = "avx512f,avx512vl")]
53032 unsafe fn test_mm_maskz_sra_epi32() {
53033 let a = _mm_set_epi32(1 << 5, 0, 0, 0);
53034 let count = _mm_set_epi32(0, 0, 0, 1);
53035 let r = _mm_maskz_sra_epi32(0, a, count);
53036 assert_eq_m128i(r, _mm_setzero_si128());
53037 let r = _mm_maskz_sra_epi32(0b00001111, a, count);
53038 let e = _mm_set_epi32(1 << 4, 0, 0, 0);
53039 assert_eq_m128i(r, e);
53040 }
53041
53042 #[simd_test(enable = "avx512f")]
53043 unsafe fn test_mm512_srav_epi32() {
53044 let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
53045 let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
53046 let r = _mm512_srav_epi32(a, count);
53047 let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
53048 assert_eq_m512i(r, e);
53049 }
53050
53051 #[simd_test(enable = "avx512f")]
53052 unsafe fn test_mm512_mask_srav_epi32() {
53053 let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16);
53054 let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
53055 let r = _mm512_mask_srav_epi32(a, 0, a, count);
53056 assert_eq_m512i(r, a);
53057 let r = _mm512_mask_srav_epi32(a, 0b11111111_11111111, a, count);
53058 let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8);
53059 assert_eq_m512i(r, e);
53060 }
53061
53062 #[simd_test(enable = "avx512f")]
53063 unsafe fn test_mm512_maskz_srav_epi32() {
53064 let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -15, -14);
53065 let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2);
53066 let r = _mm512_maskz_srav_epi32(0, a, count);
53067 assert_eq_m512i(r, _mm512_setzero_si512());
53068 let r = _mm512_maskz_srav_epi32(0b00000000_11111111, a, count);
53069 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -4);
53070 assert_eq_m512i(r, e);
53071 }
53072
53073 #[simd_test(enable = "avx512f,avx512vl")]
53074 unsafe fn test_mm256_mask_srav_epi32() {
53075 let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
53076 let count = _mm256_set1_epi32(1);
53077 let r = _mm256_mask_srav_epi32(a, 0, a, count);
53078 assert_eq_m256i(r, a);
53079 let r = _mm256_mask_srav_epi32(a, 0b11111111, a, count);
53080 let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
53081 assert_eq_m256i(r, e);
53082 }
53083
53084 #[simd_test(enable = "avx512f,avx512vl")]
53085 unsafe fn test_mm256_maskz_srav_epi32() {
53086 let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
53087 let count = _mm256_set1_epi32(1);
53088 let r = _mm256_maskz_srav_epi32(0, a, count);
53089 assert_eq_m256i(r, _mm256_setzero_si256());
53090 let r = _mm256_maskz_srav_epi32(0b11111111, a, count);
53091 let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
53092 assert_eq_m256i(r, e);
53093 }
53094
53095 #[simd_test(enable = "avx512f,avx512vl")]
53096 unsafe fn test_mm_mask_srav_epi32() {
53097 let a = _mm_set_epi32(1 << 5, 0, 0, 0);
53098 let count = _mm_set1_epi32(1);
53099 let r = _mm_mask_srav_epi32(a, 0, a, count);
53100 assert_eq_m128i(r, a);
53101 let r = _mm_mask_srav_epi32(a, 0b00001111, a, count);
53102 let e = _mm_set_epi32(1 << 4, 0, 0, 0);
53103 assert_eq_m128i(r, e);
53104 }
53105
53106 #[simd_test(enable = "avx512f,avx512vl")]
53107 unsafe fn test_mm_maskz_srav_epi32() {
53108 let a = _mm_set_epi32(1 << 5, 0, 0, 0);
53109 let count = _mm_set1_epi32(1);
53110 let r = _mm_maskz_srav_epi32(0, a, count);
53111 assert_eq_m128i(r, _mm_setzero_si128());
53112 let r = _mm_maskz_srav_epi32(0b00001111, a, count);
53113 let e = _mm_set_epi32(1 << 4, 0, 0, 0);
53114 assert_eq_m128i(r, e);
53115 }
53116
53117 #[simd_test(enable = "avx512f")]
53118 unsafe fn test_mm512_srai_epi32() {
53119 let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, -15);
53120 let r = _mm512_srai_epi32::<2>(a);
53121 let e = _mm512_set_epi32(2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, -4);
53122 assert_eq_m512i(r, e);
53123 }
53124
53125 #[simd_test(enable = "avx512f")]
53126 unsafe fn test_mm512_mask_srai_epi32() {
53127 let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, -15);
53128 let r = _mm512_mask_srai_epi32::<2>(a, 0, a);
53129 assert_eq_m512i(r, a);
53130 let r = _mm512_mask_srai_epi32::<2>(a, 0b11111111_11111111, a);
53131 let e = _mm512_set_epi32(2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, -4);
53132 assert_eq_m512i(r, e);
53133 }
53134
53135 #[simd_test(enable = "avx512f")]
53136 unsafe fn test_mm512_maskz_srai_epi32() {
53137 let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, -15);
53138 let r = _mm512_maskz_srai_epi32::<2>(0, a);
53139 assert_eq_m512i(r, _mm512_setzero_si512());
53140 let r = _mm512_maskz_srai_epi32::<2>(0b00000000_11111111, a);
53141 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, -4);
53142 assert_eq_m512i(r, e);
53143 }
53144
53145 #[simd_test(enable = "avx512f,avx512vl")]
53146 unsafe fn test_mm256_mask_srai_epi32() {
53147 let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
53148 let r = _mm256_mask_srai_epi32::<1>(a, 0, a);
53149 assert_eq_m256i(r, a);
53150 let r = _mm256_mask_srai_epi32::<1>(a, 0b11111111, a);
53151 let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
53152 assert_eq_m256i(r, e);
53153 }
53154
53155 #[simd_test(enable = "avx512f,avx512vl")]
53156 unsafe fn test_mm256_maskz_srai_epi32() {
53157 let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
53158 let r = _mm256_maskz_srai_epi32::<1>(0, a);
53159 assert_eq_m256i(r, _mm256_setzero_si256());
53160 let r = _mm256_maskz_srai_epi32::<1>(0b11111111, a);
53161 let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
53162 assert_eq_m256i(r, e);
53163 }
53164
53165 #[simd_test(enable = "avx512f,avx512vl")]
53166 unsafe fn test_mm_mask_srai_epi32() {
53167 let a = _mm_set_epi32(1 << 5, 0, 0, 0);
53168 let r = _mm_mask_srai_epi32::<1>(a, 0, a);
53169 assert_eq_m128i(r, a);
53170 let r = _mm_mask_srai_epi32::<1>(a, 0b00001111, a);
53171 let e = _mm_set_epi32(1 << 4, 0, 0, 0);
53172 assert_eq_m128i(r, e);
53173 }
53174
53175 #[simd_test(enable = "avx512f,avx512vl")]
53176 unsafe fn test_mm_maskz_srai_epi32() {
53177 let a = _mm_set_epi32(1 << 5, 0, 0, 0);
53178 let r = _mm_maskz_srai_epi32::<1>(0, a);
53179 assert_eq_m128i(r, _mm_setzero_si128());
53180 let r = _mm_maskz_srai_epi32::<1>(0b00001111, a);
53181 let e = _mm_set_epi32(1 << 4, 0, 0, 0);
53182 assert_eq_m128i(r, e);
53183 }
53184
53185 #[simd_test(enable = "avx512f")]
53186 unsafe fn test_mm512_permute_ps() {
53187 let a = _mm512_setr_ps(
53188 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53189 );
53190 let r = _mm512_permute_ps::<0b11_11_11_11>(a);
53191 let e = _mm512_setr_ps(
53192 3., 3., 3., 3., 7., 7., 7., 7., 11., 11., 11., 11., 15., 15., 15., 15.,
53193 );
53194 assert_eq_m512(r, e);
53195 }
53196
53197 #[simd_test(enable = "avx512f")]
53198 unsafe fn test_mm512_mask_permute_ps() {
53199 let a = _mm512_setr_ps(
53200 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53201 );
53202 let r = _mm512_mask_permute_ps::<0b11_11_11_11>(a, 0, a);
53203 assert_eq_m512(r, a);
53204 let r = _mm512_mask_permute_ps::<0b11_11_11_11>(a, 0b11111111_11111111, a);
53205 let e = _mm512_setr_ps(
53206 3., 3., 3., 3., 7., 7., 7., 7., 11., 11., 11., 11., 15., 15., 15., 15.,
53207 );
53208 assert_eq_m512(r, e);
53209 }
53210
53211 #[simd_test(enable = "avx512f")]
53212 unsafe fn test_mm512_maskz_permute_ps() {
53213 let a = _mm512_setr_ps(
53214 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53215 );
53216 let r = _mm512_maskz_permute_ps::<0b11_11_11_11>(0, a);
53217 assert_eq_m512(r, _mm512_setzero_ps());
53218 let r = _mm512_maskz_permute_ps::<0b11_11_11_11>(0b11111111_11111111, a);
53219 let e = _mm512_setr_ps(
53220 3., 3., 3., 3., 7., 7., 7., 7., 11., 11., 11., 11., 15., 15., 15., 15.,
53221 );
53222 assert_eq_m512(r, e);
53223 }
53224
53225 #[simd_test(enable = "avx512f,avx512vl")]
53226 unsafe fn test_mm256_mask_permute_ps() {
53227 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53228 let r = _mm256_mask_permute_ps::<0b11_11_11_11>(a, 0, a);
53229 assert_eq_m256(r, a);
53230 let r = _mm256_mask_permute_ps::<0b11_11_11_11>(a, 0b11111111, a);
53231 let e = _mm256_set_ps(0., 0., 0., 0., 4., 4., 4., 4.);
53232 assert_eq_m256(r, e);
53233 }
53234
53235 #[simd_test(enable = "avx512f,avx512vl")]
53236 unsafe fn test_mm256_maskz_permute_ps() {
53237 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53238 let r = _mm256_maskz_permute_ps::<0b11_11_11_11>(0, a);
53239 assert_eq_m256(r, _mm256_setzero_ps());
53240 let r = _mm256_maskz_permute_ps::<0b11_11_11_11>(0b11111111, a);
53241 let e = _mm256_set_ps(0., 0., 0., 0., 4., 4., 4., 4.);
53242 assert_eq_m256(r, e);
53243 }
53244
53245 #[simd_test(enable = "avx512f,avx512vl")]
53246 unsafe fn test_mm_mask_permute_ps() {
53247 let a = _mm_set_ps(0., 1., 2., 3.);
53248 let r = _mm_mask_permute_ps::<0b11_11_11_11>(a, 0, a);
53249 assert_eq_m128(r, a);
53250 let r = _mm_mask_permute_ps::<0b11_11_11_11>(a, 0b00001111, a);
53251 let e = _mm_set_ps(0., 0., 0., 0.);
53252 assert_eq_m128(r, e);
53253 }
53254
53255 #[simd_test(enable = "avx512f,avx512vl")]
53256 unsafe fn test_mm_maskz_permute_ps() {
53257 let a = _mm_set_ps(0., 1., 2., 3.);
53258 let r = _mm_maskz_permute_ps::<0b11_11_11_11>(0, a);
53259 assert_eq_m128(r, _mm_setzero_ps());
53260 let r = _mm_maskz_permute_ps::<0b11_11_11_11>(0b00001111, a);
53261 let e = _mm_set_ps(0., 0., 0., 0.);
53262 assert_eq_m128(r, e);
53263 }
53264
53265 #[simd_test(enable = "avx512f")]
53266 unsafe fn test_mm512_permutevar_epi32() {
53267 let idx = _mm512_set1_epi32(1);
53268 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53269 let r = _mm512_permutevar_epi32(idx, a);
53270 let e = _mm512_set1_epi32(14);
53271 assert_eq_m512i(r, e);
53272 }
53273
53274 #[simd_test(enable = "avx512f")]
53275 unsafe fn test_mm512_mask_permutevar_epi32() {
53276 let idx = _mm512_set1_epi32(1);
53277 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53278 let r = _mm512_mask_permutevar_epi32(a, 0, idx, a);
53279 assert_eq_m512i(r, a);
53280 let r = _mm512_mask_permutevar_epi32(a, 0b11111111_11111111, idx, a);
53281 let e = _mm512_set1_epi32(14);
53282 assert_eq_m512i(r, e);
53283 }
53284
53285 #[simd_test(enable = "avx512f")]
53286 unsafe fn test_mm512_permutevar_ps() {
53287 let a = _mm512_set_ps(
53288 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53289 );
53290 let b = _mm512_set1_epi32(0b01);
53291 let r = _mm512_permutevar_ps(a, b);
53292 let e = _mm512_set_ps(
53293 2., 2., 2., 2., 6., 6., 6., 6., 10., 10., 10., 10., 14., 14., 14., 14.,
53294 );
53295 assert_eq_m512(r, e);
53296 }
53297
53298 #[simd_test(enable = "avx512f")]
53299 unsafe fn test_mm512_mask_permutevar_ps() {
53300 let a = _mm512_set_ps(
53301 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53302 );
53303 let b = _mm512_set1_epi32(0b01);
53304 let r = _mm512_mask_permutevar_ps(a, 0, a, b);
53305 assert_eq_m512(r, a);
53306 let r = _mm512_mask_permutevar_ps(a, 0b11111111_11111111, a, b);
53307 let e = _mm512_set_ps(
53308 2., 2., 2., 2., 6., 6., 6., 6., 10., 10., 10., 10., 14., 14., 14., 14.,
53309 );
53310 assert_eq_m512(r, e);
53311 }
53312
53313 #[simd_test(enable = "avx512f")]
53314 unsafe fn test_mm512_maskz_permutevar_ps() {
53315 let a = _mm512_set_ps(
53316 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53317 );
53318 let b = _mm512_set1_epi32(0b01);
53319 let r = _mm512_maskz_permutevar_ps(0, a, b);
53320 assert_eq_m512(r, _mm512_setzero_ps());
53321 let r = _mm512_maskz_permutevar_ps(0b00000000_11111111, a, b);
53322 let e = _mm512_set_ps(
53323 0., 0., 0., 0., 0., 0., 0., 0., 10., 10., 10., 10., 14., 14., 14., 14.,
53324 );
53325 assert_eq_m512(r, e);
53326 }
53327
53328 #[simd_test(enable = "avx512f,avx512vl")]
53329 unsafe fn test_mm256_mask_permutevar_ps() {
53330 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53331 let b = _mm256_set1_epi32(0b01);
53332 let r = _mm256_mask_permutevar_ps(a, 0, a, b);
53333 assert_eq_m256(r, a);
53334 let r = _mm256_mask_permutevar_ps(a, 0b11111111, a, b);
53335 let e = _mm256_set_ps(2., 2., 2., 2., 6., 6., 6., 6.);
53336 assert_eq_m256(r, e);
53337 }
53338
53339 #[simd_test(enable = "avx512f,avx512vl")]
53340 unsafe fn test_mm256_maskz_permutevar_ps() {
53341 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53342 let b = _mm256_set1_epi32(0b01);
53343 let r = _mm256_maskz_permutevar_ps(0, a, b);
53344 assert_eq_m256(r, _mm256_setzero_ps());
53345 let r = _mm256_maskz_permutevar_ps(0b11111111, a, b);
53346 let e = _mm256_set_ps(2., 2., 2., 2., 6., 6., 6., 6.);
53347 assert_eq_m256(r, e);
53348 }
53349
53350 #[simd_test(enable = "avx512f,avx512vl")]
53351 unsafe fn test_mm_mask_permutevar_ps() {
53352 let a = _mm_set_ps(0., 1., 2., 3.);
53353 let b = _mm_set1_epi32(0b01);
53354 let r = _mm_mask_permutevar_ps(a, 0, a, b);
53355 assert_eq_m128(r, a);
53356 let r = _mm_mask_permutevar_ps(a, 0b00001111, a, b);
53357 let e = _mm_set_ps(2., 2., 2., 2.);
53358 assert_eq_m128(r, e);
53359 }
53360
53361 #[simd_test(enable = "avx512f,avx512vl")]
53362 unsafe fn test_mm_maskz_permutevar_ps() {
53363 let a = _mm_set_ps(0., 1., 2., 3.);
53364 let b = _mm_set1_epi32(0b01);
53365 let r = _mm_maskz_permutevar_ps(0, a, b);
53366 assert_eq_m128(r, _mm_setzero_ps());
53367 let r = _mm_maskz_permutevar_ps(0b00001111, a, b);
53368 let e = _mm_set_ps(2., 2., 2., 2.);
53369 assert_eq_m128(r, e);
53370 }
53371
53372 #[simd_test(enable = "avx512f")]
53373 unsafe fn test_mm512_permutexvar_epi32() {
53374 let idx = _mm512_set1_epi32(1);
53375 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53376 let r = _mm512_permutexvar_epi32(idx, a);
53377 let e = _mm512_set1_epi32(14);
53378 assert_eq_m512i(r, e);
53379 }
53380
53381 #[simd_test(enable = "avx512f")]
53382 unsafe fn test_mm512_mask_permutexvar_epi32() {
53383 let idx = _mm512_set1_epi32(1);
53384 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53385 let r = _mm512_mask_permutexvar_epi32(a, 0, idx, a);
53386 assert_eq_m512i(r, a);
53387 let r = _mm512_mask_permutexvar_epi32(a, 0b11111111_11111111, idx, a);
53388 let e = _mm512_set1_epi32(14);
53389 assert_eq_m512i(r, e);
53390 }
53391
53392 #[simd_test(enable = "avx512f")]
53393 unsafe fn test_mm512_maskz_permutexvar_epi32() {
53394 let idx = _mm512_set1_epi32(1);
53395 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53396 let r = _mm512_maskz_permutexvar_epi32(0, idx, a);
53397 assert_eq_m512i(r, _mm512_setzero_si512());
53398 let r = _mm512_maskz_permutexvar_epi32(0b00000000_11111111, idx, a);
53399 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14);
53400 assert_eq_m512i(r, e);
53401 }
53402
53403 #[simd_test(enable = "avx512f,avx512vl")]
53404 unsafe fn test_mm256_permutexvar_epi32() {
53405 let idx = _mm256_set1_epi32(1);
53406 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
53407 let r = _mm256_permutexvar_epi32(idx, a);
53408 let e = _mm256_set1_epi32(6);
53409 assert_eq_m256i(r, e);
53410 }
53411
53412 #[simd_test(enable = "avx512f,avx512vl")]
53413 unsafe fn test_mm256_mask_permutexvar_epi32() {
53414 let idx = _mm256_set1_epi32(1);
53415 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
53416 let r = _mm256_mask_permutexvar_epi32(a, 0, idx, a);
53417 assert_eq_m256i(r, a);
53418 let r = _mm256_mask_permutexvar_epi32(a, 0b11111111, idx, a);
53419 let e = _mm256_set1_epi32(6);
53420 assert_eq_m256i(r, e);
53421 }
53422
53423 #[simd_test(enable = "avx512f,avx512vl")]
53424 unsafe fn test_mm256_maskz_permutexvar_epi32() {
53425 let idx = _mm256_set1_epi32(1);
53426 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
53427 let r = _mm256_maskz_permutexvar_epi32(0, idx, a);
53428 assert_eq_m256i(r, _mm256_setzero_si256());
53429 let r = _mm256_maskz_permutexvar_epi32(0b11111111, idx, a);
53430 let e = _mm256_set1_epi32(6);
53431 assert_eq_m256i(r, e);
53432 }
53433
53434 #[simd_test(enable = "avx512f")]
53435 unsafe fn test_mm512_permutexvar_ps() {
53436 let idx = _mm512_set1_epi32(1);
53437 let a = _mm512_set_ps(
53438 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53439 );
53440 let r = _mm512_permutexvar_ps(idx, a);
53441 let e = _mm512_set1_ps(14.);
53442 assert_eq_m512(r, e);
53443 }
53444
53445 #[simd_test(enable = "avx512f")]
53446 unsafe fn test_mm512_mask_permutexvar_ps() {
53447 let idx = _mm512_set1_epi32(1);
53448 let a = _mm512_set_ps(
53449 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53450 );
53451 let r = _mm512_mask_permutexvar_ps(a, 0, idx, a);
53452 assert_eq_m512(r, a);
53453 let r = _mm512_mask_permutexvar_ps(a, 0b11111111_11111111, idx, a);
53454 let e = _mm512_set1_ps(14.);
53455 assert_eq_m512(r, e);
53456 }
53457
53458 #[simd_test(enable = "avx512f")]
53459 unsafe fn test_mm512_maskz_permutexvar_ps() {
53460 let idx = _mm512_set1_epi32(1);
53461 let a = _mm512_set_ps(
53462 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53463 );
53464 let r = _mm512_maskz_permutexvar_ps(0, idx, a);
53465 assert_eq_m512(r, _mm512_setzero_ps());
53466 let r = _mm512_maskz_permutexvar_ps(0b00000000_11111111, idx, a);
53467 let e = _mm512_set_ps(
53468 0., 0., 0., 0., 0., 0., 0., 0., 14., 14., 14., 14., 14., 14., 14., 14.,
53469 );
53470 assert_eq_m512(r, e);
53471 }
53472
53473 #[simd_test(enable = "avx512f,avx512vl")]
53474 unsafe fn test_mm256_permutexvar_ps() {
53475 let idx = _mm256_set1_epi32(1);
53476 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53477 let r = _mm256_permutexvar_ps(idx, a);
53478 let e = _mm256_set1_ps(6.);
53479 assert_eq_m256(r, e);
53480 }
53481
53482 #[simd_test(enable = "avx512f,avx512vl")]
53483 unsafe fn test_mm256_mask_permutexvar_ps() {
53484 let idx = _mm256_set1_epi32(1);
53485 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53486 let r = _mm256_mask_permutexvar_ps(a, 0, idx, a);
53487 assert_eq_m256(r, a);
53488 let r = _mm256_mask_permutexvar_ps(a, 0b11111111, idx, a);
53489 let e = _mm256_set1_ps(6.);
53490 assert_eq_m256(r, e);
53491 }
53492
53493 #[simd_test(enable = "avx512f,avx512vl")]
53494 unsafe fn test_mm256_maskz_permutexvar_ps() {
53495 let idx = _mm256_set1_epi32(1);
53496 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53497 let r = _mm256_maskz_permutexvar_ps(0, idx, a);
53498 assert_eq_m256(r, _mm256_setzero_ps());
53499 let r = _mm256_maskz_permutexvar_ps(0b11111111, idx, a);
53500 let e = _mm256_set1_ps(6.);
53501 assert_eq_m256(r, e);
53502 }
53503
53504 #[simd_test(enable = "avx512f")]
53505 unsafe fn test_mm512_permutex2var_epi32() {
53506 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53507 #[rustfmt::skip]
53508 let idx = _mm512_set_epi32(
53509 1, 1 << 4, 2, 1 << 4,
53510 3, 1 << 4, 4, 1 << 4,
53511 5, 1 << 4, 6, 1 << 4,
53512 7, 1 << 4, 8, 1 << 4,
53513 );
53514 let b = _mm512_set1_epi32(100);
53515 let r = _mm512_permutex2var_epi32(a, idx, b);
53516 let e = _mm512_set_epi32(
53517 14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
53518 );
53519 assert_eq_m512i(r, e);
53520 }
53521
53522 #[simd_test(enable = "avx512f")]
53523 unsafe fn test_mm512_mask_permutex2var_epi32() {
53524 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53525 #[rustfmt::skip]
53526 let idx = _mm512_set_epi32(
53527 1, 1 << 4, 2, 1 << 4,
53528 3, 1 << 4, 4, 1 << 4,
53529 5, 1 << 4, 6, 1 << 4,
53530 7, 1 << 4, 8, 1 << 4,
53531 );
53532 let b = _mm512_set1_epi32(100);
53533 let r = _mm512_mask_permutex2var_epi32(a, 0, idx, b);
53534 assert_eq_m512i(r, a);
53535 let r = _mm512_mask_permutex2var_epi32(a, 0b11111111_11111111, idx, b);
53536 let e = _mm512_set_epi32(
53537 14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
53538 );
53539 assert_eq_m512i(r, e);
53540 }
53541
53542 #[simd_test(enable = "avx512f")]
53543 unsafe fn test_mm512_maskz_permutex2var_epi32() {
53544 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53545 #[rustfmt::skip]
53546 let idx = _mm512_set_epi32(
53547 1, 1 << 4, 2, 1 << 4,
53548 3, 1 << 4, 4, 1 << 4,
53549 5, 1 << 4, 6, 1 << 4,
53550 7, 1 << 4, 8, 1 << 4,
53551 );
53552 let b = _mm512_set1_epi32(100);
53553 let r = _mm512_maskz_permutex2var_epi32(0, a, idx, b);
53554 assert_eq_m512i(r, _mm512_setzero_si512());
53555 let r = _mm512_maskz_permutex2var_epi32(0b00000000_11111111, a, idx, b);
53556 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 10, 100, 9, 100, 8, 100, 7, 100);
53557 assert_eq_m512i(r, e);
53558 }
53559
53560 #[simd_test(enable = "avx512f")]
53561 unsafe fn test_mm512_mask2_permutex2var_epi32() {
53562 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
53563 #[rustfmt::skip]
53564 let idx = _mm512_set_epi32(
53565 1000, 1 << 4, 2000, 1 << 4,
53566 3000, 1 << 4, 4000, 1 << 4,
53567 5, 1 << 4, 6, 1 << 4,
53568 7, 1 << 4, 8, 1 << 4,
53569 );
53570 let b = _mm512_set1_epi32(100);
53571 let r = _mm512_mask2_permutex2var_epi32(a, idx, 0, b);
53572 assert_eq_m512i(r, idx);
53573 let r = _mm512_mask2_permutex2var_epi32(a, idx, 0b00000000_11111111, b);
53574 #[rustfmt::skip]
53575 let e = _mm512_set_epi32(
53576 1000, 1 << 4, 2000, 1 << 4,
53577 3000, 1 << 4, 4000, 1 << 4,
53578 10, 100, 9, 100,
53579 8, 100, 7, 100,
53580 );
53581 assert_eq_m512i(r, e);
53582 }
53583
53584 #[simd_test(enable = "avx512f,avx512vl")]
53585 unsafe fn test_mm256_permutex2var_epi32() {
53586 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
53587 let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
53588 let b = _mm256_set1_epi32(100);
53589 let r = _mm256_permutex2var_epi32(a, idx, b);
53590 let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100);
53591 assert_eq_m256i(r, e);
53592 }
53593
53594 #[simd_test(enable = "avx512f,avx512vl")]
53595 unsafe fn test_mm256_mask_permutex2var_epi32() {
53596 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
53597 let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
53598 let b = _mm256_set1_epi32(100);
53599 let r = _mm256_mask_permutex2var_epi32(a, 0, idx, b);
53600 assert_eq_m256i(r, a);
53601 let r = _mm256_mask_permutex2var_epi32(a, 0b11111111, idx, b);
53602 let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100);
53603 assert_eq_m256i(r, e);
53604 }
53605
53606 #[simd_test(enable = "avx512f,avx512vl")]
53607 unsafe fn test_mm256_maskz_permutex2var_epi32() {
53608 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
53609 let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
53610 let b = _mm256_set1_epi32(100);
53611 let r = _mm256_maskz_permutex2var_epi32(0, a, idx, b);
53612 assert_eq_m256i(r, _mm256_setzero_si256());
53613 let r = _mm256_maskz_permutex2var_epi32(0b11111111, a, idx, b);
53614 let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100);
53615 assert_eq_m256i(r, e);
53616 }
53617
53618 #[simd_test(enable = "avx512f,avx512vl")]
53619 unsafe fn test_mm256_mask2_permutex2var_epi32() {
53620 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
53621 let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
53622 let b = _mm256_set1_epi32(100);
53623 let r = _mm256_mask2_permutex2var_epi32(a, idx, 0, b);
53624 assert_eq_m256i(r, idx);
53625 let r = _mm256_mask2_permutex2var_epi32(a, idx, 0b11111111, b);
53626 let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100);
53627 assert_eq_m256i(r, e);
53628 }
53629
53630 #[simd_test(enable = "avx512f,avx512vl")]
53631 unsafe fn test_mm_permutex2var_epi32() {
53632 let a = _mm_set_epi32(0, 1, 2, 3);
53633 let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
53634 let b = _mm_set1_epi32(100);
53635 let r = _mm_permutex2var_epi32(a, idx, b);
53636 let e = _mm_set_epi32(2, 100, 1, 100);
53637 assert_eq_m128i(r, e);
53638 }
53639
53640 #[simd_test(enable = "avx512f,avx512vl")]
53641 unsafe fn test_mm_mask_permutex2var_epi32() {
53642 let a = _mm_set_epi32(0, 1, 2, 3);
53643 let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
53644 let b = _mm_set1_epi32(100);
53645 let r = _mm_mask_permutex2var_epi32(a, 0, idx, b);
53646 assert_eq_m128i(r, a);
53647 let r = _mm_mask_permutex2var_epi32(a, 0b00001111, idx, b);
53648 let e = _mm_set_epi32(2, 100, 1, 100);
53649 assert_eq_m128i(r, e);
53650 }
53651
53652 #[simd_test(enable = "avx512f,avx512vl")]
53653 unsafe fn test_mm_maskz_permutex2var_epi32() {
53654 let a = _mm_set_epi32(0, 1, 2, 3);
53655 let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
53656 let b = _mm_set1_epi32(100);
53657 let r = _mm_maskz_permutex2var_epi32(0, a, idx, b);
53658 assert_eq_m128i(r, _mm_setzero_si128());
53659 let r = _mm_maskz_permutex2var_epi32(0b00001111, a, idx, b);
53660 let e = _mm_set_epi32(2, 100, 1, 100);
53661 assert_eq_m128i(r, e);
53662 }
53663
53664 #[simd_test(enable = "avx512f,avx512vl")]
53665 unsafe fn test_mm_mask2_permutex2var_epi32() {
53666 let a = _mm_set_epi32(0, 1, 2, 3);
53667 let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
53668 let b = _mm_set1_epi32(100);
53669 let r = _mm_mask2_permutex2var_epi32(a, idx, 0, b);
53670 assert_eq_m128i(r, idx);
53671 let r = _mm_mask2_permutex2var_epi32(a, idx, 0b00001111, b);
53672 let e = _mm_set_epi32(2, 100, 1, 100);
53673 assert_eq_m128i(r, e);
53674 }
53675
53676 #[simd_test(enable = "avx512f")]
53677 unsafe fn test_mm512_permutex2var_ps() {
53678 let a = _mm512_set_ps(
53679 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53680 );
53681 #[rustfmt::skip]
53682 let idx = _mm512_set_epi32(
53683 1, 1 << 4, 2, 1 << 4,
53684 3, 1 << 4, 4, 1 << 4,
53685 5, 1 << 4, 6, 1 << 4,
53686 7, 1 << 4, 8, 1 << 4,
53687 );
53688 let b = _mm512_set1_ps(100.);
53689 let r = _mm512_permutex2var_ps(a, idx, b);
53690 let e = _mm512_set_ps(
53691 14., 100., 13., 100., 12., 100., 11., 100., 10., 100., 9., 100., 8., 100., 7., 100.,
53692 );
53693 assert_eq_m512(r, e);
53694 }
53695
53696 #[simd_test(enable = "avx512f")]
53697 unsafe fn test_mm512_mask_permutex2var_ps() {
53698 let a = _mm512_set_ps(
53699 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53700 );
53701 #[rustfmt::skip]
53702 let idx = _mm512_set_epi32(
53703 1, 1 << 4, 2, 1 << 4,
53704 3, 1 << 4, 4, 1 << 4,
53705 5, 1 << 4, 6, 1 << 4,
53706 7, 1 << 4, 8, 1 << 4,
53707 );
53708 let b = _mm512_set1_ps(100.);
53709 let r = _mm512_mask_permutex2var_ps(a, 0, idx, b);
53710 assert_eq_m512(r, a);
53711 let r = _mm512_mask_permutex2var_ps(a, 0b11111111_11111111, idx, b);
53712 let e = _mm512_set_ps(
53713 14., 100., 13., 100., 12., 100., 11., 100., 10., 100., 9., 100., 8., 100., 7., 100.,
53714 );
53715 assert_eq_m512(r, e);
53716 }
53717
53718 #[simd_test(enable = "avx512f")]
53719 unsafe fn test_mm512_maskz_permutex2var_ps() {
53720 let a = _mm512_set_ps(
53721 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53722 );
53723 #[rustfmt::skip]
53724 let idx = _mm512_set_epi32(
53725 1, 1 << 4, 2, 1 << 4,
53726 3, 1 << 4, 4, 1 << 4,
53727 5, 1 << 4, 6, 1 << 4,
53728 7, 1 << 4, 8, 1 << 4,
53729 );
53730 let b = _mm512_set1_ps(100.);
53731 let r = _mm512_maskz_permutex2var_ps(0, a, idx, b);
53732 assert_eq_m512(r, _mm512_setzero_ps());
53733 let r = _mm512_maskz_permutex2var_ps(0b00000000_11111111, a, idx, b);
53734 let e = _mm512_set_ps(
53735 0., 0., 0., 0., 0., 0., 0., 0., 10., 100., 9., 100., 8., 100., 7., 100.,
53736 );
53737 assert_eq_m512(r, e);
53738 }
53739
53740 #[simd_test(enable = "avx512f")]
53741 unsafe fn test_mm512_mask2_permutex2var_ps() {
53742 let a = _mm512_set_ps(
53743 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
53744 );
53745 #[rustfmt::skip]
53746 let idx = _mm512_set_epi32(
53747 1, 1 << 4, 2, 1 << 4,
53748 3, 1 << 4, 4, 1 << 4,
53749 5, 1 << 4, 6, 1 << 4,
53750 7, 1 << 4, 8, 1 << 4,
53751 );
53752 let b = _mm512_set1_ps(100.);
53753 let r = _mm512_mask2_permutex2var_ps(a, idx, 0, b);
53754 assert_eq_m512(r, _mm512_castsi512_ps(idx));
53755 let r = _mm512_mask2_permutex2var_ps(a, idx, 0b11111111_11111111, b);
53756 let e = _mm512_set_ps(
53757 14., 100., 13., 100., 12., 100., 11., 100., 10., 100., 9., 100., 8., 100., 7., 100.,
53758 );
53759 assert_eq_m512(r, e);
53760 }
53761
53762 #[simd_test(enable = "avx512f,avx512vl")]
53763 unsafe fn test_mm256_permutex2var_ps() {
53764 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53765 let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
53766 let b = _mm256_set1_ps(100.);
53767 let r = _mm256_permutex2var_ps(a, idx, b);
53768 let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.);
53769 assert_eq_m256(r, e);
53770 }
53771
53772 #[simd_test(enable = "avx512f,avx512vl")]
53773 unsafe fn test_mm256_mask_permutex2var_ps() {
53774 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53775 let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
53776 let b = _mm256_set1_ps(100.);
53777 let r = _mm256_mask_permutex2var_ps(a, 0, idx, b);
53778 assert_eq_m256(r, a);
53779 let r = _mm256_mask_permutex2var_ps(a, 0b11111111, idx, b);
53780 let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.);
53781 assert_eq_m256(r, e);
53782 }
53783
53784 #[simd_test(enable = "avx512f,avx512vl")]
53785 unsafe fn test_mm256_maskz_permutex2var_ps() {
53786 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53787 let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
53788 let b = _mm256_set1_ps(100.);
53789 let r = _mm256_maskz_permutex2var_ps(0, a, idx, b);
53790 assert_eq_m256(r, _mm256_setzero_ps());
53791 let r = _mm256_maskz_permutex2var_ps(0b11111111, a, idx, b);
53792 let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.);
53793 assert_eq_m256(r, e);
53794 }
53795
53796 #[simd_test(enable = "avx512f,avx512vl")]
53797 unsafe fn test_mm256_mask2_permutex2var_ps() {
53798 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
53799 let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
53800 let b = _mm256_set1_ps(100.);
53801 let r = _mm256_mask2_permutex2var_ps(a, idx, 0, b);
53802 assert_eq_m256(r, _mm256_castsi256_ps(idx));
53803 let r = _mm256_mask2_permutex2var_ps(a, idx, 0b11111111, b);
53804 let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.);
53805 assert_eq_m256(r, e);
53806 }
53807
53808 #[simd_test(enable = "avx512f,avx512vl")]
53809 unsafe fn test_mm_permutex2var_ps() {
53810 let a = _mm_set_ps(0., 1., 2., 3.);
53811 let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
53812 let b = _mm_set1_ps(100.);
53813 let r = _mm_permutex2var_ps(a, idx, b);
53814 let e = _mm_set_ps(2., 100., 1., 100.);
53815 assert_eq_m128(r, e);
53816 }
53817
53818 #[simd_test(enable = "avx512f,avx512vl")]
53819 unsafe fn test_mm_mask_permutex2var_ps() {
53820 let a = _mm_set_ps(0., 1., 2., 3.);
53821 let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
53822 let b = _mm_set1_ps(100.);
53823 let r = _mm_mask_permutex2var_ps(a, 0, idx, b);
53824 assert_eq_m128(r, a);
53825 let r = _mm_mask_permutex2var_ps(a, 0b00001111, idx, b);
53826 let e = _mm_set_ps(2., 100., 1., 100.);
53827 assert_eq_m128(r, e);
53828 }
53829
53830 #[simd_test(enable = "avx512f,avx512vl")]
53831 unsafe fn test_mm_maskz_permutex2var_ps() {
53832 let a = _mm_set_ps(0., 1., 2., 3.);
53833 let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
53834 let b = _mm_set1_ps(100.);
53835 let r = _mm_maskz_permutex2var_ps(0, a, idx, b);
53836 assert_eq_m128(r, _mm_setzero_ps());
53837 let r = _mm_maskz_permutex2var_ps(0b00001111, a, idx, b);
53838 let e = _mm_set_ps(2., 100., 1., 100.);
53839 assert_eq_m128(r, e);
53840 }
53841
53842 #[simd_test(enable = "avx512f,avx512vl")]
53843 unsafe fn test_mm_mask2_permutex2var_ps() {
53844 let a = _mm_set_ps(0., 1., 2., 3.);
53845 let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
53846 let b = _mm_set1_ps(100.);
53847 let r = _mm_mask2_permutex2var_ps(a, idx, 0, b);
53848 assert_eq_m128(r, _mm_castsi128_ps(idx));
53849 let r = _mm_mask2_permutex2var_ps(a, idx, 0b00001111, b);
53850 let e = _mm_set_ps(2., 100., 1., 100.);
53851 assert_eq_m128(r, e);
53852 }
53853
53854 #[simd_test(enable = "avx512f")]
53855 unsafe fn test_mm512_shuffle_epi32() {
53856 let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
53857 let r = _mm512_shuffle_epi32::<_MM_PERM_AADD>(a);
53858 let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 8, 8, 1, 1, 16, 16, 9, 9);
53859 assert_eq_m512i(r, e);
53860 }
53861
53862 #[simd_test(enable = "avx512f")]
53863 unsafe fn test_mm512_mask_shuffle_epi32() {
53864 let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
53865 let r = _mm512_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0, a);
53866 assert_eq_m512i(r, a);
53867 let r = _mm512_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0b11111111_11111111, a);
53868 let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 8, 8, 1, 1, 16, 16, 9, 9);
53869 assert_eq_m512i(r, e);
53870 }
53871
53872 #[simd_test(enable = "avx512f")]
53873 unsafe fn test_mm512_maskz_shuffle_epi32() {
53874 let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
53875 let r = _mm512_maskz_shuffle_epi32::<_MM_PERM_AADD>(0, a);
53876 assert_eq_m512i(r, _mm512_setzero_si512());
53877 let r = _mm512_maskz_shuffle_epi32::<_MM_PERM_AADD>(0b00000000_11111111, a);
53878 let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 0, 0, 0, 0, 0, 0, 0, 0);
53879 assert_eq_m512i(r, e);
53880 }
53881
53882 #[simd_test(enable = "avx512f,avx512vl")]
53883 unsafe fn test_mm256_mask_shuffle_epi32() {
53884 let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
53885 let r = _mm256_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0, a);
53886 assert_eq_m256i(r, a);
53887 let r = _mm256_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0b11111111, a);
53888 let e = _mm256_set_epi32(8, 8, 1, 1, 16, 16, 9, 9);
53889 assert_eq_m256i(r, e);
53890 }
53891
53892 #[simd_test(enable = "avx512f,avx512vl")]
53893 unsafe fn test_mm256_maskz_shuffle_epi32() {
53894 let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
53895 let r = _mm256_maskz_shuffle_epi32::<_MM_PERM_AADD>(0, a);
53896 assert_eq_m256i(r, _mm256_setzero_si256());
53897 let r = _mm256_maskz_shuffle_epi32::<_MM_PERM_AADD>(0b11111111, a);
53898 let e = _mm256_set_epi32(8, 8, 1, 1, 16, 16, 9, 9);
53899 assert_eq_m256i(r, e);
53900 }
53901
53902 #[simd_test(enable = "avx512f,avx512vl")]
53903 unsafe fn test_mm_mask_shuffle_epi32() {
53904 let a = _mm_set_epi32(1, 4, 5, 8);
53905 let r = _mm_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0, a);
53906 assert_eq_m128i(r, a);
53907 let r = _mm_mask_shuffle_epi32::<_MM_PERM_AADD>(a, 0b00001111, a);
53908 let e = _mm_set_epi32(8, 8, 1, 1);
53909 assert_eq_m128i(r, e);
53910 }
53911
53912 #[simd_test(enable = "avx512f,avx512vl")]
53913 unsafe fn test_mm_maskz_shuffle_epi32() {
53914 let a = _mm_set_epi32(1, 4, 5, 8);
53915 let r = _mm_maskz_shuffle_epi32::<_MM_PERM_AADD>(0, a);
53916 assert_eq_m128i(r, _mm_setzero_si128());
53917 let r = _mm_maskz_shuffle_epi32::<_MM_PERM_AADD>(0b00001111, a);
53918 let e = _mm_set_epi32(8, 8, 1, 1);
53919 assert_eq_m128i(r, e);
53920 }
53921
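    // For shuffle_ps the two low 2-bit fields of the control pick elements from `a` and the
    // two high fields pick from `b`, per 128-bit lane; 0b00_00_11_11 yields [a3, a3, b0, b0].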
53922 #[simd_test(enable = "avx512f")]
53923 unsafe fn test_mm512_shuffle_ps() {
53924 let a = _mm512_setr_ps(
53925 1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
53926 );
53927 let b = _mm512_setr_ps(
53928 2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
53929 );
53930 let r = _mm512_shuffle_ps::<0b00_00_11_11>(a, b);
53931 let e = _mm512_setr_ps(
53932 8., 8., 2., 2., 16., 16., 10., 10., 8., 8., 2., 2., 16., 16., 10., 10.,
53933 );
53934 assert_eq_m512(r, e);
53935 }
53936
53937 #[simd_test(enable = "avx512f")]
53938 unsafe fn test_mm512_mask_shuffle_ps() {
53939 let a = _mm512_setr_ps(
53940 1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
53941 );
53942 let b = _mm512_setr_ps(
53943 2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
53944 );
53945 let r = _mm512_mask_shuffle_ps::<0b00_00_11_11>(a, 0, a, b);
53946 assert_eq_m512(r, a);
53947 let r = _mm512_mask_shuffle_ps::<0b00_00_11_11>(a, 0b11111111_11111111, a, b);
53948 let e = _mm512_setr_ps(
53949 8., 8., 2., 2., 16., 16., 10., 10., 8., 8., 2., 2., 16., 16., 10., 10.,
53950 );
53951 assert_eq_m512(r, e);
53952 }
53953
53954 #[simd_test(enable = "avx512f")]
53955 unsafe fn test_mm512_maskz_shuffle_ps() {
53956 let a = _mm512_setr_ps(
53957 1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
53958 );
53959 let b = _mm512_setr_ps(
53960 2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
53961 );
53962 let r = _mm512_maskz_shuffle_ps::<0b00_00_11_11>(0, a, b);
53963 assert_eq_m512(r, _mm512_setzero_ps());
53964 let r = _mm512_maskz_shuffle_ps::<0b00_00_11_11>(0b00000000_11111111, a, b);
53965 let e = _mm512_setr_ps(
53966 8., 8., 2., 2., 16., 16., 10., 10., 0., 0., 0., 0., 0., 0., 0., 0.,
53967 );
53968 assert_eq_m512(r, e);
53969 }
53970
53971 #[simd_test(enable = "avx512f,avx512vl")]
53972 unsafe fn test_mm256_mask_shuffle_ps() {
53973 let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
53974 let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
53975 let r = _mm256_mask_shuffle_ps::<0b11_11_11_11>(a, 0, a, b);
53976 assert_eq_m256(r, a);
53977 let r = _mm256_mask_shuffle_ps::<0b00_00_11_11>(a, 0b11111111, a, b);
53978 let e = _mm256_set_ps(7., 7., 1., 1., 15., 15., 9., 9.);
53979 assert_eq_m256(r, e);
53980 }
53981
53982 #[simd_test(enable = "avx512f,avx512vl")]
53983 unsafe fn test_mm256_maskz_shuffle_ps() {
53984 let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
53985 let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
53986 let r = _mm256_maskz_shuffle_ps::<0b11_11_11_11>(0, a, b);
53987 assert_eq_m256(r, _mm256_setzero_ps());
53988 let r = _mm256_maskz_shuffle_ps::<0b00_00_11_11>(0b11111111, a, b);
53989 let e = _mm256_set_ps(7., 7., 1., 1., 15., 15., 9., 9.);
53990 assert_eq_m256(r, e);
53991 }
53992
53993 #[simd_test(enable = "avx512f,avx512vl")]
53994 unsafe fn test_mm_mask_shuffle_ps() {
53995 let a = _mm_set_ps(1., 4., 5., 8.);
53996 let b = _mm_set_ps(2., 3., 6., 7.);
53997 let r = _mm_mask_shuffle_ps::<0b11_11_11_11>(a, 0, a, b);
53998 assert_eq_m128(r, a);
53999 let r = _mm_mask_shuffle_ps::<0b00_00_11_11>(a, 0b00001111, a, b);
54000 let e = _mm_set_ps(7., 7., 1., 1.);
54001 assert_eq_m128(r, e);
54002 }
54003
54004 #[simd_test(enable = "avx512f,avx512vl")]
54005 unsafe fn test_mm_maskz_shuffle_ps() {
54006 let a = _mm_set_ps(1., 4., 5., 8.);
54007 let b = _mm_set_ps(2., 3., 6., 7.);
54008 let r = _mm_maskz_shuffle_ps::<0b11_11_11_11>(0, a, b);
54009 assert_eq_m128(r, _mm_setzero_ps());
54010 let r = _mm_maskz_shuffle_ps::<0b00_00_11_11>(0b00001111, a, b);
54011 let e = _mm_set_ps(7., 7., 1., 1.);
54012 assert_eq_m128(r, e);
54013 }
54014
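    // shuffle_i32x4 (and shuffle_f32x4 below) selects whole 128-bit lanes: the low control
    // fields pick lanes of `a` for the lower half of the result, the high fields pick lanes
    // of `b` for the upper half.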
54015 #[simd_test(enable = "avx512f")]
54016 unsafe fn test_mm512_shuffle_i32x4() {
54017 let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
54018 let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15);
54019 let r = _mm512_shuffle_i32x4::<0b00_00_00_00>(a, b);
54020 let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 2, 3, 6, 7, 2, 3, 6, 7);
54021 assert_eq_m512i(r, e);
54022 }
54023
54024 #[simd_test(enable = "avx512f")]
54025 unsafe fn test_mm512_mask_shuffle_i32x4() {
54026 let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
54027 let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15);
54028 let r = _mm512_mask_shuffle_i32x4::<0b00_00_00_00>(a, 0, a, b);
54029 assert_eq_m512i(r, a);
54030 let r = _mm512_mask_shuffle_i32x4::<0b00_00_00_00>(a, 0b11111111_11111111, a, b);
54031 let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 2, 3, 6, 7, 2, 3, 6, 7);
54032 assert_eq_m512i(r, e);
54033 }
54034
54035 #[simd_test(enable = "avx512f")]
54036 unsafe fn test_mm512_maskz_shuffle_i32x4() {
54037 let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
54038 let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15);
54039 let r = _mm512_maskz_shuffle_i32x4::<0b00_00_00_00>(0, a, b);
54040 assert_eq_m512i(r, _mm512_setzero_si512());
54041 let r = _mm512_maskz_shuffle_i32x4::<0b00_00_00_00>(0b00000000_11111111, a, b);
54042 let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 0, 0, 0, 0, 0, 0, 0, 0);
54043 assert_eq_m512i(r, e);
54044 }
54045
54046 #[simd_test(enable = "avx512f,avx512vl")]
54047 unsafe fn test_mm256_shuffle_i32x4() {
54048 let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
54049 let b = _mm256_set_epi32(2, 3, 6, 7, 10, 11, 14, 15);
54050 let r = _mm256_shuffle_i32x4::<0b00>(a, b);
54051 let e = _mm256_set_epi32(10, 11, 14, 15, 9, 12, 13, 16);
54052 assert_eq_m256i(r, e);
54053 }
54054
54055 #[simd_test(enable = "avx512f,avx512vl")]
54056 unsafe fn test_mm256_mask_shuffle_i32x4() {
54057 let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
54058 let b = _mm256_set_epi32(2, 3, 6, 7, 10, 11, 14, 15);
54059 let r = _mm256_mask_shuffle_i32x4::<0b00>(a, 0, a, b);
54060 assert_eq_m256i(r, a);
54061 let r = _mm256_mask_shuffle_i32x4::<0b00>(a, 0b11111111, a, b);
54062 let e = _mm256_set_epi32(10, 11, 14, 15, 9, 12, 13, 16);
54063 assert_eq_m256i(r, e);
54064 }
54065
54066 #[simd_test(enable = "avx512f,avx512vl")]
54067 unsafe fn test_mm256_maskz_shuffle_i32x4() {
54068 let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
54069 let b = _mm256_set_epi32(2, 3, 6, 7, 10, 11, 14, 15);
54070 let r = _mm256_maskz_shuffle_i32x4::<0b00>(0, a, b);
54071 assert_eq_m256i(r, _mm256_setzero_si256());
54072 let r = _mm256_maskz_shuffle_i32x4::<0b00>(0b11111111, a, b);
54073 let e = _mm256_set_epi32(10, 11, 14, 15, 9, 12, 13, 16);
54074 assert_eq_m256i(r, e);
54075 }
54076
54077 #[simd_test(enable = "avx512f")]
54078 unsafe fn test_mm512_shuffle_f32x4() {
54079 let a = _mm512_setr_ps(
54080 1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
54081 );
54082 let b = _mm512_setr_ps(
54083 2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
54084 );
54085 let r = _mm512_shuffle_f32x4::<0b00_00_00_00>(a, b);
54086 let e = _mm512_setr_ps(
54087 1., 4., 5., 8., 1., 4., 5., 8., 2., 3., 6., 7., 2., 3., 6., 7.,
54088 );
54089 assert_eq_m512(r, e);
54090 }
54091
54092 #[simd_test(enable = "avx512f")]
54093 unsafe fn test_mm512_mask_shuffle_f32x4() {
54094 let a = _mm512_setr_ps(
54095 1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
54096 );
54097 let b = _mm512_setr_ps(
54098 2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
54099 );
54100 let r = _mm512_mask_shuffle_f32x4::<0b00_00_00_00>(a, 0, a, b);
54101 assert_eq_m512(r, a);
54102 let r = _mm512_mask_shuffle_f32x4::<0b00_00_00_00>(a, 0b11111111_11111111, a, b);
54103 let e = _mm512_setr_ps(
54104 1., 4., 5., 8., 1., 4., 5., 8., 2., 3., 6., 7., 2., 3., 6., 7.,
54105 );
54106 assert_eq_m512(r, e);
54107 }
54108
54109 #[simd_test(enable = "avx512f")]
54110 unsafe fn test_mm512_maskz_shuffle_f32x4() {
54111 let a = _mm512_setr_ps(
54112 1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
54113 );
54114 let b = _mm512_setr_ps(
54115 2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
54116 );
54117 let r = _mm512_maskz_shuffle_f32x4::<0b00_00_00_00>(0, a, b);
54118 assert_eq_m512(r, _mm512_setzero_ps());
54119 let r = _mm512_maskz_shuffle_f32x4::<0b00_00_00_00>(0b00000000_11111111, a, b);
54120 let e = _mm512_setr_ps(
54121 1., 4., 5., 8., 1., 4., 5., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
54122 );
54123 assert_eq_m512(r, e);
54124 }
54125
54126 #[simd_test(enable = "avx512f,avx512vl")]
54127 unsafe fn test_mm256_shuffle_f32x4() {
54128 let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
54129 let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
54130 let r = _mm256_shuffle_f32x4::<0b00>(a, b);
54131 let e = _mm256_set_ps(10., 11., 14., 15., 9., 12., 13., 16.);
54132 assert_eq_m256(r, e);
54133 }
54134
54135 #[simd_test(enable = "avx512f,avx512vl")]
54136 unsafe fn test_mm256_mask_shuffle_f32x4() {
54137 let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
54138 let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
54139 let r = _mm256_mask_shuffle_f32x4::<0b00>(a, 0, a, b);
54140 assert_eq_m256(r, a);
54141 let r = _mm256_mask_shuffle_f32x4::<0b00>(a, 0b11111111, a, b);
54142 let e = _mm256_set_ps(10., 11., 14., 15., 9., 12., 13., 16.);
54143 assert_eq_m256(r, e);
54144 }
54145
54146 #[simd_test(enable = "avx512f,avx512vl")]
54147 unsafe fn test_mm256_maskz_shuffle_f32x4() {
54148 let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
54149 let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
54150 let r = _mm256_maskz_shuffle_f32x4::<0b00>(0, a, b);
54151 assert_eq_m256(r, _mm256_setzero_ps());
54152 let r = _mm256_maskz_shuffle_f32x4::<0b00>(0b11111111, a, b);
54153 let e = _mm256_set_ps(10., 11., 14., 15., 9., 12., 13., 16.);
54154 assert_eq_m256(r, e);
54155 }
54156
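    // The const index selects which 128-bit lane to extract; index 1 is elements 4..=7.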
54157 #[simd_test(enable = "avx512f")]
54158 unsafe fn test_mm512_extractf32x4_ps() {
54159 let a = _mm512_setr_ps(
54160 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54161 );
54162 let r = _mm512_extractf32x4_ps::<1>(a);
54163 let e = _mm_setr_ps(5., 6., 7., 8.);
54164 assert_eq_m128(r, e);
54165 }
54166
54167 #[simd_test(enable = "avx512f")]
54168 unsafe fn test_mm512_mask_extractf32x4_ps() {
54169 let a = _mm512_setr_ps(
54170 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54171 );
54172 let src = _mm_set1_ps(100.);
54173 let r = _mm512_mask_extractf32x4_ps::<1>(src, 0, a);
54174 assert_eq_m128(r, src);
54175 let r = _mm512_mask_extractf32x4_ps::<1>(src, 0b11111111, a);
54176 let e = _mm_setr_ps(5., 6., 7., 8.);
54177 assert_eq_m128(r, e);
54178 }
54179
54180 #[simd_test(enable = "avx512f")]
54181 unsafe fn test_mm512_maskz_extractf32x4_ps() {
54182 let a = _mm512_setr_ps(
54183 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54184 );
54185 let r = _mm512_maskz_extractf32x4_ps::<1>(0, a);
54186 assert_eq_m128(r, _mm_setzero_ps());
54187 let r = _mm512_maskz_extractf32x4_ps::<1>(0b00000001, a);
54188 let e = _mm_setr_ps(5., 0., 0., 0.);
54189 assert_eq_m128(r, e);
54190 }
54191
54192 #[simd_test(enable = "avx512f,avx512vl")]
54193 unsafe fn test_mm256_extractf32x4_ps() {
54194 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54195 let r = _mm256_extractf32x4_ps::<1>(a);
54196 let e = _mm_set_ps(1., 2., 3., 4.);
54197 assert_eq_m128(r, e);
54198 }
54199
54200 #[simd_test(enable = "avx512f,avx512vl")]
54201 unsafe fn test_mm256_mask_extractf32x4_ps() {
54202 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54203 let src = _mm_set1_ps(100.);
54204 let r = _mm256_mask_extractf32x4_ps::<1>(src, 0, a);
54205 assert_eq_m128(r, src);
54206 let r = _mm256_mask_extractf32x4_ps::<1>(src, 0b00001111, a);
54207 let e = _mm_set_ps(1., 2., 3., 4.);
54208 assert_eq_m128(r, e);
54209 }
54210
54211 #[simd_test(enable = "avx512f,avx512vl")]
54212 unsafe fn test_mm256_maskz_extractf32x4_ps() {
54213 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54214 let r = _mm256_maskz_extractf32x4_ps::<1>(0, a);
54215 assert_eq_m128(r, _mm_setzero_ps());
54216 let r = _mm256_maskz_extractf32x4_ps::<1>(0b00001111, a);
54217 let e = _mm_set_ps(1., 2., 3., 4.);
54218 assert_eq_m128(r, e);
54219 }
54220
54221 #[simd_test(enable = "avx512f")]
54222 unsafe fn test_mm512_extracti32x4_epi32() {
54223 let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54224 let r = _mm512_extracti32x4_epi32::<1>(a);
54225 let e = _mm_setr_epi32(5, 6, 7, 8);
54226 assert_eq_m128i(r, e);
54227 }
54228
54229 #[simd_test(enable = "avx512f")]
54230 unsafe fn test_mm512_mask_extracti32x4_epi32() {
54231 let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54232 let src = _mm_set1_epi32(100);
54233 let r = _mm512_mask_extracti32x4_epi32::<1>(src, 0, a);
54234 assert_eq_m128i(r, src);
54235 let r = _mm512_mask_extracti32x4_epi32::<1>(src, 0b11111111, a);
54236 let e = _mm_setr_epi32(5, 6, 7, 8);
54237 assert_eq_m128i(r, e);
54238 }
54239
    #[simd_test(enable = "avx512f")]
54241 unsafe fn test_mm512_maskz_extracti32x4_epi32() {
54242 let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54243 let r = _mm512_maskz_extracti32x4_epi32::<1>(0, a);
54244 assert_eq_m128i(r, _mm_setzero_si128());
54245 let r = _mm512_maskz_extracti32x4_epi32::<1>(0b00000001, a);
54246 let e = _mm_setr_epi32(5, 0, 0, 0);
54247 assert_eq_m128i(r, e);
54248 }
54249
54250 #[simd_test(enable = "avx512f,avx512vl")]
54251 unsafe fn test_mm256_extracti32x4_epi32() {
54252 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
54253 let r = _mm256_extracti32x4_epi32::<1>(a);
54254 let e = _mm_set_epi32(1, 2, 3, 4);
54255 assert_eq_m128i(r, e);
54256 }
54257
54258 #[simd_test(enable = "avx512f,avx512vl")]
54259 unsafe fn test_mm256_mask_extracti32x4_epi32() {
54260 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
54261 let src = _mm_set1_epi32(100);
54262 let r = _mm256_mask_extracti32x4_epi32::<1>(src, 0, a);
54263 assert_eq_m128i(r, src);
54264 let r = _mm256_mask_extracti32x4_epi32::<1>(src, 0b00001111, a);
54265 let e = _mm_set_epi32(1, 2, 3, 4);
54266 assert_eq_m128i(r, e);
54267 }
54268
54269 #[simd_test(enable = "avx512f,avx512vl")]
54270 unsafe fn test_mm256_maskz_extracti32x4_epi32() {
54271 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
54272 let r = _mm256_maskz_extracti32x4_epi32::<1>(0, a);
54273 assert_eq_m128i(r, _mm_setzero_si128());
54274 let r = _mm256_maskz_extracti32x4_epi32::<1>(0b00001111, a);
54275 let e = _mm_set_epi32(1, 2, 3, 4);
54276 assert_eq_m128i(r, e);
54277 }
54278
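    // moveldup duplicates the even-indexed elements into the odd positions; movehdup
    // (further below) duplicates the odd-indexed elements into the even positions.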
54279 #[simd_test(enable = "avx512f")]
54280 unsafe fn test_mm512_moveldup_ps() {
54281 let a = _mm512_setr_ps(
54282 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54283 );
54284 let r = _mm512_moveldup_ps(a);
54285 let e = _mm512_setr_ps(
54286 1., 1., 3., 3., 5., 5., 7., 7., 9., 9., 11., 11., 13., 13., 15., 15.,
54287 );
54288 assert_eq_m512(r, e);
54289 }
54290
54291 #[simd_test(enable = "avx512f")]
54292 unsafe fn test_mm512_mask_moveldup_ps() {
54293 let a = _mm512_setr_ps(
54294 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54295 );
54296 let r = _mm512_mask_moveldup_ps(a, 0, a);
54297 assert_eq_m512(r, a);
54298 let r = _mm512_mask_moveldup_ps(a, 0b11111111_11111111, a);
54299 let e = _mm512_setr_ps(
54300 1., 1., 3., 3., 5., 5., 7., 7., 9., 9., 11., 11., 13., 13., 15., 15.,
54301 );
54302 assert_eq_m512(r, e);
54303 }
54304
54305 #[simd_test(enable = "avx512f")]
54306 unsafe fn test_mm512_maskz_moveldup_ps() {
54307 let a = _mm512_setr_ps(
54308 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54309 );
54310 let r = _mm512_maskz_moveldup_ps(0, a);
54311 assert_eq_m512(r, _mm512_setzero_ps());
54312 let r = _mm512_maskz_moveldup_ps(0b00000000_11111111, a);
54313 let e = _mm512_setr_ps(
54314 1., 1., 3., 3., 5., 5., 7., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
54315 );
54316 assert_eq_m512(r, e);
54317 }
54318
54319 #[simd_test(enable = "avx512f,avx512vl")]
54320 unsafe fn test_mm256_mask_moveldup_ps() {
54321 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54322 let r = _mm256_mask_moveldup_ps(a, 0, a);
54323 assert_eq_m256(r, a);
54324 let r = _mm256_mask_moveldup_ps(a, 0b11111111, a);
54325 let e = _mm256_set_ps(2., 2., 4., 4., 6., 6., 8., 8.);
54326 assert_eq_m256(r, e);
54327 }
54328
54329 #[simd_test(enable = "avx512f,avx512vl")]
54330 unsafe fn test_mm256_maskz_moveldup_ps() {
54331 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54332 let r = _mm256_maskz_moveldup_ps(0, a);
54333 assert_eq_m256(r, _mm256_setzero_ps());
54334 let r = _mm256_maskz_moveldup_ps(0b11111111, a);
54335 let e = _mm256_set_ps(2., 2., 4., 4., 6., 6., 8., 8.);
54336 assert_eq_m256(r, e);
54337 }
54338
54339 #[simd_test(enable = "avx512f,avx512vl")]
54340 unsafe fn test_mm_mask_moveldup_ps() {
54341 let a = _mm_set_ps(1., 2., 3., 4.);
54342 let r = _mm_mask_moveldup_ps(a, 0, a);
54343 assert_eq_m128(r, a);
54344 let r = _mm_mask_moveldup_ps(a, 0b00001111, a);
54345 let e = _mm_set_ps(2., 2., 4., 4.);
54346 assert_eq_m128(r, e);
54347 }
54348
54349 #[simd_test(enable = "avx512f,avx512vl")]
54350 unsafe fn test_mm_maskz_moveldup_ps() {
54351 let a = _mm_set_ps(1., 2., 3., 4.);
54352 let r = _mm_maskz_moveldup_ps(0, a);
54353 assert_eq_m128(r, _mm_setzero_ps());
54354 let r = _mm_maskz_moveldup_ps(0b00001111, a);
54355 let e = _mm_set_ps(2., 2., 4., 4.);
54356 assert_eq_m128(r, e);
54357 }
54358
54359 #[simd_test(enable = "avx512f")]
54360 unsafe fn test_mm512_movehdup_ps() {
54361 let a = _mm512_setr_ps(
54362 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54363 );
54364 let r = _mm512_movehdup_ps(a);
54365 let e = _mm512_setr_ps(
54366 2., 2., 4., 4., 6., 6., 8., 8., 10., 10., 12., 12., 14., 14., 16., 16.,
54367 );
54368 assert_eq_m512(r, e);
54369 }
54370
54371 #[simd_test(enable = "avx512f")]
54372 unsafe fn test_mm512_mask_movehdup_ps() {
54373 let a = _mm512_setr_ps(
54374 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54375 );
54376 let r = _mm512_mask_movehdup_ps(a, 0, a);
54377 assert_eq_m512(r, a);
54378 let r = _mm512_mask_movehdup_ps(a, 0b11111111_11111111, a);
54379 let e = _mm512_setr_ps(
54380 2., 2., 4., 4., 6., 6., 8., 8., 10., 10., 12., 12., 14., 14., 16., 16.,
54381 );
54382 assert_eq_m512(r, e);
54383 }
54384
54385 #[simd_test(enable = "avx512f")]
54386 unsafe fn test_mm512_maskz_movehdup_ps() {
54387 let a = _mm512_setr_ps(
54388 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54389 );
54390 let r = _mm512_maskz_movehdup_ps(0, a);
54391 assert_eq_m512(r, _mm512_setzero_ps());
54392 let r = _mm512_maskz_movehdup_ps(0b00000000_11111111, a);
54393 let e = _mm512_setr_ps(
54394 2., 2., 4., 4., 6., 6., 8., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
54395 );
54396 assert_eq_m512(r, e);
54397 }
54398
54399 #[simd_test(enable = "avx512f,avx512vl")]
54400 unsafe fn test_mm256_mask_movehdup_ps() {
54401 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54402 let r = _mm256_mask_movehdup_ps(a, 0, a);
54403 assert_eq_m256(r, a);
54404 let r = _mm256_mask_movehdup_ps(a, 0b11111111, a);
54405 let e = _mm256_set_ps(1., 1., 3., 3., 5., 5., 7., 7.);
54406 assert_eq_m256(r, e);
54407 }
54408
54409 #[simd_test(enable = "avx512f,avx512vl")]
54410 unsafe fn test_mm256_maskz_movehdup_ps() {
54411 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54412 let r = _mm256_maskz_movehdup_ps(0, a);
54413 assert_eq_m256(r, _mm256_setzero_ps());
54414 let r = _mm256_maskz_movehdup_ps(0b11111111, a);
54415 let e = _mm256_set_ps(1., 1., 3., 3., 5., 5., 7., 7.);
54416 assert_eq_m256(r, e);
54417 }
54418
54419 #[simd_test(enable = "avx512f,avx512vl")]
54420 unsafe fn test_mm_mask_movehdup_ps() {
54421 let a = _mm_set_ps(1., 2., 3., 4.);
54422 let r = _mm_mask_movehdup_ps(a, 0, a);
54423 assert_eq_m128(r, a);
54424 let r = _mm_mask_movehdup_ps(a, 0b00001111, a);
54425 let e = _mm_set_ps(1., 1., 3., 3.);
54426 assert_eq_m128(r, e);
54427 }
54428
54429 #[simd_test(enable = "avx512f,avx512vl")]
54430 unsafe fn test_mm_maskz_movehdup_ps() {
54431 let a = _mm_set_ps(1., 2., 3., 4.);
54432 let r = _mm_maskz_movehdup_ps(0, a);
54433 assert_eq_m128(r, _mm_setzero_ps());
54434 let r = _mm_maskz_movehdup_ps(0b00001111, a);
54435 let e = _mm_set_ps(1., 1., 3., 3.);
54436 assert_eq_m128(r, e);
54437 }
54438
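    // inserti32x4/insertf32x4 overwrite the 128-bit lane selected by the const index with `b`.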
54439 #[simd_test(enable = "avx512f")]
54440 unsafe fn test_mm512_inserti32x4() {
54441 let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54442 let b = _mm_setr_epi32(17, 18, 19, 20);
54443 let r = _mm512_inserti32x4::<0>(a, b);
54444 let e = _mm512_setr_epi32(17, 18, 19, 20, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54445 assert_eq_m512i(r, e);
54446 }
54447
54448 #[simd_test(enable = "avx512f")]
54449 unsafe fn test_mm512_mask_inserti32x4() {
54450 let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54451 let b = _mm_setr_epi32(17, 18, 19, 20);
54452 let r = _mm512_mask_inserti32x4::<0>(a, 0, a, b);
54453 assert_eq_m512i(r, a);
54454 let r = _mm512_mask_inserti32x4::<0>(a, 0b11111111_11111111, a, b);
54455 let e = _mm512_setr_epi32(17, 18, 19, 20, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54456 assert_eq_m512i(r, e);
54457 }
54458
54459 #[simd_test(enable = "avx512f")]
54460 unsafe fn test_mm512_maskz_inserti32x4() {
54461 let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54462 let b = _mm_setr_epi32(17, 18, 19, 20);
54463 let r = _mm512_maskz_inserti32x4::<0>(0, a, b);
54464 assert_eq_m512i(r, _mm512_setzero_si512());
54465 let r = _mm512_maskz_inserti32x4::<0>(0b00000000_11111111, a, b);
54466 let e = _mm512_setr_epi32(17, 18, 19, 20, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0);
54467 assert_eq_m512i(r, e);
54468 }
54469
54470 #[simd_test(enable = "avx512f,avx512vl")]
54471 unsafe fn test_mm256_inserti32x4() {
54472 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
54473 let b = _mm_set_epi32(17, 18, 19, 20);
54474 let r = _mm256_inserti32x4::<1>(a, b);
54475 let e = _mm256_set_epi32(17, 18, 19, 20, 5, 6, 7, 8);
54476 assert_eq_m256i(r, e);
54477 }
54478
54479 #[simd_test(enable = "avx512f,avx512vl")]
54480 unsafe fn test_mm256_mask_inserti32x4() {
54481 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
54482 let b = _mm_set_epi32(17, 18, 19, 20);
54483 let r = _mm256_mask_inserti32x4::<0>(a, 0, a, b);
54484 assert_eq_m256i(r, a);
54485 let r = _mm256_mask_inserti32x4::<1>(a, 0b11111111, a, b);
54486 let e = _mm256_set_epi32(17, 18, 19, 20, 5, 6, 7, 8);
54487 assert_eq_m256i(r, e);
54488 }
54489
54490 #[simd_test(enable = "avx512f,avx512vl")]
54491 unsafe fn test_mm256_maskz_inserti32x4() {
54492 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
54493 let b = _mm_set_epi32(17, 18, 19, 20);
54494 let r = _mm256_maskz_inserti32x4::<0>(0, a, b);
54495 assert_eq_m256i(r, _mm256_setzero_si256());
54496 let r = _mm256_maskz_inserti32x4::<1>(0b11111111, a, b);
54497 let e = _mm256_set_epi32(17, 18, 19, 20, 5, 6, 7, 8);
54498 assert_eq_m256i(r, e);
54499 }
54500
54501 #[simd_test(enable = "avx512f")]
54502 unsafe fn test_mm512_insertf32x4() {
54503 let a = _mm512_setr_ps(
54504 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54505 );
54506 let b = _mm_setr_ps(17., 18., 19., 20.);
54507 let r = _mm512_insertf32x4::<0>(a, b);
54508 let e = _mm512_setr_ps(
54509 17., 18., 19., 20., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54510 );
54511 assert_eq_m512(r, e);
54512 }
54513
54514 #[simd_test(enable = "avx512f")]
54515 unsafe fn test_mm512_mask_insertf32x4() {
54516 let a = _mm512_setr_ps(
54517 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54518 );
54519 let b = _mm_setr_ps(17., 18., 19., 20.);
54520 let r = _mm512_mask_insertf32x4::<0>(a, 0, a, b);
54521 assert_eq_m512(r, a);
54522 let r = _mm512_mask_insertf32x4::<0>(a, 0b11111111_11111111, a, b);
54523 let e = _mm512_setr_ps(
54524 17., 18., 19., 20., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54525 );
54526 assert_eq_m512(r, e);
54527 }
54528
54529 #[simd_test(enable = "avx512f")]
54530 unsafe fn test_mm512_maskz_insertf32x4() {
54531 let a = _mm512_setr_ps(
54532 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
54533 );
54534 let b = _mm_setr_ps(17., 18., 19., 20.);
54535 let r = _mm512_maskz_insertf32x4::<0>(0, a, b);
54536 assert_eq_m512(r, _mm512_setzero_ps());
54537 let r = _mm512_maskz_insertf32x4::<0>(0b00000000_11111111, a, b);
54538 let e = _mm512_setr_ps(
54539 17., 18., 19., 20., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
54540 );
54541 assert_eq_m512(r, e);
54542 }
54543
54544 #[simd_test(enable = "avx512f,avx512vl")]
54545 unsafe fn test_mm256_insertf32x4() {
54546 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54547 let b = _mm_set_ps(17., 18., 19., 20.);
54548 let r = _mm256_insertf32x4::<1>(a, b);
54549 let e = _mm256_set_ps(17., 18., 19., 20., 5., 6., 7., 8.);
54550 assert_eq_m256(r, e);
54551 }
54552
54553 #[simd_test(enable = "avx512f,avx512vl")]
54554 unsafe fn test_mm256_mask_insertf32x4() {
54555 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54556 let b = _mm_set_ps(17., 18., 19., 20.);
54557 let r = _mm256_mask_insertf32x4::<0>(a, 0, a, b);
54558 assert_eq_m256(r, a);
54559 let r = _mm256_mask_insertf32x4::<1>(a, 0b11111111, a, b);
54560 let e = _mm256_set_ps(17., 18., 19., 20., 5., 6., 7., 8.);
54561 assert_eq_m256(r, e);
54562 }
54563
54564 #[simd_test(enable = "avx512f,avx512vl")]
54565 unsafe fn test_mm256_maskz_insertf32x4() {
54566 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
54567 let b = _mm_set_ps(17., 18., 19., 20.);
54568 let r = _mm256_maskz_insertf32x4::<0>(0, a, b);
54569 assert_eq_m256(r, _mm256_setzero_ps());
54570 let r = _mm256_maskz_insertf32x4::<1>(0b11111111, a, b);
54571 let e = _mm256_set_ps(17., 18., 19., 20., 5., 6., 7., 8.);
54572 assert_eq_m256(r, e);
54573 }
54574
54575 #[simd_test(enable = "avx512f")]
54576 unsafe fn test_mm512_castps128_ps512() {
54577 let a = _mm_setr_ps(17., 18., 19., 20.);
54578 let r = _mm512_castps128_ps512(a);
54579 assert_eq_m128(_mm512_castps512_ps128(r), a);
54580 }
54581
54582 #[simd_test(enable = "avx512f")]
54583 unsafe fn test_mm512_castps256_ps512() {
54584 let a = _mm256_setr_ps(17., 18., 19., 20., 21., 22., 23., 24.);
54585 let r = _mm512_castps256_ps512(a);
54586 assert_eq_m256(_mm512_castps512_ps256(r), a);
54587 }
54588
54589 #[simd_test(enable = "avx512f")]
54590 unsafe fn test_mm512_zextps128_ps512() {
54591 let a = _mm_setr_ps(17., 18., 19., 20.);
54592 let r = _mm512_zextps128_ps512(a);
54593 let e = _mm512_setr_ps(
54594 17., 18., 19., 20., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
54595 );
54596 assert_eq_m512(r, e);
54597 }
54598
54599 #[simd_test(enable = "avx512f")]
54600 unsafe fn test_mm512_zextps256_ps512() {
54601 let a = _mm256_setr_ps(17., 18., 19., 20., 21., 22., 23., 24.);
54602 let r = _mm512_zextps256_ps512(a);
54603 let e = _mm512_setr_ps(
54604 17., 18., 19., 20., 21., 22., 23., 24., 0., 0., 0., 0., 0., 0., 0., 0.,
54605 );
54606 assert_eq_m512(r, e);
54607 }
54608
54609 #[simd_test(enable = "avx512f")]
54610 unsafe fn test_mm512_castps512_ps128() {
54611 let a = _mm512_setr_ps(
54612 17., 18., 19., 20., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
54613 );
54614 let r = _mm512_castps512_ps128(a);
54615 let e = _mm_setr_ps(17., 18., 19., 20.);
54616 assert_eq_m128(r, e);
54617 }
54618
54619 #[simd_test(enable = "avx512f")]
54620 unsafe fn test_mm512_castps512_ps256() {
54621 let a = _mm512_setr_ps(
54622 17., 18., 19., 20., 21., 22., 23., 24., -1., -1., -1., -1., -1., -1., -1., -1.,
54623 );
54624 let r = _mm512_castps512_ps256(a);
54625 let e = _mm256_setr_ps(17., 18., 19., 20., 21., 22., 23., 24.);
54626 assert_eq_m256(r, e);
54627 }
54628
54629 #[simd_test(enable = "avx512f")]
54630 unsafe fn test_mm512_castps_pd() {
54631 let a = _mm512_set1_ps(1.);
54632 let r = _mm512_castps_pd(a);
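        // Two adjacent 1.0f32 bit patterns (0x3F80_0000) reinterpret as the f64
        // 0x3F80_0000_3F80_0000, approximately 0.007812501848093234.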
54633 let e = _mm512_set1_pd(0.007812501848093234);
54634 assert_eq_m512d(r, e);
54635 }
54636
54637 #[simd_test(enable = "avx512f")]
54638 unsafe fn test_mm512_castps_si512() {
54639 let a = _mm512_set1_ps(1.);
54640 let r = _mm512_castps_si512(a);
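        // 1065353216 == 0x3F80_0000, the bit pattern of 1.0f32.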
54641 let e = _mm512_set1_epi32(1065353216);
54642 assert_eq_m512i(r, e);
54643 }
54644
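    // broadcastd/broadcastss replicate the lowest element of `a`; with set_epi32(17, 18, 19, 20)
    // that element is 20.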
54645 #[simd_test(enable = "avx512f")]
54646 unsafe fn test_mm512_broadcastd_epi32() {
54647 let a = _mm_set_epi32(17, 18, 19, 20);
54648 let r = _mm512_broadcastd_epi32(a);
54649 let e = _mm512_set1_epi32(20);
54650 assert_eq_m512i(r, e);
54651 }
54652
54653 #[simd_test(enable = "avx512f")]
54654 unsafe fn test_mm512_mask_broadcastd_epi32() {
54655 let src = _mm512_set1_epi32(20);
54656 let a = _mm_set_epi32(17, 18, 19, 20);
54657 let r = _mm512_mask_broadcastd_epi32(src, 0, a);
54658 assert_eq_m512i(r, src);
54659 let r = _mm512_mask_broadcastd_epi32(src, 0b11111111_11111111, a);
54660 let e = _mm512_set1_epi32(20);
54661 assert_eq_m512i(r, e);
54662 }
54663
54664 #[simd_test(enable = "avx512f")]
54665 unsafe fn test_mm512_maskz_broadcastd_epi32() {
54666 let a = _mm_set_epi32(17, 18, 19, 20);
54667 let r = _mm512_maskz_broadcastd_epi32(0, a);
54668 assert_eq_m512i(r, _mm512_setzero_si512());
54669 let r = _mm512_maskz_broadcastd_epi32(0b00000000_11111111, a);
54670 let e = _mm512_setr_epi32(20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 0, 0, 0, 0);
54671 assert_eq_m512i(r, e);
54672 }
54673
54674 #[simd_test(enable = "avx512f,avx512vl")]
54675 unsafe fn test_mm256_mask_broadcastd_epi32() {
54676 let src = _mm256_set1_epi32(20);
54677 let a = _mm_set_epi32(17, 18, 19, 20);
54678 let r = _mm256_mask_broadcastd_epi32(src, 0, a);
54679 assert_eq_m256i(r, src);
54680 let r = _mm256_mask_broadcastd_epi32(src, 0b11111111, a);
54681 let e = _mm256_set1_epi32(20);
54682 assert_eq_m256i(r, e);
54683 }
54684
54685 #[simd_test(enable = "avx512f,avx512vl")]
54686 unsafe fn test_mm256_maskz_broadcastd_epi32() {
54687 let a = _mm_set_epi32(17, 18, 19, 20);
54688 let r = _mm256_maskz_broadcastd_epi32(0, a);
54689 assert_eq_m256i(r, _mm256_setzero_si256());
54690 let r = _mm256_maskz_broadcastd_epi32(0b11111111, a);
54691 let e = _mm256_set1_epi32(20);
54692 assert_eq_m256i(r, e);
54693 }
54694
54695 #[simd_test(enable = "avx512f,avx512vl")]
54696 unsafe fn test_mm_mask_broadcastd_epi32() {
54697 let src = _mm_set1_epi32(20);
54698 let a = _mm_set_epi32(17, 18, 19, 20);
54699 let r = _mm_mask_broadcastd_epi32(src, 0, a);
54700 assert_eq_m128i(r, src);
54701 let r = _mm_mask_broadcastd_epi32(src, 0b00001111, a);
54702 let e = _mm_set1_epi32(20);
54703 assert_eq_m128i(r, e);
54704 }
54705
54706 #[simd_test(enable = "avx512f,avx512vl")]
54707 unsafe fn test_mm_maskz_broadcastd_epi32() {
54708 let a = _mm_set_epi32(17, 18, 19, 20);
54709 let r = _mm_maskz_broadcastd_epi32(0, a);
54710 assert_eq_m128i(r, _mm_setzero_si128());
54711 let r = _mm_maskz_broadcastd_epi32(0b00001111, a);
54712 let e = _mm_set1_epi32(20);
54713 assert_eq_m128i(r, e);
54714 }
54715
54716 #[simd_test(enable = "avx512f")]
54717 unsafe fn test_mm512_broadcastss_ps() {
54718 let a = _mm_set_ps(17., 18., 19., 20.);
54719 let r = _mm512_broadcastss_ps(a);
54720 let e = _mm512_set1_ps(20.);
54721 assert_eq_m512(r, e);
54722 }
54723
54724 #[simd_test(enable = "avx512f")]
54725 unsafe fn test_mm512_mask_broadcastss_ps() {
54726 let src = _mm512_set1_ps(20.);
54727 let a = _mm_set_ps(17., 18., 19., 20.);
54728 let r = _mm512_mask_broadcastss_ps(src, 0, a);
54729 assert_eq_m512(r, src);
54730 let r = _mm512_mask_broadcastss_ps(src, 0b11111111_11111111, a);
54731 let e = _mm512_set1_ps(20.);
54732 assert_eq_m512(r, e);
54733 }
54734
54735 #[simd_test(enable = "avx512f")]
54736 unsafe fn test_mm512_maskz_broadcastss_ps() {
54737 let a = _mm_set_ps(17., 18., 19., 20.);
54738 let r = _mm512_maskz_broadcastss_ps(0, a);
54739 assert_eq_m512(r, _mm512_setzero_ps());
54740 let r = _mm512_maskz_broadcastss_ps(0b00000000_11111111, a);
54741 let e = _mm512_setr_ps(
54742 20., 20., 20., 20., 20., 20., 20., 20., 0., 0., 0., 0., 0., 0., 0., 0.,
54743 );
54744 assert_eq_m512(r, e);
54745 }
54746
54747 #[simd_test(enable = "avx512f,avx512vl")]
54748 unsafe fn test_mm256_mask_broadcastss_ps() {
54749 let src = _mm256_set1_ps(20.);
54750 let a = _mm_set_ps(17., 18., 19., 20.);
54751 let r = _mm256_mask_broadcastss_ps(src, 0, a);
54752 assert_eq_m256(r, src);
54753 let r = _mm256_mask_broadcastss_ps(src, 0b11111111, a);
54754 let e = _mm256_set1_ps(20.);
54755 assert_eq_m256(r, e);
54756 }
54757
54758 #[simd_test(enable = "avx512f,avx512vl")]
54759 unsafe fn test_mm256_maskz_broadcastss_ps() {
54760 let a = _mm_set_ps(17., 18., 19., 20.);
54761 let r = _mm256_maskz_broadcastss_ps(0, a);
54762 assert_eq_m256(r, _mm256_setzero_ps());
54763 let r = _mm256_maskz_broadcastss_ps(0b11111111, a);
54764 let e = _mm256_set1_ps(20.);
54765 assert_eq_m256(r, e);
54766 }
54767
54768 #[simd_test(enable = "avx512f,avx512vl")]
54769 unsafe fn test_mm_mask_broadcastss_ps() {
54770 let src = _mm_set1_ps(20.);
54771 let a = _mm_set_ps(17., 18., 19., 20.);
54772 let r = _mm_mask_broadcastss_ps(src, 0, a);
54773 assert_eq_m128(r, src);
54774 let r = _mm_mask_broadcastss_ps(src, 0b00001111, a);
54775 let e = _mm_set1_ps(20.);
54776 assert_eq_m128(r, e);
54777 }
54778
54779 #[simd_test(enable = "avx512f,avx512vl")]
54780 unsafe fn test_mm_maskz_broadcastss_ps() {
54781 let a = _mm_set_ps(17., 18., 19., 20.);
54782 let r = _mm_maskz_broadcastss_ps(0, a);
54783 assert_eq_m128(r, _mm_setzero_ps());
54784 let r = _mm_maskz_broadcastss_ps(0b00001111, a);
54785 let e = _mm_set1_ps(20.);
54786 assert_eq_m128(r, e);
54787 }
54788
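    // broadcast_i32x4/broadcast_f32x4 repeat the whole 128-bit source into every lane of the result.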
54789 #[simd_test(enable = "avx512f")]
54790 unsafe fn test_mm512_broadcast_i32x4() {
54791 let a = _mm_set_epi32(17, 18, 19, 20);
54792 let r = _mm512_broadcast_i32x4(a);
54793 let e = _mm512_set_epi32(
54794 17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20,
54795 );
54796 assert_eq_m512i(r, e);
54797 }
54798
54799 #[simd_test(enable = "avx512f")]
54800 unsafe fn test_mm512_mask_broadcast_i32x4() {
54801 let src = _mm512_set1_epi32(20);
54802 let a = _mm_set_epi32(17, 18, 19, 20);
54803 let r = _mm512_mask_broadcast_i32x4(src, 0, a);
54804 assert_eq_m512i(r, src);
54805 let r = _mm512_mask_broadcast_i32x4(src, 0b11111111_11111111, a);
54806 let e = _mm512_set_epi32(
54807 17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20,
54808 );
54809 assert_eq_m512i(r, e);
54810 }
54811
54812 #[simd_test(enable = "avx512f")]
54813 unsafe fn test_mm512_maskz_broadcast_i32x4() {
54814 let a = _mm_set_epi32(17, 18, 19, 20);
54815 let r = _mm512_maskz_broadcast_i32x4(0, a);
54816 assert_eq_m512i(r, _mm512_setzero_si512());
54817 let r = _mm512_maskz_broadcast_i32x4(0b00000000_11111111, a);
54818 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 17, 18, 19, 20, 17, 18, 19, 20);
54819 assert_eq_m512i(r, e);
54820 }
54821
54822 #[simd_test(enable = "avx512f,avx512vl")]
54823 unsafe fn test_mm256_broadcast_i32x4() {
54824 let a = _mm_set_epi32(17, 18, 19, 20);
54825 let r = _mm256_broadcast_i32x4(a);
54826 let e = _mm256_set_epi32(17, 18, 19, 20, 17, 18, 19, 20);
54827 assert_eq_m256i(r, e);
54828 }
54829
54830 #[simd_test(enable = "avx512f,avx512vl")]
54831 unsafe fn test_mm256_mask_broadcast_i32x4() {
54832 let src = _mm256_set1_epi32(20);
54833 let a = _mm_set_epi32(17, 18, 19, 20);
54834 let r = _mm256_mask_broadcast_i32x4(src, 0, a);
54835 assert_eq_m256i(r, src);
54836 let r = _mm256_mask_broadcast_i32x4(src, 0b11111111, a);
54837 let e = _mm256_set_epi32(17, 18, 19, 20, 17, 18, 19, 20);
54838 assert_eq_m256i(r, e);
54839 }
54840
54841 #[simd_test(enable = "avx512f,avx512vl")]
54842 unsafe fn test_mm256_maskz_broadcast_i32x4() {
54843 let a = _mm_set_epi32(17, 18, 19, 20);
54844 let r = _mm256_maskz_broadcast_i32x4(0, a);
54845 assert_eq_m256i(r, _mm256_setzero_si256());
54846 let r = _mm256_maskz_broadcast_i32x4(0b11111111, a);
54847 let e = _mm256_set_epi32(17, 18, 19, 20, 17, 18, 19, 20);
54848 assert_eq_m256i(r, e);
54849 }
54850
54851 #[simd_test(enable = "avx512f")]
54852 unsafe fn test_mm512_broadcast_f32x4() {
54853 let a = _mm_set_ps(17., 18., 19., 20.);
54854 let r = _mm512_broadcast_f32x4(a);
54855 let e = _mm512_set_ps(
54856 17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20.,
54857 );
54858 assert_eq_m512(r, e);
54859 }
54860
54861 #[simd_test(enable = "avx512f")]
54862 unsafe fn test_mm512_mask_broadcast_f32x4() {
54863 let src = _mm512_set1_ps(20.);
54864 let a = _mm_set_ps(17., 18., 19., 20.);
54865 let r = _mm512_mask_broadcast_f32x4(src, 0, a);
54866 assert_eq_m512(r, src);
54867 let r = _mm512_mask_broadcast_f32x4(src, 0b11111111_11111111, a);
54868 let e = _mm512_set_ps(
54869 17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20.,
54870 );
54871 assert_eq_m512(r, e);
54872 }
54873
54874 #[simd_test(enable = "avx512f")]
54875 unsafe fn test_mm512_maskz_broadcast_f32x4() {
54876 let a = _mm_set_ps(17., 18., 19., 20.);
54877 let r = _mm512_maskz_broadcast_f32x4(0, a);
54878 assert_eq_m512(r, _mm512_setzero_ps());
54879 let r = _mm512_maskz_broadcast_f32x4(0b00000000_11111111, a);
54880 let e = _mm512_set_ps(
54881 0., 0., 0., 0., 0., 0., 0., 0., 17., 18., 19., 20., 17., 18., 19., 20.,
54882 );
54883 assert_eq_m512(r, e);
54884 }
54885
54886 #[simd_test(enable = "avx512f,avx512vl")]
54887 unsafe fn test_mm256_broadcast_f32x4() {
54888 let a = _mm_set_ps(17., 18., 19., 20.);
54889 let r = _mm256_broadcast_f32x4(a);
54890 let e = _mm256_set_ps(17., 18., 19., 20., 17., 18., 19., 20.);
54891 assert_eq_m256(r, e);
54892 }
54893
54894 #[simd_test(enable = "avx512f,avx512vl")]
54895 unsafe fn test_mm256_mask_broadcast_f32x4() {
54896 let src = _mm256_set1_ps(20.);
54897 let a = _mm_set_ps(17., 18., 19., 20.);
54898 let r = _mm256_mask_broadcast_f32x4(src, 0, a);
54899 assert_eq_m256(r, src);
54900 let r = _mm256_mask_broadcast_f32x4(src, 0b11111111, a);
54901 let e = _mm256_set_ps(17., 18., 19., 20., 17., 18., 19., 20.);
54902 assert_eq_m256(r, e);
54903 }
54904
54905 #[simd_test(enable = "avx512f,avx512vl")]
54906 unsafe fn test_mm256_maskz_broadcast_f32x4() {
54907 let a = _mm_set_ps(17., 18., 19., 20.);
54908 let r = _mm256_maskz_broadcast_f32x4(0, a);
54909 assert_eq_m256(r, _mm256_setzero_ps());
54910 let r = _mm256_maskz_broadcast_f32x4(0b11111111, a);
54911 let e = _mm256_set_ps(17., 18., 19., 20., 17., 18., 19., 20.);
54912 assert_eq_m256(r, e);
54913 }
54914
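    // mask_blend picks the element from `b` where the mask bit is set and from `a` where it is
    // clear; bit 0 of the mask controls element 0.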
54915 #[simd_test(enable = "avx512f")]
54916 unsafe fn test_mm512_mask_blend_epi32() {
54917 let a = _mm512_set1_epi32(1);
54918 let b = _mm512_set1_epi32(2);
54919 let r = _mm512_mask_blend_epi32(0b11111111_00000000, a, b);
54920 let e = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
54921 assert_eq_m512i(r, e);
54922 }
54923
54924 #[simd_test(enable = "avx512f,avx512vl")]
54925 unsafe fn test_mm256_mask_blend_epi32() {
54926 let a = _mm256_set1_epi32(1);
54927 let b = _mm256_set1_epi32(2);
54928 let r = _mm256_mask_blend_epi32(0b11111111, a, b);
54929 let e = _mm256_set1_epi32(2);
54930 assert_eq_m256i(r, e);
54931 }
54932
54933 #[simd_test(enable = "avx512f,avx512vl")]
54934 unsafe fn test_mm_mask_blend_epi32() {
54935 let a = _mm_set1_epi32(1);
54936 let b = _mm_set1_epi32(2);
54937 let r = _mm_mask_blend_epi32(0b00001111, a, b);
54938 let e = _mm_set1_epi32(2);
54939 assert_eq_m128i(r, e);
54940 }
54941
54942 #[simd_test(enable = "avx512f")]
54943 unsafe fn test_mm512_mask_blend_ps() {
54944 let a = _mm512_set1_ps(1.);
54945 let b = _mm512_set1_ps(2.);
54946 let r = _mm512_mask_blend_ps(0b11111111_00000000, a, b);
54947 let e = _mm512_set_ps(
54948 2., 2., 2., 2., 2., 2., 2., 2., 1., 1., 1., 1., 1., 1., 1., 1.,
54949 );
54950 assert_eq_m512(r, e);
54951 }
54952
54953 #[simd_test(enable = "avx512f,avx512vl")]
54954 unsafe fn test_mm256_mask_blend_ps() {
54955 let a = _mm256_set1_ps(1.);
54956 let b = _mm256_set1_ps(2.);
54957 let r = _mm256_mask_blend_ps(0b11111111, a, b);
54958 let e = _mm256_set1_ps(2.);
54959 assert_eq_m256(r, e);
54960 }
54961
54962 #[simd_test(enable = "avx512f,avx512vl")]
54963 unsafe fn test_mm_mask_blend_ps() {
54964 let a = _mm_set1_ps(1.);
54965 let b = _mm_set1_ps(2.);
54966 let r = _mm_mask_blend_ps(0b00001111, a, b);
54967 let e = _mm_set1_ps(2.);
54968 assert_eq_m128(r, e);
54969 }
54970
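    // unpackhi interleaves the upper halves of each 128-bit lane of `a` and `b`; unpacklo
    // (further below) interleaves the lower halves.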
54971 #[simd_test(enable = "avx512f")]
54972 unsafe fn test_mm512_unpackhi_epi32() {
54973 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54974 let b = _mm512_set_epi32(
54975 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
54976 );
54977 let r = _mm512_unpackhi_epi32(a, b);
54978 let e = _mm512_set_epi32(17, 1, 18, 2, 21, 5, 22, 6, 25, 9, 26, 10, 29, 13, 30, 14);
54979 assert_eq_m512i(r, e);
54980 }
54981
54982 #[simd_test(enable = "avx512f")]
54983 unsafe fn test_mm512_mask_unpackhi_epi32() {
54984 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54985 let b = _mm512_set_epi32(
54986 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
54987 );
54988 let r = _mm512_mask_unpackhi_epi32(a, 0, a, b);
54989 assert_eq_m512i(r, a);
54990 let r = _mm512_mask_unpackhi_epi32(a, 0b11111111_11111111, a, b);
54991 let e = _mm512_set_epi32(17, 1, 18, 2, 21, 5, 22, 6, 25, 9, 26, 10, 29, 13, 30, 14);
54992 assert_eq_m512i(r, e);
54993 }
54994
54995 #[simd_test(enable = "avx512f")]
54996 unsafe fn test_mm512_maskz_unpackhi_epi32() {
54997 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
54998 let b = _mm512_set_epi32(
54999 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
55000 );
55001 let r = _mm512_maskz_unpackhi_epi32(0, a, b);
55002 assert_eq_m512i(r, _mm512_setzero_si512());
55003 let r = _mm512_maskz_unpackhi_epi32(0b00000000_11111111, a, b);
55004 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 25, 9, 26, 10, 29, 13, 30, 14);
55005 assert_eq_m512i(r, e);
55006 }
55007
55008 #[simd_test(enable = "avx512f,avx512vl")]
55009 unsafe fn test_mm256_mask_unpackhi_epi32() {
55010 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
55011 let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
55012 let r = _mm256_mask_unpackhi_epi32(a, 0, a, b);
55013 assert_eq_m256i(r, a);
55014 let r = _mm256_mask_unpackhi_epi32(a, 0b11111111, a, b);
55015 let e = _mm256_set_epi32(17, 1, 18, 2, 21, 5, 22, 6);
55016 assert_eq_m256i(r, e);
55017 }
55018
55019 #[simd_test(enable = "avx512f,avx512vl")]
55020 unsafe fn test_mm256_maskz_unpackhi_epi32() {
55021 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
55022 let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
55023 let r = _mm256_maskz_unpackhi_epi32(0, a, b);
55024 assert_eq_m256i(r, _mm256_setzero_si256());
55025 let r = _mm256_maskz_unpackhi_epi32(0b11111111, a, b);
55026 let e = _mm256_set_epi32(17, 1, 18, 2, 21, 5, 22, 6);
55027 assert_eq_m256i(r, e);
55028 }
55029
55030 #[simd_test(enable = "avx512f,avx512vl")]
55031 unsafe fn test_mm_mask_unpackhi_epi32() {
55032 let a = _mm_set_epi32(1, 2, 3, 4);
55033 let b = _mm_set_epi32(17, 18, 19, 20);
55034 let r = _mm_mask_unpackhi_epi32(a, 0, a, b);
55035 assert_eq_m128i(r, a);
55036 let r = _mm_mask_unpackhi_epi32(a, 0b00001111, a, b);
55037 let e = _mm_set_epi32(17, 1, 18, 2);
55038 assert_eq_m128i(r, e);
55039 }
55040
55041 #[simd_test(enable = "avx512f,avx512vl")]
55042 unsafe fn test_mm_maskz_unpackhi_epi32() {
55043 let a = _mm_set_epi32(1, 2, 3, 4);
55044 let b = _mm_set_epi32(17, 18, 19, 20);
55045 let r = _mm_maskz_unpackhi_epi32(0, a, b);
55046 assert_eq_m128i(r, _mm_setzero_si128());
55047 let r = _mm_maskz_unpackhi_epi32(0b00001111, a, b);
55048 let e = _mm_set_epi32(17, 1, 18, 2);
55049 assert_eq_m128i(r, e);
55050 }
55051
55052 #[simd_test(enable = "avx512f")]
55053 unsafe fn test_mm512_unpackhi_ps() {
55054 let a = _mm512_set_ps(
55055 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
55056 );
55057 let b = _mm512_set_ps(
55058 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
55059 );
55060 let r = _mm512_unpackhi_ps(a, b);
55061 let e = _mm512_set_ps(
55062 17., 1., 18., 2., 21., 5., 22., 6., 25., 9., 26., 10., 29., 13., 30., 14.,
55063 );
55064 assert_eq_m512(r, e);
55065 }
55066
55067 #[simd_test(enable = "avx512f")]
55068 unsafe fn test_mm512_mask_unpackhi_ps() {
55069 let a = _mm512_set_ps(
55070 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
55071 );
55072 let b = _mm512_set_ps(
55073 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
55074 );
55075 let r = _mm512_mask_unpackhi_ps(a, 0, a, b);
55076 assert_eq_m512(r, a);
55077 let r = _mm512_mask_unpackhi_ps(a, 0b11111111_11111111, a, b);
55078 let e = _mm512_set_ps(
55079 17., 1., 18., 2., 21., 5., 22., 6., 25., 9., 26., 10., 29., 13., 30., 14.,
55080 );
55081 assert_eq_m512(r, e);
55082 }
55083
55084 #[simd_test(enable = "avx512f")]
55085 unsafe fn test_mm512_maskz_unpackhi_ps() {
55086 let a = _mm512_set_ps(
55087 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
55088 );
55089 let b = _mm512_set_ps(
55090 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
55091 );
55092 let r = _mm512_maskz_unpackhi_ps(0, a, b);
55093 assert_eq_m512(r, _mm512_setzero_ps());
55094 let r = _mm512_maskz_unpackhi_ps(0b00000000_11111111, a, b);
55095 let e = _mm512_set_ps(
55096 0., 0., 0., 0., 0., 0., 0., 0., 25., 9., 26., 10., 29., 13., 30., 14.,
55097 );
55098 assert_eq_m512(r, e);
55099 }
55100
55101 #[simd_test(enable = "avx512f,avx512vl")]
55102 unsafe fn test_mm256_mask_unpackhi_ps() {
55103 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
55104 let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
55105 let r = _mm256_mask_unpackhi_ps(a, 0, a, b);
55106 assert_eq_m256(r, a);
55107 let r = _mm256_mask_unpackhi_ps(a, 0b11111111, a, b);
55108 let e = _mm256_set_ps(17., 1., 18., 2., 21., 5., 22., 6.);
55109 assert_eq_m256(r, e);
55110 }
55111
55112 #[simd_test(enable = "avx512f,avx512vl")]
55113 unsafe fn test_mm256_maskz_unpackhi_ps() {
55114 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
55115 let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
55116 let r = _mm256_maskz_unpackhi_ps(0, a, b);
55117 assert_eq_m256(r, _mm256_setzero_ps());
55118 let r = _mm256_maskz_unpackhi_ps(0b11111111, a, b);
55119 let e = _mm256_set_ps(17., 1., 18., 2., 21., 5., 22., 6.);
55120 assert_eq_m256(r, e);
55121 }
55122
55123 #[simd_test(enable = "avx512f,avx512vl")]
55124 unsafe fn test_mm_mask_unpackhi_ps() {
55125 let a = _mm_set_ps(1., 2., 3., 4.);
55126 let b = _mm_set_ps(17., 18., 19., 20.);
55127 let r = _mm_mask_unpackhi_ps(a, 0, a, b);
55128 assert_eq_m128(r, a);
55129 let r = _mm_mask_unpackhi_ps(a, 0b00001111, a, b);
55130 let e = _mm_set_ps(17., 1., 18., 2.);
55131 assert_eq_m128(r, e);
55132 }
55133
55134 #[simd_test(enable = "avx512f,avx512vl")]
55135 unsafe fn test_mm_maskz_unpackhi_ps() {
55136 let a = _mm_set_ps(1., 2., 3., 4.);
55137 let b = _mm_set_ps(17., 18., 19., 20.);
55138 let r = _mm_maskz_unpackhi_ps(0, a, b);
55139 assert_eq_m128(r, _mm_setzero_ps());
55140 let r = _mm_maskz_unpackhi_ps(0b00001111, a, b);
55141 let e = _mm_set_ps(17., 1., 18., 2.);
55142 assert_eq_m128(r, e);
55143 }
55144
55145 #[simd_test(enable = "avx512f")]
55146 unsafe fn test_mm512_unpacklo_epi32() {
55147 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
55148 let b = _mm512_set_epi32(
55149 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
55150 );
55151 let r = _mm512_unpacklo_epi32(a, b);
55152 let e = _mm512_set_epi32(19, 3, 20, 4, 23, 7, 24, 8, 27, 11, 28, 12, 31, 15, 32, 16);
55153 assert_eq_m512i(r, e);
55154 }
55155
55156 #[simd_test(enable = "avx512f")]
55157 unsafe fn test_mm512_mask_unpacklo_epi32() {
55158 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
55159 let b = _mm512_set_epi32(
55160 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
55161 );
55162 let r = _mm512_mask_unpacklo_epi32(a, 0, a, b);
55163 assert_eq_m512i(r, a);
55164 let r = _mm512_mask_unpacklo_epi32(a, 0b11111111_11111111, a, b);
55165 let e = _mm512_set_epi32(19, 3, 20, 4, 23, 7, 24, 8, 27, 11, 28, 12, 31, 15, 32, 16);
55166 assert_eq_m512i(r, e);
55167 }
55168
55169 #[simd_test(enable = "avx512f")]
55170 unsafe fn test_mm512_maskz_unpacklo_epi32() {
55171 let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
55172 let b = _mm512_set_epi32(
55173 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
55174 );
55175 let r = _mm512_maskz_unpacklo_epi32(0, a, b);
55176 assert_eq_m512i(r, _mm512_setzero_si512());
55177 let r = _mm512_maskz_unpacklo_epi32(0b00000000_11111111, a, b);
55178 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 27, 11, 28, 12, 31, 15, 32, 16);
55179 assert_eq_m512i(r, e);
55180 }
55181
55182 #[simd_test(enable = "avx512f,avx512vl")]
55183 unsafe fn test_mm256_mask_unpacklo_epi32() {
55184 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
55185 let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
55186 let r = _mm256_mask_unpacklo_epi32(a, 0, a, b);
55187 assert_eq_m256i(r, a);
55188 let r = _mm256_mask_unpacklo_epi32(a, 0b11111111, a, b);
55189 let e = _mm256_set_epi32(19, 3, 20, 4, 23, 7, 24, 8);
55190 assert_eq_m256i(r, e);
55191 }
55192
55193 #[simd_test(enable = "avx512f,avx512vl")]
55194 unsafe fn test_mm256_maskz_unpacklo_epi32() {
55195 let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
55196 let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
55197 let r = _mm256_maskz_unpacklo_epi32(0, a, b);
55198 assert_eq_m256i(r, _mm256_setzero_si256());
55199 let r = _mm256_maskz_unpacklo_epi32(0b11111111, a, b);
55200 let e = _mm256_set_epi32(19, 3, 20, 4, 23, 7, 24, 8);
55201 assert_eq_m256i(r, e);
55202 }
55203
55204 #[simd_test(enable = "avx512f,avx512vl")]
55205 unsafe fn test_mm_mask_unpacklo_epi32() {
55206 let a = _mm_set_epi32(1, 2, 3, 4);
55207 let b = _mm_set_epi32(17, 18, 19, 20);
55208 let r = _mm_mask_unpacklo_epi32(a, 0, a, b);
55209 assert_eq_m128i(r, a);
55210 let r = _mm_mask_unpacklo_epi32(a, 0b00001111, a, b);
55211 let e = _mm_set_epi32(19, 3, 20, 4);
55212 assert_eq_m128i(r, e);
55213 }
55214
55215 #[simd_test(enable = "avx512f,avx512vl")]
55216 unsafe fn test_mm_maskz_unpacklo_epi32() {
55217 let a = _mm_set_epi32(1, 2, 3, 4);
55218 let b = _mm_set_epi32(17, 18, 19, 20);
55219 let r = _mm_maskz_unpacklo_epi32(0, a, b);
55220 assert_eq_m128i(r, _mm_setzero_si128());
55221 let r = _mm_maskz_unpacklo_epi32(0b00001111, a, b);
55222 let e = _mm_set_epi32(19, 3, 20, 4);
55223 assert_eq_m128i(r, e);
55224 }
55225
55226 #[simd_test(enable = "avx512f")]
55227 unsafe fn test_mm512_unpacklo_ps() {
55228 let a = _mm512_set_ps(
55229 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
55230 );
55231 let b = _mm512_set_ps(
55232 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
55233 );
55234 let r = _mm512_unpacklo_ps(a, b);
55235 let e = _mm512_set_ps(
55236 19., 3., 20., 4., 23., 7., 24., 8., 27., 11., 28., 12., 31., 15., 32., 16.,
55237 );
55238 assert_eq_m512(r, e);
55239 }
55240
55241 #[simd_test(enable = "avx512f")]
55242 unsafe fn test_mm512_mask_unpacklo_ps() {
55243 let a = _mm512_set_ps(
55244 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
55245 );
55246 let b = _mm512_set_ps(
55247 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
55248 );
55249 let r = _mm512_mask_unpacklo_ps(a, 0, a, b);
55250 assert_eq_m512(r, a);
55251 let r = _mm512_mask_unpacklo_ps(a, 0b11111111_11111111, a, b);
55252 let e = _mm512_set_ps(
55253 19., 3., 20., 4., 23., 7., 24., 8., 27., 11., 28., 12., 31., 15., 32., 16.,
55254 );
55255 assert_eq_m512(r, e);
55256 }
55257
55258 #[simd_test(enable = "avx512f")]
55259 unsafe fn test_mm512_maskz_unpacklo_ps() {
55260 let a = _mm512_set_ps(
55261 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
55262 );
55263 let b = _mm512_set_ps(
55264 17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
55265 );
55266 let r = _mm512_maskz_unpacklo_ps(0, a, b);
55267 assert_eq_m512(r, _mm512_setzero_ps());
55268 let r = _mm512_maskz_unpacklo_ps(0b00000000_11111111, a, b);
55269 let e = _mm512_set_ps(
55270 0., 0., 0., 0., 0., 0., 0., 0., 27., 11., 28., 12., 31., 15., 32., 16.,
55271 );
55272 assert_eq_m512(r, e);
55273 }
55274
55275 #[simd_test(enable = "avx512f,avx512vl")]
55276 unsafe fn test_mm256_mask_unpacklo_ps() {
55277 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
55278 let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
55279 let r = _mm256_mask_unpacklo_ps(a, 0, a, b);
55280 assert_eq_m256(r, a);
55281 let r = _mm256_mask_unpacklo_ps(a, 0b11111111, a, b);
55282 let e = _mm256_set_ps(19., 3., 20., 4., 23., 7., 24., 8.);
55283 assert_eq_m256(r, e);
55284 }
55285
55286 #[simd_test(enable = "avx512f,avx512vl")]
55287 unsafe fn test_mm256_maskz_unpacklo_ps() {
55288 let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
55289 let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
55290 let r = _mm256_maskz_unpacklo_ps(0, a, b);
55291 assert_eq_m256(r, _mm256_setzero_ps());
55292 let r = _mm256_maskz_unpacklo_ps(0b11111111, a, b);
55293 let e = _mm256_set_ps(19., 3., 20., 4., 23., 7., 24., 8.);
55294 assert_eq_m256(r, e);
55295 }
55296
55297 #[simd_test(enable = "avx512f,avx512vl")]
55298 unsafe fn test_mm_mask_unpacklo_ps() {
55299 let a = _mm_set_ps(1., 2., 3., 4.);
55300 let b = _mm_set_ps(17., 18., 19., 20.);
55301 let r = _mm_mask_unpacklo_ps(a, 0, a, b);
55302 assert_eq_m128(r, a);
55303 let r = _mm_mask_unpacklo_ps(a, 0b00001111, a, b);
55304 let e = _mm_set_ps(19., 3., 20., 4.);
55305 assert_eq_m128(r, e);
55306 }
55307
55308 #[simd_test(enable = "avx512f,avx512vl")]
55309 unsafe fn test_mm_maskz_unpacklo_ps() {
55310 let a = _mm_set_ps(1., 2., 3., 4.);
55311 let b = _mm_set_ps(17., 18., 19., 20.);
55312 let r = _mm_maskz_unpacklo_ps(0, a, b);
55313 assert_eq_m128(r, _mm_setzero_ps());
55314 let r = _mm_maskz_unpacklo_ps(0b00001111, a, b);
55315 let e = _mm_set_ps(19., 3., 20., 4.);
55316 assert_eq_m128(r, e);
55317 }
55318
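    // valignd concatenates `a` (high) with `b` (low) and shifts right by the const number of
    // 32-bit elements; per Intel's pseudocode only the low bits of the immediate are used, so
    // a shift of 16 behaves like 0 for the 512-bit form.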
55319 #[simd_test(enable = "avx512f")]
55320 unsafe fn test_mm512_alignr_epi32() {
55321 let a = _mm512_set_epi32(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
55322 let b = _mm512_set_epi32(
55323 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
55324 );
55325 let r = _mm512_alignr_epi32::<0>(a, b);
55326 assert_eq_m512i(r, b);
55327 let r = _mm512_alignr_epi32::<16>(a, b);
55328 assert_eq_m512i(r, b);
55329 let r = _mm512_alignr_epi32::<1>(a, b);
55330 let e = _mm512_set_epi32(
55331 1, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18,
55332 );
55333 assert_eq_m512i(r, e);
55334 }
55335
55336 #[simd_test(enable = "avx512f")]
55337 unsafe fn test_mm512_mask_alignr_epi32() {
55338 let a = _mm512_set_epi32(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
55339 let b = _mm512_set_epi32(
55340 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
55341 );
55342 let r = _mm512_mask_alignr_epi32::<1>(a, 0, a, b);
55343 assert_eq_m512i(r, a);
55344 let r = _mm512_mask_alignr_epi32::<1>(a, 0b11111111_11111111, a, b);
55345 let e = _mm512_set_epi32(
55346 1, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18,
55347 );
55348 assert_eq_m512i(r, e);
55349 }
55350
55351 #[simd_test(enable = "avx512f")]
55352 unsafe fn test_mm512_maskz_alignr_epi32() {
55353 let a = _mm512_set_epi32(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
55354 let b = _mm512_set_epi32(
55355 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
55356 );
55357 let r = _mm512_maskz_alignr_epi32::<1>(0, a, b);
55358 assert_eq_m512i(r, _mm512_setzero_si512());
55359 let r = _mm512_maskz_alignr_epi32::<1>(0b00000000_11111111, a, b);
55360 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 25, 24, 23, 22, 21, 20, 19, 18);
55361 assert_eq_m512i(r, e);
55362 }
55363
55364 #[simd_test(enable = "avx512f,avx512vl")]
55365 unsafe fn test_mm256_alignr_epi32() {
55366 let a = _mm256_set_epi32(8, 7, 6, 5, 4, 3, 2, 1);
55367 let b = _mm256_set_epi32(16, 15, 14, 13, 12, 11, 10, 9);
55368 let r = _mm256_alignr_epi32::<0>(a, b);
55369 assert_eq_m256i(r, b);
55370 let r = _mm256_alignr_epi32::<1>(a, b);
55371 let e = _mm256_set_epi32(1, 16, 15, 14, 13, 12, 11, 10);
55372 assert_eq_m256i(r, e);
55373 }
55374
55375 #[simd_test(enable = "avx512f,avx512vl")]
55376 unsafe fn test_mm256_mask_alignr_epi32() {
55377 let a = _mm256_set_epi32(8, 7, 6, 5, 4, 3, 2, 1);
55378 let b = _mm256_set_epi32(16, 15, 14, 13, 12, 11, 10, 9);
55379 let r = _mm256_mask_alignr_epi32::<1>(a, 0, a, b);
55380 assert_eq_m256i(r, a);
55381 let r = _mm256_mask_alignr_epi32::<1>(a, 0b11111111, a, b);
55382 let e = _mm256_set_epi32(1, 16, 15, 14, 13, 12, 11, 10);
55383 assert_eq_m256i(r, e);
55384 }
55385
55386 #[simd_test(enable = "avx512f,avx512vl")]
55387 unsafe fn test_mm256_maskz_alignr_epi32() {
55388 let a = _mm256_set_epi32(8, 7, 6, 5, 4, 3, 2, 1);
55389 let b = _mm256_set_epi32(16, 15, 14, 13, 12, 11, 10, 9);
55390 let r = _mm256_maskz_alignr_epi32::<1>(0, a, b);
55391 assert_eq_m256i(r, _mm256_setzero_si256());
55392 let r = _mm256_maskz_alignr_epi32::<1>(0b11111111, a, b);
55393 let e = _mm256_set_epi32(1, 16, 15, 14, 13, 12, 11, 10);
55394 assert_eq_m256i(r, e);
55395 }
55396
55397 #[simd_test(enable = "avx512f,avx512vl")]
55398 unsafe fn test_mm_alignr_epi32() {
55399 let a = _mm_set_epi32(4, 3, 2, 1);
55400 let b = _mm_set_epi32(8, 7, 6, 5);
55401 let r = _mm_alignr_epi32::<0>(a, b);
55402 assert_eq_m128i(r, b);
55403 let r = _mm_alignr_epi32::<1>(a, b);
55404 let e = _mm_set_epi32(1, 8, 7, 6);
55405 assert_eq_m128i(r, e);
55406 }
55407
55408 #[simd_test(enable = "avx512f,avx512vl")]
55409 unsafe fn test_mm_mask_alignr_epi32() {
55410 let a = _mm_set_epi32(4, 3, 2, 1);
55411 let b = _mm_set_epi32(8, 7, 6, 5);
55412 let r = _mm_mask_alignr_epi32::<1>(a, 0, a, b);
55413 assert_eq_m128i(r, a);
55414 let r = _mm_mask_alignr_epi32::<1>(a, 0b00001111, a, b);
55415 let e = _mm_set_epi32(1, 8, 7, 6);
55416 assert_eq_m128i(r, e);
55417 }
55418
55419 #[simd_test(enable = "avx512f,avx512vl")]
55420 unsafe fn test_mm_maskz_alignr_epi32() {
55421 let a = _mm_set_epi32(4, 3, 2, 1);
55422 let b = _mm_set_epi32(8, 7, 6, 5);
55423 let r = _mm_maskz_alignr_epi32::<1>(0, a, b);
55424 assert_eq_m128i(r, _mm_setzero_si128());
55425 let r = _mm_maskz_alignr_epi32::<1>(0b00001111, a, b);
55426 let e = _mm_set_epi32(1, 8, 7, 6);
55427 assert_eq_m128i(r, e);
55428 }
55429
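    // Reminder for the bitwise tests below: `_mm512_set_epi32` lists elements from
    // lane 15 down to lane 0, so the first literal is the highest lane and the last
    // literal is lane 0 (the lane selected by mask bit 0).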
55430 #[simd_test(enable = "avx512f")]
55431 unsafe fn test_mm512_and_epi32() {
55432 #[rustfmt::skip]
55433 let a = _mm512_set_epi32(
55434 1 << 1 | 1 << 2, 0, 0, 0,
55435 0, 0, 0, 0,
55436 0, 0, 0, 0,
55437 0, 0, 0, 1 << 1 | 1 << 3,
55438 );
55439 #[rustfmt::skip]
55440 let b = _mm512_set_epi32(
55441 1 << 1, 0, 0, 0,
55442 0, 0, 0, 0,
55443 0, 0, 0, 0,
55444 0, 0, 0, 1 << 3 | 1 << 4,
55445 );
55446 let r = _mm512_and_epi32(a, b);
55447 let e = _mm512_set_epi32(1 << 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
55448 assert_eq_m512i(r, e);
55449 }
55450
55451 #[simd_test(enable = "avx512f")]
55452 unsafe fn test_mm512_mask_and_epi32() {
55453 #[rustfmt::skip]
55454 let a = _mm512_set_epi32(
55455 1 << 1 | 1 << 2, 0, 0, 0,
55456 0, 0, 0, 0,
55457 0, 0, 0, 0,
55458 0, 0, 0, 1 << 1 | 1 << 3,
55459 );
55460 #[rustfmt::skip]
55461 let b = _mm512_set_epi32(
55462 1 << 1, 0, 0, 0,
55463 0, 0, 0, 0,
55464 0, 0, 0, 0,
55465 0, 0, 0, 1 << 3 | 1 << 4,
55466 );
55467 let r = _mm512_mask_and_epi32(a, 0, a, b);
55468 assert_eq_m512i(r, a);
55469 let r = _mm512_mask_and_epi32(a, 0b01111111_11111111, a, b);
55470 #[rustfmt::skip]
55471 let e = _mm512_set_epi32(
55472 1 << 1 | 1 << 2, 0, 0, 0,
55473 0, 0, 0, 0,
55474 0, 0, 0, 0,
55475 0, 0, 0, 1 << 3,
55476 );
55477 assert_eq_m512i(r, e);
55478 }
55479
55480 #[simd_test(enable = "avx512f")]
55481 unsafe fn test_mm512_maskz_and_epi32() {
55482 #[rustfmt::skip]
55483 let a = _mm512_set_epi32(
55484 1 << 1 | 1 << 2, 0, 0, 0,
55485 0, 0, 0, 0,
55486 0, 0, 0, 0,
55487 0, 0, 0, 1 << 1 | 1 << 3,
55488 );
55489 #[rustfmt::skip]
55490 let b = _mm512_set_epi32(
55491 1 << 1, 0, 0, 0,
55492 0, 0, 0, 0,
55493 0, 0, 0, 0,
55494 0, 0, 0, 1 << 3 | 1 << 4,
55495 );
55496 let r = _mm512_maskz_and_epi32(0, a, b);
55497 assert_eq_m512i(r, _mm512_setzero_si512());
55498 let r = _mm512_maskz_and_epi32(0b00000000_11111111, a, b);
55499 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
55500 assert_eq_m512i(r, e);
55501 }
55502
55503 #[simd_test(enable = "avx512f,avx512vl")]
55504 unsafe fn test_mm256_mask_and_epi32() {
55505 let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55506 let b = _mm256_set1_epi32(1 << 1);
55507 let r = _mm256_mask_and_epi32(a, 0, a, b);
55508 assert_eq_m256i(r, a);
55509 let r = _mm256_mask_and_epi32(a, 0b11111111, a, b);
55510 let e = _mm256_set1_epi32(1 << 1);
55511 assert_eq_m256i(r, e);
55512 }
55513
55514 #[simd_test(enable = "avx512f,avx512vl")]
55515 unsafe fn test_mm256_maskz_and_epi32() {
55516 let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55517 let b = _mm256_set1_epi32(1 << 1);
55518 let r = _mm256_maskz_and_epi32(0, a, b);
55519 assert_eq_m256i(r, _mm256_setzero_si256());
55520 let r = _mm256_maskz_and_epi32(0b11111111, a, b);
55521 let e = _mm256_set1_epi32(1 << 1);
55522 assert_eq_m256i(r, e);
55523 }
55524
55525 #[simd_test(enable = "avx512f,avx512vl")]
55526 unsafe fn test_mm_mask_and_epi32() {
55527 let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55528 let b = _mm_set1_epi32(1 << 1);
55529 let r = _mm_mask_and_epi32(a, 0, a, b);
55530 assert_eq_m128i(r, a);
55531 let r = _mm_mask_and_epi32(a, 0b00001111, a, b);
55532 let e = _mm_set1_epi32(1 << 1);
55533 assert_eq_m128i(r, e);
55534 }
55535
55536 #[simd_test(enable = "avx512f,avx512vl")]
55537 unsafe fn test_mm_maskz_and_epi32() {
55538 let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55539 let b = _mm_set1_epi32(1 << 1);
55540 let r = _mm_maskz_and_epi32(0, a, b);
55541 assert_eq_m128i(r, _mm_setzero_si128());
55542 let r = _mm_maskz_and_epi32(0b00001111, a, b);
55543 let e = _mm_set1_epi32(1 << 1);
55544 assert_eq_m128i(r, e);
55545 }
55546
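    // `_mm512_and_si512` is the untyped whole-register AND; it produces the same
    // result as `_mm512_and_epi32`, so this test mirrors the epi32 case above.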
55547 #[simd_test(enable = "avx512f")]
55548 unsafe fn test_mm512_and_si512() {
55549 #[rustfmt::skip]
55550 let a = _mm512_set_epi32(
55551 1 << 1 | 1 << 2, 0, 0, 0,
55552 0, 0, 0, 0,
55553 0, 0, 0, 0,
55554 0, 0, 0, 1 << 1 | 1 << 3,
55555 );
55556 #[rustfmt::skip]
55557 let b = _mm512_set_epi32(
55558 1 << 1, 0, 0, 0,
55559 0, 0, 0, 0,
55560 0, 0, 0, 0,
55561 0, 0, 0, 1 << 3 | 1 << 4,
55562 );
55563         let r = _mm512_and_si512(a, b);
55564 let e = _mm512_set_epi32(1 << 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
55565 assert_eq_m512i(r, e);
55566 }
55567
55568 #[simd_test(enable = "avx512f")]
55569 unsafe fn test_mm512_or_epi32() {
55570 #[rustfmt::skip]
55571 let a = _mm512_set_epi32(
55572 1 << 1 | 1 << 2, 0, 0, 0,
55573 0, 0, 0, 0,
55574 0, 0, 0, 0,
55575 0, 0, 0, 1 << 1 | 1 << 3,
55576 );
55577 #[rustfmt::skip]
55578 let b = _mm512_set_epi32(
55579 1 << 1, 0, 0, 0,
55580 0, 0, 0, 0,
55581 0, 0, 0, 0,
55582 0, 0, 0, 1 << 3 | 1 << 4,
55583 );
55584 let r = _mm512_or_epi32(a, b);
55585 #[rustfmt::skip]
55586 let e = _mm512_set_epi32(
55587 1 << 1 | 1 << 2, 0, 0, 0,
55588 0, 0, 0, 0,
55589 0, 0, 0, 0,
55590 0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
55591 );
55592 assert_eq_m512i(r, e);
55593 }
55594
55595 #[simd_test(enable = "avx512f")]
55596 unsafe fn test_mm512_mask_or_epi32() {
55597 #[rustfmt::skip]
55598 let a = _mm512_set_epi32(
55599 1 << 1 | 1 << 2, 0, 0, 0,
55600 0, 0, 0, 0,
55601 0, 0, 0, 0,
55602 0, 0, 0, 1 << 1 | 1 << 3,
55603 );
55604 #[rustfmt::skip]
55605 let b = _mm512_set_epi32(
55606 1 << 1, 0, 0, 0,
55607 0, 0, 0, 0,
55608 0, 0, 0, 0,
55609 0, 0, 0, 1 << 3 | 1 << 4,
55610 );
55611 let r = _mm512_mask_or_epi32(a, 0, a, b);
55612 assert_eq_m512i(r, a);
55613 let r = _mm512_mask_or_epi32(a, 0b11111111_11111111, a, b);
55614 #[rustfmt::skip]
55615 let e = _mm512_set_epi32(
55616 1 << 1 | 1 << 2, 0, 0, 0,
55617 0, 0, 0, 0,
55618 0, 0, 0, 0,
55619 0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
55620 );
55621 assert_eq_m512i(r, e);
55622 }
55623
55624 #[simd_test(enable = "avx512f")]
55625 unsafe fn test_mm512_maskz_or_epi32() {
55626 #[rustfmt::skip]
55627 let a = _mm512_set_epi32(
55628 1 << 1 | 1 << 2, 0, 0, 0,
55629 0, 0, 0, 0,
55630 0, 0, 0, 0,
55631 0, 0, 0, 1 << 1 | 1 << 3,
55632 );
55633 #[rustfmt::skip]
55634 let b = _mm512_set_epi32(
55635 1 << 1, 0, 0, 0,
55636 0, 0, 0, 0,
55637 0, 0, 0, 0,
55638 0, 0, 0, 1 << 3 | 1 << 4,
55639 );
55640 let r = _mm512_maskz_or_epi32(0, a, b);
55641 assert_eq_m512i(r, _mm512_setzero_si512());
55642 let r = _mm512_maskz_or_epi32(0b00000000_11111111, a, b);
55643 #[rustfmt::skip]
55644 let e = _mm512_set_epi32(
55645 0, 0, 0, 0,
55646 0, 0, 0, 0,
55647 0, 0, 0, 0,
55648 0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
55649 );
55650 assert_eq_m512i(r, e);
55651 }
55652
55653 #[simd_test(enable = "avx512f,avx512vl")]
55654 unsafe fn test_mm256_or_epi32() {
55655 let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55656 let b = _mm256_set1_epi32(1 << 1);
55657 let r = _mm256_or_epi32(a, b);
55658 let e = _mm256_set1_epi32(1 << 1 | 1 << 2);
55659 assert_eq_m256i(r, e);
55660 }
55661
55662 #[simd_test(enable = "avx512f,avx512vl")]
55663 unsafe fn test_mm256_mask_or_epi32() {
55664 let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55665 let b = _mm256_set1_epi32(1 << 1);
55666 let r = _mm256_mask_or_epi32(a, 0, a, b);
55667 assert_eq_m256i(r, a);
55668 let r = _mm256_mask_or_epi32(a, 0b11111111, a, b);
55669 let e = _mm256_set1_epi32(1 << 1 | 1 << 2);
55670 assert_eq_m256i(r, e);
55671 }
55672
55673 #[simd_test(enable = "avx512f,avx512vl")]
55674 unsafe fn test_mm256_maskz_or_epi32() {
55675 let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55676 let b = _mm256_set1_epi32(1 << 1);
55677 let r = _mm256_maskz_or_epi32(0, a, b);
55678 assert_eq_m256i(r, _mm256_setzero_si256());
55679 let r = _mm256_maskz_or_epi32(0b11111111, a, b);
55680 let e = _mm256_set1_epi32(1 << 1 | 1 << 2);
55681 assert_eq_m256i(r, e);
55682 }
55683
55684 #[simd_test(enable = "avx512f,avx512vl")]
55685 unsafe fn test_mm_or_epi32() {
55686 let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55687 let b = _mm_set1_epi32(1 << 1);
55688 let r = _mm_or_epi32(a, b);
55689 let e = _mm_set1_epi32(1 << 1 | 1 << 2);
55690 assert_eq_m128i(r, e);
55691 }
55692
55693 #[simd_test(enable = "avx512f,avx512vl")]
55694 unsafe fn test_mm_mask_or_epi32() {
55695 let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55696 let b = _mm_set1_epi32(1 << 1);
55697 let r = _mm_mask_or_epi32(a, 0, a, b);
55698 assert_eq_m128i(r, a);
55699 let r = _mm_mask_or_epi32(a, 0b00001111, a, b);
55700 let e = _mm_set1_epi32(1 << 1 | 1 << 2);
55701 assert_eq_m128i(r, e);
55702 }
55703
55704 #[simd_test(enable = "avx512f,avx512vl")]
55705 unsafe fn test_mm_maskz_or_epi32() {
55706 let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55707 let b = _mm_set1_epi32(1 << 1);
55708 let r = _mm_maskz_or_epi32(0, a, b);
55709 assert_eq_m128i(r, _mm_setzero_si128());
55710 let r = _mm_maskz_or_epi32(0b00001111, a, b);
55711 let e = _mm_set1_epi32(1 << 1 | 1 << 2);
55712 assert_eq_m128i(r, e);
55713 }
55714
55715 #[simd_test(enable = "avx512f")]
55716 unsafe fn test_mm512_or_si512() {
55717 #[rustfmt::skip]
55718 let a = _mm512_set_epi32(
55719 1 << 1 | 1 << 2, 0, 0, 0,
55720 0, 0, 0, 0,
55721 0, 0, 0, 0,
55722 0, 0, 0, 1 << 1 | 1 << 3,
55723 );
55724 #[rustfmt::skip]
55725 let b = _mm512_set_epi32(
55726 1 << 1, 0, 0, 0,
55727 0, 0, 0, 0,
55728 0, 0, 0, 0,
55729 0, 0, 0, 1 << 3 | 1 << 4,
55730 );
55731         let r = _mm512_or_si512(a, b);
55732 #[rustfmt::skip]
55733 let e = _mm512_set_epi32(
55734 1 << 1 | 1 << 2, 0, 0, 0,
55735 0, 0, 0, 0,
55736 0, 0, 0, 0,
55737 0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
55738 );
55739 assert_eq_m512i(r, e);
55740 }
55741
55742 #[simd_test(enable = "avx512f")]
55743 unsafe fn test_mm512_xor_epi32() {
55744 #[rustfmt::skip]
55745 let a = _mm512_set_epi32(
55746 1 << 1 | 1 << 2, 0, 0, 0,
55747 0, 0, 0, 0,
55748 0, 0, 0, 0,
55749 0, 0, 0, 1 << 1 | 1 << 3,
55750 );
55751 #[rustfmt::skip]
55752 let b = _mm512_set_epi32(
55753 1 << 1, 0, 0, 0,
55754 0, 0, 0, 0,
55755 0, 0, 0, 0,
55756 0, 0, 0, 1 << 3 | 1 << 4,
55757 );
55758 let r = _mm512_xor_epi32(a, b);
55759 #[rustfmt::skip]
55760 let e = _mm512_set_epi32(
55761 1 << 2, 0, 0, 0,
55762 0, 0, 0, 0,
55763 0, 0, 0, 0,
55764 0, 0, 0, 1 << 1 | 1 << 4,
55765 );
55766 assert_eq_m512i(r, e);
55767 }
55768
55769 #[simd_test(enable = "avx512f")]
55770 unsafe fn test_mm512_mask_xor_epi32() {
55771 #[rustfmt::skip]
55772 let a = _mm512_set_epi32(
55773 1 << 1 | 1 << 2, 0, 0, 0,
55774 0, 0, 0, 0,
55775 0, 0, 0, 0,
55776 0, 0, 0, 1 << 1 | 1 << 3,
55777 );
55778 #[rustfmt::skip]
55779 let b = _mm512_set_epi32(
55780 1 << 1, 0, 0, 0,
55781 0, 0, 0, 0,
55782 0, 0, 0, 0,
55783 0, 0, 0, 1 << 3 | 1 << 4,
55784 );
55785 let r = _mm512_mask_xor_epi32(a, 0, a, b);
55786 assert_eq_m512i(r, a);
55787 let r = _mm512_mask_xor_epi32(a, 0b01111111_11111111, a, b);
55788 #[rustfmt::skip]
55789 let e = _mm512_set_epi32(
55790 1 << 1 | 1 << 2, 0, 0, 0,
55791 0, 0, 0, 0,
55792 0, 0, 0, 0,
55793 0, 0, 0, 1 << 1 | 1 << 4,
55794 );
55795 assert_eq_m512i(r, e);
55796 }
55797
55798 #[simd_test(enable = "avx512f")]
55799 unsafe fn test_mm512_maskz_xor_epi32() {
55800 #[rustfmt::skip]
55801 let a = _mm512_set_epi32(
55802 1 << 1 | 1 << 2, 0, 0, 0,
55803 0, 0, 0, 0,
55804 0, 0, 0, 0,
55805 0, 0, 0, 1 << 1 | 1 << 3,
55806 );
55807 #[rustfmt::skip]
55808 let b = _mm512_set_epi32(
55809 1 << 1, 0, 0, 0,
55810 0, 0, 0, 0,
55811 0, 0, 0, 0,
55812 0, 0, 0, 1 << 3 | 1 << 4,
55813 );
55814 let r = _mm512_maskz_xor_epi32(0, a, b);
55815 assert_eq_m512i(r, _mm512_setzero_si512());
55816 let r = _mm512_maskz_xor_epi32(0b00000000_11111111, a, b);
55817 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 4);
55818 assert_eq_m512i(r, e);
55819 }
55820
55821 #[simd_test(enable = "avx512f,avx512vl")]
55822 unsafe fn test_mm256_xor_epi32() {
55823 let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55824 let b = _mm256_set1_epi32(1 << 1);
55825 let r = _mm256_xor_epi32(a, b);
55826 let e = _mm256_set1_epi32(1 << 2);
55827 assert_eq_m256i(r, e);
55828 }
55829
55830 #[simd_test(enable = "avx512f,avx512vl")]
55831 unsafe fn test_mm256_mask_xor_epi32() {
55832 let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55833 let b = _mm256_set1_epi32(1 << 1);
55834 let r = _mm256_mask_xor_epi32(a, 0, a, b);
55835 assert_eq_m256i(r, a);
55836 let r = _mm256_mask_xor_epi32(a, 0b11111111, a, b);
55837 let e = _mm256_set1_epi32(1 << 2);
55838 assert_eq_m256i(r, e);
55839 }
55840
55841 #[simd_test(enable = "avx512f,avx512vl")]
55842 unsafe fn test_mm256_maskz_xor_epi32() {
55843 let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55844 let b = _mm256_set1_epi32(1 << 1);
55845 let r = _mm256_maskz_xor_epi32(0, a, b);
55846 assert_eq_m256i(r, _mm256_setzero_si256());
55847 let r = _mm256_maskz_xor_epi32(0b11111111, a, b);
55848 let e = _mm256_set1_epi32(1 << 2);
55849 assert_eq_m256i(r, e);
55850 }
55851
55852 #[simd_test(enable = "avx512f,avx512vl")]
55853 unsafe fn test_mm_xor_epi32() {
55854 let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55855 let b = _mm_set1_epi32(1 << 1);
55856 let r = _mm_xor_epi32(a, b);
55857 let e = _mm_set1_epi32(1 << 2);
55858 assert_eq_m128i(r, e);
55859 }
55860
55861 #[simd_test(enable = "avx512f,avx512vl")]
55862 unsafe fn test_mm_mask_xor_epi32() {
55863 let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55864 let b = _mm_set1_epi32(1 << 1);
55865 let r = _mm_mask_xor_epi32(a, 0, a, b);
55866 assert_eq_m128i(r, a);
55867 let r = _mm_mask_xor_epi32(a, 0b00001111, a, b);
55868 let e = _mm_set1_epi32(1 << 2);
55869 assert_eq_m128i(r, e);
55870 }
55871
55872 #[simd_test(enable = "avx512f,avx512vl")]
55873 unsafe fn test_mm_maskz_xor_epi32() {
55874 let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55875 let b = _mm_set1_epi32(1 << 1);
55876 let r = _mm_maskz_xor_epi32(0, a, b);
55877 assert_eq_m128i(r, _mm_setzero_si128());
55878 let r = _mm_maskz_xor_epi32(0b00001111, a, b);
55879 let e = _mm_set1_epi32(1 << 2);
55880 assert_eq_m128i(r, e);
55881 }
55882
55883 #[simd_test(enable = "avx512f")]
55884 unsafe fn test_mm512_xor_si512() {
55885 #[rustfmt::skip]
55886 let a = _mm512_set_epi32(
55887 1 << 1 | 1 << 2, 0, 0, 0,
55888 0, 0, 0, 0,
55889 0, 0, 0, 0,
55890 0, 0, 0, 1 << 1 | 1 << 3,
55891 );
55892 #[rustfmt::skip]
55893 let b = _mm512_set_epi32(
55894 1 << 1, 0, 0, 0,
55895 0, 0, 0, 0,
55896 0, 0, 0, 0,
55897 0, 0, 0, 1 << 3 | 1 << 4,
55898 );
55899         let r = _mm512_xor_si512(a, b);
55900 #[rustfmt::skip]
55901 let e = _mm512_set_epi32(
55902 1 << 2, 0, 0, 0,
55903 0, 0, 0, 0,
55904 0, 0, 0, 0,
55905 0, 0, 0, 1 << 1 | 1 << 4,
55906 );
55907 assert_eq_m512i(r, e);
55908 }
55909
55910 #[simd_test(enable = "avx512f")]
55911 unsafe fn test_mm512_andnot_epi32() {
55912 let a = _mm512_set1_epi32(0);
55913 let b = _mm512_set1_epi32(1 << 3 | 1 << 4);
55914 let r = _mm512_andnot_epi32(a, b);
55915 let e = _mm512_set1_epi32(1 << 3 | 1 << 4);
55916 assert_eq_m512i(r, e);
55917 }
55918
55919 #[simd_test(enable = "avx512f")]
55920 unsafe fn test_mm512_mask_andnot_epi32() {
55921 let a = _mm512_set1_epi32(1 << 1 | 1 << 2);
55922 let b = _mm512_set1_epi32(1 << 3 | 1 << 4);
55923 let r = _mm512_mask_andnot_epi32(a, 0, a, b);
55924 assert_eq_m512i(r, a);
55925 let r = _mm512_mask_andnot_epi32(a, 0b11111111_11111111, a, b);
55926 let e = _mm512_set1_epi32(1 << 3 | 1 << 4);
55927 assert_eq_m512i(r, e);
55928 }
55929
55930 #[simd_test(enable = "avx512f")]
55931 unsafe fn test_mm512_maskz_andnot_epi32() {
55932 let a = _mm512_set1_epi32(1 << 1 | 1 << 2);
55933 let b = _mm512_set1_epi32(1 << 3 | 1 << 4);
55934 let r = _mm512_maskz_andnot_epi32(0, a, b);
55935 assert_eq_m512i(r, _mm512_setzero_si512());
55936 let r = _mm512_maskz_andnot_epi32(0b00000000_11111111, a, b);
55937 #[rustfmt::skip]
55938 let e = _mm512_set_epi32(
55939 0, 0, 0, 0,
55940 0, 0, 0, 0,
55941 1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4,
55942 1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4,
55943 );
55944 assert_eq_m512i(r, e);
55945 }
55946
55947 #[simd_test(enable = "avx512f,avx512vl")]
55948 unsafe fn test_mm256_mask_andnot_epi32() {
55949 let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55950 let b = _mm256_set1_epi32(1 << 3 | 1 << 4);
55951 let r = _mm256_mask_andnot_epi32(a, 0, a, b);
55952 assert_eq_m256i(r, a);
55953 let r = _mm256_mask_andnot_epi32(a, 0b11111111, a, b);
55954 let e = _mm256_set1_epi32(1 << 3 | 1 << 4);
55955 assert_eq_m256i(r, e);
55956 }
55957
55958 #[simd_test(enable = "avx512f,avx512vl")]
55959 unsafe fn test_mm256_maskz_andnot_epi32() {
55960 let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
55961 let b = _mm256_set1_epi32(1 << 3 | 1 << 4);
55962 let r = _mm256_maskz_andnot_epi32(0, a, b);
55963 assert_eq_m256i(r, _mm256_setzero_si256());
55964 let r = _mm256_maskz_andnot_epi32(0b11111111, a, b);
55965 let e = _mm256_set1_epi32(1 << 3 | 1 << 4);
55966 assert_eq_m256i(r, e);
55967 }
55968
55969 #[simd_test(enable = "avx512f,avx512vl")]
55970 unsafe fn test_mm_mask_andnot_epi32() {
55971 let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55972 let b = _mm_set1_epi32(1 << 3 | 1 << 4);
55973 let r = _mm_mask_andnot_epi32(a, 0, a, b);
55974 assert_eq_m128i(r, a);
55975 let r = _mm_mask_andnot_epi32(a, 0b00001111, a, b);
55976 let e = _mm_set1_epi32(1 << 3 | 1 << 4);
55977 assert_eq_m128i(r, e);
55978 }
55979
55980 #[simd_test(enable = "avx512f,avx512vl")]
55981 unsafe fn test_mm_maskz_andnot_epi32() {
55982 let a = _mm_set1_epi32(1 << 1 | 1 << 2);
55983 let b = _mm_set1_epi32(1 << 3 | 1 << 4);
55984 let r = _mm_maskz_andnot_epi32(0, a, b);
55985 assert_eq_m128i(r, _mm_setzero_si128());
55986 let r = _mm_maskz_andnot_epi32(0b00001111, a, b);
55987 let e = _mm_set1_epi32(1 << 3 | 1 << 4);
55988 assert_eq_m128i(r, e);
55989 }
55990
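    // `__mmask16` is represented as a plain `u16`, so the mask/integer conversion
    // intrinsics below are bit-for-bit copies.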
55991 #[simd_test(enable = "avx512f")]
55992 unsafe fn test_cvtmask16_u32() {
55993 let a: __mmask16 = 0b11001100_00110011;
55994 let r = _cvtmask16_u32(a);
55995 let e: u32 = 0b11001100_00110011;
55996 assert_eq!(r, e);
55997 }
55998
55999 #[simd_test(enable = "avx512f")]
56000 unsafe fn test_cvtu32_mask16() {
56001 let a: u32 = 0b11001100_00110011;
56002 let r = _cvtu32_mask16(a);
56003 let e: __mmask16 = 0b11001100_00110011;
56004 assert_eq!(r, e);
56005 }
56006
56007 #[simd_test(enable = "avx512f")]
56008 unsafe fn test_mm512_kand() {
56009 let a: u16 = 0b11001100_00110011;
56010 let b: u16 = 0b11001100_00110011;
56011 let r = _mm512_kand(a, b);
56012 let e: u16 = 0b11001100_00110011;
56013 assert_eq!(r, e);
56014 }
56015
56016 #[simd_test(enable = "avx512f")]
56017 unsafe fn test_kand_mask16() {
56018 let a: u16 = 0b11001100_00110011;
56019 let b: u16 = 0b11001100_00110011;
56020 let r = _kand_mask16(a, b);
56021 let e: u16 = 0b11001100_00110011;
56022 assert_eq!(r, e);
56023 }
56024
56025 #[simd_test(enable = "avx512f")]
56026 unsafe fn test_mm512_kor() {
56027 let a: u16 = 0b11001100_00110011;
56028 let b: u16 = 0b00101110_00001011;
56029 let r = _mm512_kor(a, b);
56030 let e: u16 = 0b11101110_00111011;
56031 assert_eq!(r, e);
56032 }
56033
56034 #[simd_test(enable = "avx512f")]
56035 unsafe fn test_kor_mask16() {
56036 let a: u16 = 0b11001100_00110011;
56037 let b: u16 = 0b00101110_00001011;
56038 let r = _kor_mask16(a, b);
56039 let e: u16 = 0b11101110_00111011;
56040 assert_eq!(r, e);
56041 }
56042
56043 #[simd_test(enable = "avx512f")]
56044 unsafe fn test_mm512_kxor() {
56045 let a: u16 = 0b11001100_00110011;
56046 let b: u16 = 0b00101110_00001011;
56047 let r = _mm512_kxor(a, b);
56048 let e: u16 = 0b11100010_00111000;
56049 assert_eq!(r, e);
56050 }
56051
56052 #[simd_test(enable = "avx512f")]
56053 unsafe fn test_kxor_mask16() {
56054 let a: u16 = 0b11001100_00110011;
56055 let b: u16 = 0b00101110_00001011;
56056 let r = _kxor_mask16(a, b);
56057 let e: u16 = 0b11100010_00111000;
56058 assert_eq!(r, e);
56059 }
56060
56061 #[simd_test(enable = "avx512f")]
56062 unsafe fn test_mm512_knot() {
56063 let a: u16 = 0b11001100_00110011;
56064 let r = _mm512_knot(a);
56065 let e: u16 = 0b00110011_11001100;
56066 assert_eq!(r, e);
56067 }
56068
56069 #[simd_test(enable = "avx512f")]
56070 unsafe fn test_knot_mask16() {
56071 let a: u16 = 0b11001100_00110011;
56072 let r = _knot_mask16(a);
56073 let e: u16 = 0b00110011_11001100;
56074 assert_eq!(r, e);
56075 }
56076
56077 #[simd_test(enable = "avx512f")]
56078 unsafe fn test_mm512_kandn() {
56079 let a: u16 = 0b11001100_00110011;
56080 let b: u16 = 0b00101110_00001011;
56081 let r = _mm512_kandn(a, b);
56082 let e: u16 = 0b00100010_00001000;
56083 assert_eq!(r, e);
56084 }
56085
56086 #[simd_test(enable = "avx512f")]
56087 unsafe fn test_kandn_mask16() {
56088 let a: u16 = 0b11001100_00110011;
56089 let b: u16 = 0b00101110_00001011;
56090 let r = _kandn_mask16(a, b);
56091 let e: u16 = 0b00100010_00001000;
56092 assert_eq!(r, e);
56093 }
56094
56095 #[simd_test(enable = "avx512f")]
56096 unsafe fn test_mm512_kxnor() {
56097 let a: u16 = 0b11001100_00110011;
56098 let b: u16 = 0b00101110_00001011;
56099 let r = _mm512_kxnor(a, b);
56100 let e: u16 = 0b00011101_11000111;
56101 assert_eq!(r, e);
56102 }
56103
56104 #[simd_test(enable = "avx512f")]
56105 unsafe fn test_kxnor_mask16() {
56106 let a: u16 = 0b11001100_00110011;
56107 let b: u16 = 0b00101110_00001011;
56108 let r = _kxnor_mask16(a, b);
56109 let e: u16 = 0b00011101_11000111;
56110 assert_eq!(r, e);
56111 }
56112
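    // `_kortest*` OR the two masks: the "z" result is 1 only if the OR is all
    // zeros, and the "c"/all_ones result is 1 only if the OR is all ones. The
    // operands below are bitwise complements, so their OR is 0xFFFF.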
56113 #[simd_test(enable = "avx512dq")]
56114 unsafe fn test_kortest_mask16_u8() {
56115 let a: __mmask16 = 0b0110100101101001;
56116 let b: __mmask16 = 0b1011011010110110;
56117 let mut all_ones: u8 = 0;
56118 let r = _kortest_mask16_u8(a, b, &mut all_ones);
56119 assert_eq!(r, 0);
56120 assert_eq!(all_ones, 1);
56121 }
56122
56123 #[simd_test(enable = "avx512dq")]
56124 unsafe fn test_kortestc_mask16_u8() {
56125 let a: __mmask16 = 0b0110100101101001;
56126 let b: __mmask16 = 0b1011011010110110;
56127 let r = _kortestc_mask16_u8(a, b);
56128 assert_eq!(r, 1);
56129 }
56130
56131 #[simd_test(enable = "avx512dq")]
56132 unsafe fn test_kortestz_mask16_u8() {
56133 let a: __mmask16 = 0b0110100101101001;
56134 let b: __mmask16 = 0b1011011010110110;
56135 let r = _kortestz_mask16_u8(a, b);
56136 assert_eq!(r, 0);
56137 }
56138
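    // `_kshiftli_mask16`/`_kshiftri_mask16` shift the 16-bit mask by the const
    // generic count, filling vacated bits with zeros.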
56139 #[simd_test(enable = "avx512dq")]
56140 unsafe fn test_kshiftli_mask16() {
56141 let a: __mmask16 = 0b1001011011000011;
56142 let r = _kshiftli_mask16::<3>(a);
56143 let e: __mmask16 = 0b1011011000011000;
56144 assert_eq!(r, e);
56145 }
56146
56147 #[simd_test(enable = "avx512dq")]
56148 unsafe fn test_kshiftri_mask16() {
56149 let a: __mmask16 = 0b0110100100111100;
56150 let r = _kshiftri_mask16::<3>(a);
56151 let e: __mmask16 = 0b0000110100100111;
56152 assert_eq!(r, e);
56153 }
56154
56155 #[simd_test(enable = "avx512f")]
56156 unsafe fn test_load_mask16() {
56157 let a: __mmask16 = 0b1001011011000011;
56158 let r = _load_mask16(&a);
56159 let e: __mmask16 = 0b1001011011000011;
56160 assert_eq!(r, e);
56161 }
56162
56163 #[simd_test(enable = "avx512f")]
56164 unsafe fn test_store_mask16() {
56165 let a: __mmask16 = 0b0110100100111100;
56166 let mut r = 0;
56167 _store_mask16(&mut r, a);
56168 let e: __mmask16 = 0b0110100100111100;
56169 assert_eq!(r, e);
56170 }
56171
56172 #[simd_test(enable = "avx512f")]
56173 unsafe fn test_mm512_kmov() {
56174 let a: u16 = 0b11001100_00110011;
56175 let r = _mm512_kmov(a);
56176 let e: u16 = 0b11001100_00110011;
56177 assert_eq!(r, e);
56178 }
56179
56180 #[simd_test(enable = "avx512f")]
56181 unsafe fn test_mm512_int2mask() {
56182 let a: i32 = 0b11001100_00110011;
56183 let r = _mm512_int2mask(a);
56184 let e: u16 = 0b11001100_00110011;
56185 assert_eq!(r, e);
56186 }
56187
56188 #[simd_test(enable = "avx512f")]
56189 unsafe fn test_mm512_mask2int() {
56190 let k1: __mmask16 = 0b11001100_00110011;
56191 let r = _mm512_mask2int(k1);
56192 let e: i32 = 0b11001100_00110011;
56193 assert_eq!(r, e);
56194 }
56195
56196 #[simd_test(enable = "avx512f")]
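    // `kunpackb` packs the low byte of `b` into bits 0..=7 of the result and the
    // low byte of `a` into bits 8..=15; the high bytes of both inputs are dropped.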
56197 unsafe fn test_mm512_kunpackb() {
56198 let a: u16 = 0b11001100_00110011;
56199 let b: u16 = 0b00101110_00001011;
56200 let r = _mm512_kunpackb(a, b);
56201 let e: u16 = 0b00110011_00001011;
56202 assert_eq!(r, e);
56203 }
56204
56205 #[simd_test(enable = "avx512f")]
56206 unsafe fn test_mm512_kortestc() {
56207 let a: u16 = 0b11001100_00110011;
56208 let b: u16 = 0b00101110_00001011;
56209 let r = _mm512_kortestc(a, b);
56210 assert_eq!(r, 0);
56211 let b: u16 = 0b11111111_11111111;
56212 let r = _mm512_kortestc(a, b);
56213 assert_eq!(r, 1);
56214 }
56215
56216 #[simd_test(enable = "avx512f")]
56217 unsafe fn test_mm512_kortestz() {
56218 let a: u16 = 0b11001100_00110011;
56219 let b: u16 = 0b00101110_00001011;
56220 let r = _mm512_kortestz(a, b);
56221 assert_eq!(r, 0);
56222 let r = _mm512_kortestz(0, 0);
56223 assert_eq!(r, 1);
56224 }
56225
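    // `vptestmd` sets a mask bit when `a & b` is non-zero in that lane;
    // `vptestnmd` (the `testn` variants further below) sets it when the AND is zero.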
56226 #[simd_test(enable = "avx512f")]
56227 unsafe fn test_mm512_test_epi32_mask() {
56228 let a = _mm512_set1_epi32(1 << 0);
56229 let b = _mm512_set1_epi32(1 << 0 | 1 << 1);
56230 let r = _mm512_test_epi32_mask(a, b);
56231 let e: __mmask16 = 0b11111111_11111111;
56232 assert_eq!(r, e);
56233 }
56234
56235 #[simd_test(enable = "avx512f")]
56236 unsafe fn test_mm512_mask_test_epi32_mask() {
56237 let a = _mm512_set1_epi32(1 << 0);
56238 let b = _mm512_set1_epi32(1 << 0 | 1 << 1);
56239 let r = _mm512_mask_test_epi32_mask(0, a, b);
56240 assert_eq!(r, 0);
56241 let r = _mm512_mask_test_epi32_mask(0b11111111_11111111, a, b);
56242 let e: __mmask16 = 0b11111111_11111111;
56243 assert_eq!(r, e);
56244 }
56245
56246 #[simd_test(enable = "avx512f,avx512vl")]
56247 unsafe fn test_mm256_test_epi32_mask() {
56248 let a = _mm256_set1_epi32(1 << 0);
56249 let b = _mm256_set1_epi32(1 << 0 | 1 << 1);
56250 let r = _mm256_test_epi32_mask(a, b);
56251 let e: __mmask8 = 0b11111111;
56252 assert_eq!(r, e);
56253 }
56254
56255 #[simd_test(enable = "avx512f,avx512vl")]
56256 unsafe fn test_mm256_mask_test_epi32_mask() {
56257 let a = _mm256_set1_epi32(1 << 0);
56258 let b = _mm256_set1_epi32(1 << 0 | 1 << 1);
56259 let r = _mm256_mask_test_epi32_mask(0, a, b);
56260 assert_eq!(r, 0);
56261 let r = _mm256_mask_test_epi32_mask(0b11111111, a, b);
56262 let e: __mmask8 = 0b11111111;
56263 assert_eq!(r, e);
56264 }
56265
56266 #[simd_test(enable = "avx512f,avx512vl")]
56267 unsafe fn test_mm_test_epi32_mask() {
56268 let a = _mm_set1_epi32(1 << 0);
56269 let b = _mm_set1_epi32(1 << 0 | 1 << 1);
56270 let r = _mm_test_epi32_mask(a, b);
56271 let e: __mmask8 = 0b00001111;
56272 assert_eq!(r, e);
56273 }
56274
56275 #[simd_test(enable = "avx512f,avx512vl")]
56276 unsafe fn test_mm_mask_test_epi32_mask() {
56277 let a = _mm_set1_epi32(1 << 0);
56278 let b = _mm_set1_epi32(1 << 0 | 1 << 1);
56279 let r = _mm_mask_test_epi32_mask(0, a, b);
56280 assert_eq!(r, 0);
56281 let r = _mm_mask_test_epi32_mask(0b11111111, a, b);
56282 let e: __mmask8 = 0b00001111;
56283 assert_eq!(r, e);
56284 }
56285
56286 #[simd_test(enable = "avx512f")]
56287 unsafe fn test_mm512_testn_epi32_mask() {
56288 let a = _mm512_set1_epi32(1 << 0);
56289 let b = _mm512_set1_epi32(1 << 0 | 1 << 1);
56290 let r = _mm512_testn_epi32_mask(a, b);
56291 let e: __mmask16 = 0b00000000_00000000;
56292 assert_eq!(r, e);
56293 }
56294
56295 #[simd_test(enable = "avx512f")]
56296 unsafe fn test_mm512_mask_testn_epi32_mask() {
56297 let a = _mm512_set1_epi32(1 << 0);
56298 let b = _mm512_set1_epi32(1 << 1);
56299         let r = _mm512_mask_testn_epi32_mask(0, a, b);
56300 assert_eq!(r, 0);
56301 let r = _mm512_mask_testn_epi32_mask(0b11111111_11111111, a, b);
56302 let e: __mmask16 = 0b11111111_11111111;
56303 assert_eq!(r, e);
56304 }
56305
56306 #[simd_test(enable = "avx512f,avx512vl")]
56307 unsafe fn test_mm256_testn_epi32_mask() {
56308 let a = _mm256_set1_epi32(1 << 0);
56309 let b = _mm256_set1_epi32(1 << 1);
56310 let r = _mm256_testn_epi32_mask(a, b);
56311 let e: __mmask8 = 0b11111111;
56312 assert_eq!(r, e);
56313 }
56314
56315 #[simd_test(enable = "avx512f,avx512vl")]
56316 unsafe fn test_mm256_mask_testn_epi32_mask() {
56317 let a = _mm256_set1_epi32(1 << 0);
56318 let b = _mm256_set1_epi32(1 << 1);
56319         let r = _mm256_mask_testn_epi32_mask(0, a, b);
56320 assert_eq!(r, 0);
56321 let r = _mm256_mask_testn_epi32_mask(0b11111111, a, b);
56322 let e: __mmask8 = 0b11111111;
56323 assert_eq!(r, e);
56324 }
56325
56326 #[simd_test(enable = "avx512f,avx512vl")]
56327 unsafe fn test_mm_testn_epi32_mask() {
56328 let a = _mm_set1_epi32(1 << 0);
56329 let b = _mm_set1_epi32(1 << 1);
56330 let r = _mm_testn_epi32_mask(a, b);
56331 let e: __mmask8 = 0b00001111;
56332 assert_eq!(r, e);
56333 }
56334
56335 #[simd_test(enable = "avx512f,avx512vl")]
56336 unsafe fn test_mm_mask_testn_epi32_mask() {
56337 let a = _mm_set1_epi32(1 << 0);
56338 let b = _mm_set1_epi32(1 << 1);
56339         let r = _mm_mask_testn_epi32_mask(0, a, b);
56340 assert_eq!(r, 0);
56341 let r = _mm_mask_testn_epi32_mask(0b11111111, a, b);
56342 let e: __mmask8 = 0b00001111;
56343 assert_eq!(r, e);
56344 }
56345
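    // The streaming (non-temporal) stores require a 64-byte-aligned destination,
    // hence the `#[repr(align(64))]` wrappers; the tests are ignored under Miri,
    // presumably because it does not model non-temporal memory accesses.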
56346 #[simd_test(enable = "avx512f")]
56347 #[cfg_attr(miri, ignore)]
56348 unsafe fn test_mm512_stream_ps() {
56349 #[repr(align(64))]
56350 struct Memory {
56351 pub data: [f32; 16], // 64 bytes
56352 }
56353 let a = _mm512_set1_ps(7.0);
56354 let mut mem = Memory { data: [-1.0; 16] };
56355
56356 _mm512_stream_ps(&mut mem.data[0] as *mut f32, a);
56357 for i in 0..16 {
56358 assert_eq!(mem.data[i], get_m512(a, i));
56359 }
56360 }
56361
56362 #[simd_test(enable = "avx512f")]
56363 #[cfg_attr(miri, ignore)]
56364 unsafe fn test_mm512_stream_pd() {
56365 #[repr(align(64))]
56366 struct Memory {
56367 pub data: [f64; 8],
56368 }
56369 let a = _mm512_set1_pd(7.0);
56370 let mut mem = Memory { data: [-1.0; 8] };
56371
56372 _mm512_stream_pd(&mut mem.data[0] as *mut f64, a);
56373 for i in 0..8 {
56374 assert_eq!(mem.data[i], get_m512d(a, i));
56375 }
56376 }
56377
56378 #[simd_test(enable = "avx512f")]
56379 #[cfg_attr(miri, ignore)]
56380 unsafe fn test_mm512_stream_si512() {
56381 #[repr(align(64))]
56382 struct Memory {
56383 pub data: [i64; 8],
56384 }
56385 let a = _mm512_set1_epi32(7);
56386 let mut mem = Memory { data: [-1; 8] };
56387
56388 _mm512_stream_si512(mem.data.as_mut_ptr().cast(), a);
56389 for i in 0..8 {
56390 assert_eq!(mem.data[i], get_m512i(a, i));
56391 }
56392 }
56393
56394 #[simd_test(enable = "avx512f")]
56395 unsafe fn test_mm512_stream_load_si512() {
56396 let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
56397 let r = _mm512_stream_load_si512(core::ptr::addr_of!(a) as *const _);
56398 assert_eq_m512i(a, r);
56399 }
56400
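    // The `reduce_*` helpers fold all 16 lanes into a scalar. The masked variants
    // ignore masked-off lanes by substituting the operation's identity element
    // (0 for add, 1 for mul, as the masked tests below demonstrate) before reducing.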
56401 #[simd_test(enable = "avx512f")]
56402 unsafe fn test_mm512_reduce_add_epi32() {
56403 let a = _mm512_set1_epi32(1);
56404 let e: i32 = _mm512_reduce_add_epi32(a);
56405 assert_eq!(16, e);
56406 }
56407
56408 #[simd_test(enable = "avx512f")]
56409 unsafe fn test_mm512_mask_reduce_add_epi32() {
56410 let a = _mm512_set1_epi32(1);
56411 let e: i32 = _mm512_mask_reduce_add_epi32(0b11111111_00000000, a);
56412 assert_eq!(8, e);
56413 }
56414
56415 #[simd_test(enable = "avx512f")]
56416 unsafe fn test_mm512_reduce_add_ps() {
56417 let a = _mm512_set1_ps(1.);
56418 let e: f32 = _mm512_reduce_add_ps(a);
56419 assert_eq!(16., e);
56420 }
56421
56422 #[simd_test(enable = "avx512f")]
56423 unsafe fn test_mm512_mask_reduce_add_ps() {
56424 let a = _mm512_set1_ps(1.);
56425 let e: f32 = _mm512_mask_reduce_add_ps(0b11111111_00000000, a);
56426 assert_eq!(8., e);
56427 }
56428
56429 #[simd_test(enable = "avx512f")]
56430 unsafe fn test_mm512_reduce_mul_epi32() {
56431 let a = _mm512_set1_epi32(2);
56432 let e: i32 = _mm512_reduce_mul_epi32(a);
56433 assert_eq!(65536, e);
56434 }
56435
56436 #[simd_test(enable = "avx512f")]
56437 unsafe fn test_mm512_mask_reduce_mul_epi32() {
56438 let a = _mm512_set1_epi32(2);
56439 let e: i32 = _mm512_mask_reduce_mul_epi32(0b11111111_00000000, a);
56440 assert_eq!(256, e);
56441 }
56442
56443 #[simd_test(enable = "avx512f")]
56444 unsafe fn test_mm512_reduce_mul_ps() {
56445 let a = _mm512_set1_ps(2.);
56446 let e: f32 = _mm512_reduce_mul_ps(a);
56447 assert_eq!(65536., e);
56448 }
56449
56450 #[simd_test(enable = "avx512f")]
56451 unsafe fn test_mm512_mask_reduce_mul_ps() {
56452 let a = _mm512_set1_ps(2.);
56453 let e: f32 = _mm512_mask_reduce_mul_ps(0b11111111_00000000, a);
56454 assert_eq!(256., e);
56455 }
56456
56457 #[simd_test(enable = "avx512f")]
56458 unsafe fn test_mm512_reduce_max_epi32() {
56459 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56460 let e: i32 = _mm512_reduce_max_epi32(a);
56461 assert_eq!(15, e);
56462 }
56463
56464 #[simd_test(enable = "avx512f")]
56465 unsafe fn test_mm512_mask_reduce_max_epi32() {
56466 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56467 let e: i32 = _mm512_mask_reduce_max_epi32(0b11111111_00000000, a);
56468 assert_eq!(7, e);
56469 }
56470
56471 #[simd_test(enable = "avx512f")]
56472 unsafe fn test_mm512_reduce_max_epu32() {
56473 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56474 let e: u32 = _mm512_reduce_max_epu32(a);
56475 assert_eq!(15, e);
56476 }
56477
56478 #[simd_test(enable = "avx512f")]
56479 unsafe fn test_mm512_mask_reduce_max_epu32() {
56480 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56481 let e: u32 = _mm512_mask_reduce_max_epu32(0b11111111_00000000, a);
56482 assert_eq!(7, e);
56483 }
56484
56485 #[simd_test(enable = "avx512f")]
56486 unsafe fn test_mm512_reduce_max_ps() {
56487 let a = _mm512_set_ps(
56488 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
56489 );
56490 let e: f32 = _mm512_reduce_max_ps(a);
56491 assert_eq!(15., e);
56492 }
56493
56494 #[simd_test(enable = "avx512f")]
56495 unsafe fn test_mm512_mask_reduce_max_ps() {
56496 let a = _mm512_set_ps(
56497 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
56498 );
56499 let e: f32 = _mm512_mask_reduce_max_ps(0b11111111_00000000, a);
56500 assert_eq!(7., e);
56501 }
56502
56503 #[simd_test(enable = "avx512f")]
56504 unsafe fn test_mm512_reduce_min_epi32() {
56505 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56506 let e: i32 = _mm512_reduce_min_epi32(a);
56507 assert_eq!(0, e);
56508 }
56509
56510 #[simd_test(enable = "avx512f")]
56511 unsafe fn test_mm512_mask_reduce_min_epi32() {
56512 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56513 let e: i32 = _mm512_mask_reduce_min_epi32(0b11111111_00000000, a);
56514 assert_eq!(0, e);
56515 }
56516
56517 #[simd_test(enable = "avx512f")]
56518 unsafe fn test_mm512_reduce_min_epu32() {
56519 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56520 let e: u32 = _mm512_reduce_min_epu32(a);
56521 assert_eq!(0, e);
56522 }
56523
56524 #[simd_test(enable = "avx512f")]
56525 unsafe fn test_mm512_mask_reduce_min_epu32() {
56526 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56527 let e: u32 = _mm512_mask_reduce_min_epu32(0b11111111_00000000, a);
56528 assert_eq!(0, e);
56529 }
56530
56531 #[simd_test(enable = "avx512f")]
56532 unsafe fn test_mm512_reduce_min_ps() {
56533 let a = _mm512_set_ps(
56534 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
56535 );
56536 let e: f32 = _mm512_reduce_min_ps(a);
56537 assert_eq!(0., e);
56538 }
56539
56540 #[simd_test(enable = "avx512f")]
56541 unsafe fn test_mm512_mask_reduce_min_ps() {
56542 let a = _mm512_set_ps(
56543 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
56544 );
56545 let e: f32 = _mm512_mask_reduce_min_ps(0b11111111_00000000, a);
56546 assert_eq!(0., e);
56547 }
56548
56549 #[simd_test(enable = "avx512f")]
56550 unsafe fn test_mm512_reduce_and_epi32() {
56551 let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
56552 let e: i32 = _mm512_reduce_and_epi32(a);
56553 assert_eq!(0, e);
56554 }
56555
56556 #[simd_test(enable = "avx512f")]
56557 unsafe fn test_mm512_mask_reduce_and_epi32() {
56558 let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
56559 let e: i32 = _mm512_mask_reduce_and_epi32(0b11111111_00000000, a);
56560 assert_eq!(1, e);
56561 }
56562
56563 #[simd_test(enable = "avx512f")]
56564 unsafe fn test_mm512_reduce_or_epi32() {
56565 let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
56566 let e: i32 = _mm512_reduce_or_epi32(a);
56567 assert_eq!(3, e);
56568 }
56569
56570 #[simd_test(enable = "avx512f")]
56571 unsafe fn test_mm512_mask_reduce_or_epi32() {
56572 let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
56573         let e: i32 = _mm512_mask_reduce_or_epi32(0b11111111_00000000, a);
56574 assert_eq!(1, e);
56575 }
56576
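    // `vpcompressd` packs the elements selected by the mask contiguously into the
    // low lanes of the result; remaining lanes take `src` (mask variant) or zero
    // (maskz variant).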
56577 #[simd_test(enable = "avx512f")]
56578 unsafe fn test_mm512_mask_compress_epi32() {
56579 let src = _mm512_set1_epi32(200);
56580 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56581 let r = _mm512_mask_compress_epi32(src, 0, a);
56582 assert_eq_m512i(r, src);
56583 let r = _mm512_mask_compress_epi32(src, 0b01010101_01010101, a);
56584 let e = _mm512_set_epi32(
56585 200, 200, 200, 200, 200, 200, 200, 200, 1, 3, 5, 7, 9, 11, 13, 15,
56586 );
56587 assert_eq_m512i(r, e);
56588 }
56589
56590 #[simd_test(enable = "avx512f")]
56591 unsafe fn test_mm512_maskz_compress_epi32() {
56592 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56593 let r = _mm512_maskz_compress_epi32(0, a);
56594 assert_eq_m512i(r, _mm512_setzero_si512());
56595 let r = _mm512_maskz_compress_epi32(0b01010101_01010101, a);
56596 let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 5, 7, 9, 11, 13, 15);
56597 assert_eq_m512i(r, e);
56598 }
56599
56600 #[simd_test(enable = "avx512f,avx512vl")]
56601 unsafe fn test_mm256_mask_compress_epi32() {
56602 let src = _mm256_set1_epi32(200);
56603 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
56604 let r = _mm256_mask_compress_epi32(src, 0, a);
56605 assert_eq_m256i(r, src);
56606 let r = _mm256_mask_compress_epi32(src, 0b01010101, a);
56607 let e = _mm256_set_epi32(200, 200, 200, 200, 1, 3, 5, 7);
56608 assert_eq_m256i(r, e);
56609 }
56610
56611 #[simd_test(enable = "avx512f,avx512vl")]
56612 unsafe fn test_mm256_maskz_compress_epi32() {
56613 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
56614 let r = _mm256_maskz_compress_epi32(0, a);
56615 assert_eq_m256i(r, _mm256_setzero_si256());
56616 let r = _mm256_maskz_compress_epi32(0b01010101, a);
56617 let e = _mm256_set_epi32(0, 0, 0, 0, 1, 3, 5, 7);
56618 assert_eq_m256i(r, e);
56619 }
56620
56621 #[simd_test(enable = "avx512f,avx512vl")]
56622 unsafe fn test_mm_mask_compress_epi32() {
56623 let src = _mm_set1_epi32(200);
56624 let a = _mm_set_epi32(0, 1, 2, 3);
56625 let r = _mm_mask_compress_epi32(src, 0, a);
56626 assert_eq_m128i(r, src);
56627 let r = _mm_mask_compress_epi32(src, 0b00000101, a);
56628 let e = _mm_set_epi32(200, 200, 1, 3);
56629 assert_eq_m128i(r, e);
56630 }
56631
56632 #[simd_test(enable = "avx512f,avx512vl")]
56633 unsafe fn test_mm_maskz_compress_epi32() {
56634 let a = _mm_set_epi32(0, 1, 2, 3);
56635 let r = _mm_maskz_compress_epi32(0, a);
56636 assert_eq_m128i(r, _mm_setzero_si128());
56637 let r = _mm_maskz_compress_epi32(0b00000101, a);
56638 let e = _mm_set_epi32(0, 0, 1, 3);
56639 assert_eq_m128i(r, e);
56640 }
56641
56642 #[simd_test(enable = "avx512f")]
56643 unsafe fn test_mm512_mask_compress_ps() {
56644 let src = _mm512_set1_ps(200.);
56645 let a = _mm512_set_ps(
56646 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
56647 );
56648 let r = _mm512_mask_compress_ps(src, 0, a);
56649 assert_eq_m512(r, src);
56650 let r = _mm512_mask_compress_ps(src, 0b01010101_01010101, a);
56651 let e = _mm512_set_ps(
56652 200., 200., 200., 200., 200., 200., 200., 200., 1., 3., 5., 7., 9., 11., 13., 15.,
56653 );
56654 assert_eq_m512(r, e);
56655 }
56656
56657 #[simd_test(enable = "avx512f")]
56658 unsafe fn test_mm512_maskz_compress_ps() {
56659 let a = _mm512_set_ps(
56660 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
56661 );
56662 let r = _mm512_maskz_compress_ps(0, a);
56663 assert_eq_m512(r, _mm512_setzero_ps());
56664 let r = _mm512_maskz_compress_ps(0b01010101_01010101, a);
56665 let e = _mm512_set_ps(
56666 0., 0., 0., 0., 0., 0., 0., 0., 1., 3., 5., 7., 9., 11., 13., 15.,
56667 );
56668 assert_eq_m512(r, e);
56669 }
56670
56671 #[simd_test(enable = "avx512f,avx512vl")]
56672 unsafe fn test_mm256_mask_compress_ps() {
56673 let src = _mm256_set1_ps(200.);
56674 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
56675 let r = _mm256_mask_compress_ps(src, 0, a);
56676 assert_eq_m256(r, src);
56677 let r = _mm256_mask_compress_ps(src, 0b01010101, a);
56678 let e = _mm256_set_ps(200., 200., 200., 200., 1., 3., 5., 7.);
56679 assert_eq_m256(r, e);
56680 }
56681
56682 #[simd_test(enable = "avx512f,avx512vl")]
56683 unsafe fn test_mm256_maskz_compress_ps() {
56684 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
56685 let r = _mm256_maskz_compress_ps(0, a);
56686 assert_eq_m256(r, _mm256_setzero_ps());
56687 let r = _mm256_maskz_compress_ps(0b01010101, a);
56688 let e = _mm256_set_ps(0., 0., 0., 0., 1., 3., 5., 7.);
56689 assert_eq_m256(r, e);
56690 }
56691
56692 #[simd_test(enable = "avx512f,avx512vl")]
56693 unsafe fn test_mm_mask_compress_ps() {
56694 let src = _mm_set1_ps(200.);
56695 let a = _mm_set_ps(0., 1., 2., 3.);
56696 let r = _mm_mask_compress_ps(src, 0, a);
56697 assert_eq_m128(r, src);
56698 let r = _mm_mask_compress_ps(src, 0b00000101, a);
56699 let e = _mm_set_ps(200., 200., 1., 3.);
56700 assert_eq_m128(r, e);
56701 }
56702
56703 #[simd_test(enable = "avx512f,avx512vl")]
56704 unsafe fn test_mm_maskz_compress_ps() {
56705 let a = _mm_set_ps(0., 1., 2., 3.);
56706 let r = _mm_maskz_compress_ps(0, a);
56707 assert_eq_m128(r, _mm_setzero_ps());
56708 let r = _mm_maskz_compress_ps(0b00000101, a);
56709 let e = _mm_set_ps(0., 0., 1., 3.);
56710 assert_eq_m128(r, e);
56711 }
56712
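    // `compressstoreu` writes only as many elements as there are set mask bits,
    // contiguously and without alignment requirements; the rest of the destination
    // buffer is left untouched, hence the zero padding in the expectations below.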
56713 #[simd_test(enable = "avx512f")]
56714 unsafe fn test_mm512_mask_compressstoreu_epi32() {
56715 let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
56716 let mut r = [0_i32; 16];
56717 _mm512_mask_compressstoreu_epi32(r.as_mut_ptr() as *mut _, 0, a);
56718 assert_eq!(&r, &[0_i32; 16]);
56719 _mm512_mask_compressstoreu_epi32(r.as_mut_ptr() as *mut _, 0b1111000011001010, a);
56720 assert_eq!(&r, &[2, 4, 7, 8, 13, 14, 15, 16, 0, 0, 0, 0, 0, 0, 0, 0]);
56721 }
56722
56723 #[simd_test(enable = "avx512f,avx512vl")]
56724 unsafe fn test_mm256_mask_compressstoreu_epi32() {
56725 let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
56726 let mut r = [0_i32; 8];
56727 _mm256_mask_compressstoreu_epi32(r.as_mut_ptr() as *mut _, 0, a);
56728 assert_eq!(&r, &[0_i32; 8]);
56729 _mm256_mask_compressstoreu_epi32(r.as_mut_ptr() as *mut _, 0b11001010, a);
56730 assert_eq!(&r, &[2, 4, 7, 8, 0, 0, 0, 0]);
56731 }
56732
56733 #[simd_test(enable = "avx512f,avx512vl")]
56734 unsafe fn test_mm_mask_compressstoreu_epi32() {
56735 let a = _mm_setr_epi32(1, 2, 3, 4);
56736 let mut r = [0_i32; 4];
56737 _mm_mask_compressstoreu_epi32(r.as_mut_ptr() as *mut _, 0, a);
56738 assert_eq!(&r, &[0_i32; 4]);
56739 _mm_mask_compressstoreu_epi32(r.as_mut_ptr() as *mut _, 0b1011, a);
56740 assert_eq!(&r, &[1, 2, 4, 0]);
56741 }
56742
56743 #[simd_test(enable = "avx512f")]
56744 unsafe fn test_mm512_mask_compressstoreu_epi64() {
56745 let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
56746 let mut r = [0_i64; 8];
56747 _mm512_mask_compressstoreu_epi64(r.as_mut_ptr() as *mut _, 0, a);
56748 assert_eq!(&r, &[0_i64; 8]);
56749 _mm512_mask_compressstoreu_epi64(r.as_mut_ptr() as *mut _, 0b11001010, a);
56750 assert_eq!(&r, &[2, 4, 7, 8, 0, 0, 0, 0]);
56751 }
56752
56753 #[simd_test(enable = "avx512f,avx512vl")]
56754 unsafe fn test_mm256_mask_compressstoreu_epi64() {
56755 let a = _mm256_setr_epi64x(1, 2, 3, 4);
56756 let mut r = [0_i64; 4];
56757 _mm256_mask_compressstoreu_epi64(r.as_mut_ptr() as *mut _, 0, a);
56758 assert_eq!(&r, &[0_i64; 4]);
56759 _mm256_mask_compressstoreu_epi64(r.as_mut_ptr() as *mut _, 0b1011, a);
56760 assert_eq!(&r, &[1, 2, 4, 0]);
56761 }
56762
56763 #[simd_test(enable = "avx512f,avx512vl")]
56764 unsafe fn test_mm_mask_compressstoreu_epi64() {
56765 let a = _mm_setr_epi64x(1, 2);
56766 let mut r = [0_i64; 2];
56767 _mm_mask_compressstoreu_epi64(r.as_mut_ptr() as *mut _, 0, a);
56768 assert_eq!(&r, &[0_i64; 2]);
56769 _mm_mask_compressstoreu_epi64(r.as_mut_ptr() as *mut _, 0b10, a);
56770 assert_eq!(&r, &[2, 0]);
56771 }
56772
56773 #[simd_test(enable = "avx512f")]
56774 unsafe fn test_mm512_mask_compressstoreu_ps() {
56775 let a = _mm512_setr_ps(
56776 1_f32, 2_f32, 3_f32, 4_f32, 5_f32, 6_f32, 7_f32, 8_f32, 9_f32, 10_f32, 11_f32, 12_f32,
56777 13_f32, 14_f32, 15_f32, 16_f32,
56778 );
56779 let mut r = [0_f32; 16];
56780 _mm512_mask_compressstoreu_ps(r.as_mut_ptr() as *mut _, 0, a);
56781 assert_eq!(&r, &[0_f32; 16]);
56782 _mm512_mask_compressstoreu_ps(r.as_mut_ptr() as *mut _, 0b1111000011001010, a);
56783 assert_eq!(
56784 &r,
56785 &[
56786 2_f32, 4_f32, 7_f32, 8_f32, 13_f32, 14_f32, 15_f32, 16_f32, 0_f32, 0_f32, 0_f32,
56787 0_f32, 0_f32, 0_f32, 0_f32, 0_f32
56788 ]
56789 );
56790 }
56791
56792 #[simd_test(enable = "avx512f,avx512vl")]
56793 unsafe fn test_mm256_mask_compressstoreu_ps() {
56794 let a = _mm256_setr_ps(1_f32, 2_f32, 3_f32, 4_f32, 5_f32, 6_f32, 7_f32, 8_f32);
56795 let mut r = [0_f32; 8];
56796 _mm256_mask_compressstoreu_ps(r.as_mut_ptr() as *mut _, 0, a);
56797 assert_eq!(&r, &[0_f32; 8]);
56798 _mm256_mask_compressstoreu_ps(r.as_mut_ptr() as *mut _, 0b11001010, a);
56799 assert_eq!(
56800 &r,
56801 &[2_f32, 4_f32, 7_f32, 8_f32, 0_f32, 0_f32, 0_f32, 0_f32]
56802 );
56803 }
56804
56805 #[simd_test(enable = "avx512f,avx512vl")]
56806 unsafe fn test_mm_mask_compressstoreu_ps() {
56807 let a = _mm_setr_ps(1_f32, 2_f32, 3_f32, 4_f32);
56808 let mut r = [0.; 4];
56809 _mm_mask_compressstoreu_ps(r.as_mut_ptr() as *mut _, 0, a);
56810 assert_eq!(&r, &[0.; 4]);
56811 _mm_mask_compressstoreu_ps(r.as_mut_ptr() as *mut _, 0b1011, a);
56812 assert_eq!(&r, &[1_f32, 2_f32, 4_f32, 0_f32]);
56813 }
56814
56815 #[simd_test(enable = "avx512f")]
56816 unsafe fn test_mm512_mask_compressstoreu_pd() {
56817 let a = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.);
56818 let mut r = [0.; 8];
56819 _mm512_mask_compressstoreu_pd(r.as_mut_ptr() as *mut _, 0, a);
56820 assert_eq!(&r, &[0.; 8]);
56821 _mm512_mask_compressstoreu_pd(r.as_mut_ptr() as *mut _, 0b11001010, a);
56822 assert_eq!(&r, &[2., 4., 7., 8., 0., 0., 0., 0.]);
56823 }
56824
56825 #[simd_test(enable = "avx512f,avx512vl")]
56826 unsafe fn test_mm256_mask_compressstoreu_pd() {
56827 let a = _mm256_setr_pd(1., 2., 3., 4.);
56828 let mut r = [0.; 4];
56829 _mm256_mask_compressstoreu_pd(r.as_mut_ptr() as *mut _, 0, a);
56830 assert_eq!(&r, &[0.; 4]);
56831 _mm256_mask_compressstoreu_pd(r.as_mut_ptr() as *mut _, 0b1011, a);
56832 assert_eq!(&r, &[1., 2., 4., 0.]);
56833 }
56834
56835 #[simd_test(enable = "avx512f,avx512vl")]
56836 unsafe fn test_mm_mask_compressstoreu_pd() {
56837 let a = _mm_setr_pd(1., 2.);
56838 let mut r = [0.; 2];
56839 _mm_mask_compressstoreu_pd(r.as_mut_ptr() as *mut _, 0, a);
56840 assert_eq!(&r, &[0.; 2]);
56841 _mm_mask_compressstoreu_pd(r.as_mut_ptr() as *mut _, 0b10, a);
56842 assert_eq!(&r, &[2., 0.]);
56843 }
56844
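    // `vpexpandd` is the inverse of compress: it reads contiguous elements from the
    // low end of `a` and scatters them to the positions of the set mask bits, with
    // the other positions taken from `src` (mask) or zeroed (maskz).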
56845 #[simd_test(enable = "avx512f")]
56846 unsafe fn test_mm512_mask_expand_epi32() {
56847 let src = _mm512_set1_epi32(200);
56848 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56849 let r = _mm512_mask_expand_epi32(src, 0, a);
56850 assert_eq_m512i(r, src);
56851 let r = _mm512_mask_expand_epi32(src, 0b01010101_01010101, a);
56852 let e = _mm512_set_epi32(
56853 200, 8, 200, 9, 200, 10, 200, 11, 200, 12, 200, 13, 200, 14, 200, 15,
56854 );
56855 assert_eq_m512i(r, e);
56856 }
56857
56858 #[simd_test(enable = "avx512f")]
56859 unsafe fn test_mm512_maskz_expand_epi32() {
56860 let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
56861 let r = _mm512_maskz_expand_epi32(0, a);
56862 assert_eq_m512i(r, _mm512_setzero_si512());
56863 let r = _mm512_maskz_expand_epi32(0b01010101_01010101, a);
56864 let e = _mm512_set_epi32(0, 8, 0, 9, 0, 10, 0, 11, 0, 12, 0, 13, 0, 14, 0, 15);
56865 assert_eq_m512i(r, e);
56866 }
56867
56868 #[simd_test(enable = "avx512f,avx512vl")]
56869 unsafe fn test_mm256_mask_expand_epi32() {
56870 let src = _mm256_set1_epi32(200);
56871 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
56872 let r = _mm256_mask_expand_epi32(src, 0, a);
56873 assert_eq_m256i(r, src);
56874 let r = _mm256_mask_expand_epi32(src, 0b01010101, a);
56875 let e = _mm256_set_epi32(200, 4, 200, 5, 200, 6, 200, 7);
56876 assert_eq_m256i(r, e);
56877 }
56878
56879 #[simd_test(enable = "avx512f,avx512vl")]
56880 unsafe fn test_mm256_maskz_expand_epi32() {
56881 let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
56882 let r = _mm256_maskz_expand_epi32(0, a);
56883 assert_eq_m256i(r, _mm256_setzero_si256());
56884 let r = _mm256_maskz_expand_epi32(0b01010101, a);
56885 let e = _mm256_set_epi32(0, 4, 0, 5, 0, 6, 0, 7);
56886 assert_eq_m256i(r, e);
56887 }
56888
56889 #[simd_test(enable = "avx512f,avx512vl")]
56890 unsafe fn test_mm_mask_expand_epi32() {
56891 let src = _mm_set1_epi32(200);
56892 let a = _mm_set_epi32(0, 1, 2, 3);
56893 let r = _mm_mask_expand_epi32(src, 0, a);
56894 assert_eq_m128i(r, src);
56895 let r = _mm_mask_expand_epi32(src, 0b00000101, a);
56896 let e = _mm_set_epi32(200, 2, 200, 3);
56897 assert_eq_m128i(r, e);
56898 }
56899
56900 #[simd_test(enable = "avx512f,avx512vl")]
56901 unsafe fn test_mm_maskz_expand_epi32() {
56902 let a = _mm_set_epi32(0, 1, 2, 3);
56903 let r = _mm_maskz_expand_epi32(0, a);
56904 assert_eq_m128i(r, _mm_setzero_si128());
56905 let r = _mm_maskz_expand_epi32(0b00000101, a);
56906 let e = _mm_set_epi32(0, 2, 0, 3);
56907 assert_eq_m128i(r, e);
56908 }
56909
56910 #[simd_test(enable = "avx512f")]
56911 unsafe fn test_mm512_mask_expand_ps() {
56912 let src = _mm512_set1_ps(200.);
56913 let a = _mm512_set_ps(
56914 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
56915 );
56916 let r = _mm512_mask_expand_ps(src, 0, a);
56917 assert_eq_m512(r, src);
56918 let r = _mm512_mask_expand_ps(src, 0b01010101_01010101, a);
56919 let e = _mm512_set_ps(
56920 200., 8., 200., 9., 200., 10., 200., 11., 200., 12., 200., 13., 200., 14., 200., 15.,
56921 );
56922 assert_eq_m512(r, e);
56923 }
56924
56925 #[simd_test(enable = "avx512f")]
56926 unsafe fn test_mm512_maskz_expand_ps() {
56927 let a = _mm512_set_ps(
56928 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
56929 );
56930 let r = _mm512_maskz_expand_ps(0, a);
56931 assert_eq_m512(r, _mm512_setzero_ps());
56932 let r = _mm512_maskz_expand_ps(0b01010101_01010101, a);
56933 let e = _mm512_set_ps(
56934 0., 8., 0., 9., 0., 10., 0., 11., 0., 12., 0., 13., 0., 14., 0., 15.,
56935 );
56936 assert_eq_m512(r, e);
56937 }
56938
56939 #[simd_test(enable = "avx512f,avx512vl")]
56940 unsafe fn test_mm256_mask_expand_ps() {
56941 let src = _mm256_set1_ps(200.);
56942 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
56943 let r = _mm256_mask_expand_ps(src, 0, a);
56944 assert_eq_m256(r, src);
56945 let r = _mm256_mask_expand_ps(src, 0b01010101, a);
56946 let e = _mm256_set_ps(200., 4., 200., 5., 200., 6., 200., 7.);
56947 assert_eq_m256(r, e);
56948 }
56949
56950 #[simd_test(enable = "avx512f,avx512vl")]
56951 unsafe fn test_mm256_maskz_expand_ps() {
56952 let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
56953 let r = _mm256_maskz_expand_ps(0, a);
56954 assert_eq_m256(r, _mm256_setzero_ps());
56955 let r = _mm256_maskz_expand_ps(0b01010101, a);
56956 let e = _mm256_set_ps(0., 4., 0., 5., 0., 6., 0., 7.);
56957 assert_eq_m256(r, e);
56958 }
56959
56960 #[simd_test(enable = "avx512f,avx512vl")]
56961 unsafe fn test_mm_mask_expand_ps() {
56962 let src = _mm_set1_ps(200.);
56963 let a = _mm_set_ps(0., 1., 2., 3.);
56964 let r = _mm_mask_expand_ps(src, 0, a);
56965 assert_eq_m128(r, src);
56966 let r = _mm_mask_expand_ps(src, 0b00000101, a);
56967 let e = _mm_set_ps(200., 2., 200., 3.);
56968 assert_eq_m128(r, e);
56969 }
56970
56971 #[simd_test(enable = "avx512f,avx512vl")]
56972 unsafe fn test_mm_maskz_expand_ps() {
56973 let a = _mm_set_ps(0., 1., 2., 3.);
56974 let r = _mm_maskz_expand_ps(0, a);
56975 assert_eq_m128(r, _mm_setzero_ps());
56976 let r = _mm_maskz_expand_ps(0b00000101, a);
56977 let e = _mm_set_ps(0., 2., 0., 3.);
56978 assert_eq_m128(r, e);
56979 }
56980
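    // The unaligned-load tests read straight from a slice; `black_box` keeps the
    // pointer opaque so the load cannot be constant-folded away.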
56981 #[simd_test(enable = "avx512f")]
56982 unsafe fn test_mm512_loadu_epi32() {
56983 let a = &[4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50];
56984 let p = a.as_ptr();
56985 let r = _mm512_loadu_epi32(black_box(p));
56986 let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
56987 assert_eq_m512i(r, e);
56988 }
56989
56990 #[simd_test(enable = "avx512f,avx512vl")]
56991 unsafe fn test_mm256_loadu_epi32() {
56992 let a = &[4, 3, 2, 5, 8, 9, 64, 50];
56993 let p = a.as_ptr();
56994 let r = _mm256_loadu_epi32(black_box(p));
56995 let e = _mm256_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50);
56996 assert_eq_m256i(r, e);
56997 }
56998
56999 #[simd_test(enable = "avx512f,avx512vl")]
57000 unsafe fn test_mm_loadu_epi32() {
57001 let a = &[4, 3, 2, 5];
57002 let p = a.as_ptr();
57003 let r = _mm_loadu_epi32(black_box(p));
57004 let e = _mm_setr_epi32(4, 3, 2, 5);
57005 assert_eq_m128i(r, e);
57006 }
57007
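    // The `cvt*_storeu_epi16`/`_epi8` tests narrow each 32-bit lane and store the
    // packed result: plain `cvtepi32` truncates, `cvtsepi32` saturates as signed
    // (i32::MAX -> i16::MAX), and `cvtusepi32` saturates as unsigned
    // (i32::MAX -> u16::MAX).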
57008 #[simd_test(enable = "avx512f")]
57009 unsafe fn test_mm512_mask_cvtepi32_storeu_epi16() {
57010 let a = _mm512_set1_epi32(9);
57011 let mut r = _mm256_undefined_si256();
57012 _mm512_mask_cvtepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
57013 let e = _mm256_set1_epi16(9);
57014 assert_eq_m256i(r, e);
57015 }
57016
57017 #[simd_test(enable = "avx512f,avx512vl")]
57018 unsafe fn test_mm256_mask_cvtepi32_storeu_epi16() {
57019 let a = _mm256_set1_epi32(9);
57020 let mut r = _mm_undefined_si128();
57021 _mm256_mask_cvtepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
57022 let e = _mm_set1_epi16(9);
57023 assert_eq_m128i(r, e);
57024 }
57025
57026 #[simd_test(enable = "avx512f,avx512vl")]
57027 unsafe fn test_mm_mask_cvtepi32_storeu_epi16() {
57028 let a = _mm_set1_epi32(9);
57029 let mut r = _mm_set1_epi8(0);
57030 _mm_mask_cvtepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
57031 let e = _mm_set_epi16(0, 0, 0, 0, 9, 9, 9, 9);
57032 assert_eq_m128i(r, e);
57033 }
57034
57035 #[simd_test(enable = "avx512f")]
57036 unsafe fn test_mm512_mask_cvtsepi32_storeu_epi16() {
57037 let a = _mm512_set1_epi32(i32::MAX);
57038 let mut r = _mm256_undefined_si256();
57039 _mm512_mask_cvtsepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
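// signed saturation: each i32::MAX lane narrows to i16::MAX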
57040 let e = _mm256_set1_epi16(i16::MAX);
57041 assert_eq_m256i(r, e);
57042 }
57043
57044 #[simd_test(enable = "avx512f,avx512vl")]
57045 unsafe fn test_mm256_mask_cvtsepi32_storeu_epi16() {
57046 let a = _mm256_set1_epi32(i32::MAX);
57047 let mut r = _mm_undefined_si128();
57048 _mm256_mask_cvtsepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
57049 let e = _mm_set1_epi16(i16::MAX);
57050 assert_eq_m128i(r, e);
57051 }
57052
57053 #[simd_test(enable = "avx512f,avx512vl")]
57054 unsafe fn test_mm_mask_cvtsepi32_storeu_epi16() {
57055 let a = _mm_set1_epi32(i32::MAX);
57056 let mut r = _mm_set1_epi8(0);
57057 _mm_mask_cvtsepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
57058 let e = _mm_set_epi16(0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
57059 assert_eq_m128i(r, e);
57060 }
57061
57062 #[simd_test(enable = "avx512f")]
57063 unsafe fn test_mm512_mask_cvtusepi32_storeu_epi16() {
57064 let a = _mm512_set1_epi32(i32::MAX);
57065 let mut r = _mm256_undefined_si256();
57066 _mm512_mask_cvtusepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
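// unsigned saturation: each i32::MAX lane narrows to u16::MAX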
57067 let e = _mm256_set1_epi16(u16::MAX as i16);
57068 assert_eq_m256i(r, e);
57069 }
57070
57071 #[simd_test(enable = "avx512f,avx512vl")]
57072 unsafe fn test_mm256_mask_cvtusepi32_storeu_epi16() {
57073 let a = _mm256_set1_epi32(i32::MAX);
57074 let mut r = _mm_undefined_si128();
57075 _mm256_mask_cvtusepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
57076 let e = _mm_set1_epi16(u16::MAX as i16);
57077 assert_eq_m128i(r, e);
57078 }
57079
57080 #[simd_test(enable = "avx512f,avx512vl")]
57081 unsafe fn test_mm_mask_cvtusepi32_storeu_epi16() {
57082 let a = _mm_set1_epi32(i32::MAX);
57083 let mut r = _mm_set1_epi8(0);
57084 _mm_mask_cvtusepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
57085 let e = _mm_set_epi16(
57086 0,
57087 0,
57088 0,
57089 0,
57090 u16::MAX as i16,
57091 u16::MAX as i16,
57092 u16::MAX as i16,
57093 u16::MAX as i16,
57094 );
57095 assert_eq_m128i(r, e);
57096 }
57097
57098 #[simd_test(enable = "avx512f")]
57099 unsafe fn test_mm512_mask_cvtepi32_storeu_epi8() {
57100 let a = _mm512_set1_epi32(9);
57101 let mut r = _mm_undefined_si128();
57102 _mm512_mask_cvtepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
57103 let e = _mm_set1_epi8(9);
57104 assert_eq_m128i(r, e);
57105 }
57106
57107 #[simd_test(enable = "avx512f,avx512vl")]
57108 unsafe fn test_mm256_mask_cvtepi32_storeu_epi8() {
57109 let a = _mm256_set1_epi32(9);
57110 let mut r = _mm_set1_epi8(0);
57111 _mm256_mask_cvtepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
57112 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9, 9, 9);
57113 assert_eq_m128i(r, e);
57114 }
57115
57116 #[simd_test(enable = "avx512f,avx512vl")]
57117 unsafe fn test_mm_mask_cvtepi32_storeu_epi8() {
57118 let a = _mm_set1_epi32(9);
57119 let mut r = _mm_set1_epi8(0);
57120 _mm_mask_cvtepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
57121 let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9);
57122 assert_eq_m128i(r, e);
57123 }
57124
57125 #[simd_test(enable = "avx512f")]
57126 unsafe fn test_mm512_mask_cvtsepi32_storeu_epi8() {
57127 let a = _mm512_set1_epi32(i32::MAX);
57128 let mut r = _mm_undefined_si128();
57129 _mm512_mask_cvtsepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
57130 let e = _mm_set1_epi8(i8::MAX);
57131 assert_eq_m128i(r, e);
57132 }
57133
57134 #[simd_test(enable = "avx512f,avx512vl")]
57135 unsafe fn test_mm256_mask_cvtsepi32_storeu_epi8() {
57136 let a = _mm256_set1_epi32(i32::MAX);
57137 let mut r = _mm_set1_epi8(0);
57138 _mm256_mask_cvtsepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
57139 #[rustfmt::skip]
57140 let e = _mm_set_epi8(
57141 0, 0, 0, 0,
57142 0, 0, 0, 0,
57143 i8::MAX, i8::MAX, i8::MAX, i8::MAX,
57144 i8::MAX, i8::MAX, i8::MAX, i8::MAX,
57145 );
57146 assert_eq_m128i(r, e);
57147 }
57148
57149 #[simd_test(enable = "avx512f,avx512vl")]
57150 unsafe fn test_mm_mask_cvtsepi32_storeu_epi8() {
57151 let a = _mm_set1_epi32(i32::MAX);
57152 let mut r = _mm_set1_epi8(0);
57153 _mm_mask_cvtsepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
57154 #[rustfmt::skip]
57155 let e = _mm_set_epi8(
57156 0, 0, 0, 0,
57157 0, 0, 0, 0,
57158 0, 0, 0, 0,
57159 i8::MAX, i8::MAX, i8::MAX, i8::MAX,
57160 );
57161 assert_eq_m128i(r, e);
57162 }
57163
57164 #[simd_test(enable = "avx512f")]
57165 unsafe fn test_mm512_mask_cvtusepi32_storeu_epi8() {
57166 let a = _mm512_set1_epi32(i32::MAX);
57167 let mut r = _mm_undefined_si128();
57168 _mm512_mask_cvtusepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
57169 let e = _mm_set1_epi8(u8::MAX as i8);
57170 assert_eq_m128i(r, e);
57171 }
57172
57173 #[simd_test(enable = "avx512f,avx512vl")]
57174 unsafe fn test_mm256_mask_cvtusepi32_storeu_epi8() {
57175 let a = _mm256_set1_epi32(i32::MAX);
57176 let mut r = _mm_set1_epi8(0);
57177 _mm256_mask_cvtusepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
57178 #[rustfmt::skip]
57179 let e = _mm_set_epi8(
57180 0, 0, 0, 0,
57181 0, 0, 0, 0,
57182 u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
57183 u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
57184 );
57185 assert_eq_m128i(r, e);
57186 }
57187
57188 #[simd_test(enable = "avx512f,avx512vl")]
57189 unsafe fn test_mm_mask_cvtusepi32_storeu_epi8() {
57190 let a = _mm_set1_epi32(i32::MAX);
57191 let mut r = _mm_set1_epi8(0);
57192 _mm_mask_cvtusepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
57193 #[rustfmt::skip]
57194 let e = _mm_set_epi8(
57195 0, 0, 0, 0,
57196 0, 0, 0, 0,
57197 0, 0, 0, 0,
57198 u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
57199 );
57200 assert_eq_m128i(r, e);
57201 }
57202
57203 #[simd_test(enable = "avx512f")]
57204 unsafe fn test_mm512_storeu_epi32() {
57205 let a = _mm512_set1_epi32(9);
57206 let mut r = _mm512_undefined_epi32();
57207 _mm512_storeu_epi32(&mut r as *mut _ as *mut i32, a);
57208 assert_eq_m512i(r, a);
57209 }
57210
57211 #[simd_test(enable = "avx512f,avx512vl")]
57212 unsafe fn test_mm256_storeu_epi32() {
57213 let a = _mm256_set1_epi32(9);
57214 let mut r = _mm256_undefined_si256();
57215 _mm256_storeu_epi32(&mut r as *mut _ as *mut i32, a);
57216 assert_eq_m256i(r, a);
57217 }
57218
57219 #[simd_test(enable = "avx512f,avx512vl")]
57220 unsafe fn test_mm_storeu_epi32() {
57221 let a = _mm_set1_epi32(9);
57222 let mut r = _mm_undefined_si128();
57223 _mm_storeu_epi32(&mut r as *mut _ as *mut i32, a);
57224 assert_eq_m128i(r, a);
57225 }
57226
57227 #[simd_test(enable = "avx512f")]
57228 unsafe fn test_mm512_loadu_si512() {
57229 let a = &[4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50];
57230 let p = a.as_ptr();
57231 let r = _mm512_loadu_si512(black_box(p));
57232 let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
57233 assert_eq_m512i(r, e);
57234 }
57235
57236 #[simd_test(enable = "avx512f")]
57237 unsafe fn test_mm512_storeu_si512() {
57238 let a = _mm512_set1_epi32(9);
57239 let mut r = _mm512_undefined_epi32();
57240 _mm512_storeu_si512(&mut r as *mut _, a);
57241 assert_eq_m512i(r, a);
57242 }
57243
57244 #[simd_test(enable = "avx512f")]
57245 unsafe fn test_mm512_load_si512() {
57246 #[repr(align(64))]
57247 struct Align {
57248 data: [i32; 16], // 64 bytes
57249 }
57250 let a = Align {
57251 data: [4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50],
57252 };
57253 let p = (a.data).as_ptr();
57254 let r = _mm512_load_si512(black_box(p));
57255 let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
57256 assert_eq_m512i(r, e);
57257 }
57258
57259 #[simd_test(enable = "avx512f")]
57260 unsafe fn test_mm512_store_si512() {
57261 let a = _mm512_set1_epi32(9);
57262 let mut r = _mm512_undefined_epi32();
57263 _mm512_store_si512(&mut r as *mut _, a);
57264 assert_eq_m512i(r, a);
57265 }
57266
57267 #[simd_test(enable = "avx512f")]
57268 unsafe fn test_mm512_load_epi32() {
57269 #[repr(align(64))]
57270 struct Align {
57271 data: [i32; 16], // 64 bytes
57272 }
57273 let a = Align {
57274 data: [4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50],
57275 };
57276 let p = (a.data).as_ptr();
57277 let r = _mm512_load_epi32(black_box(p));
57278 let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
57279 assert_eq_m512i(r, e);
57280 }
57281
57282 #[simd_test(enable = "avx512f,avx512vl")]
57283 unsafe fn test_mm256_load_epi32() {
57284 #[repr(align(64))]
57285 struct Align {
57286 data: [i32; 8],
57287 }
57288 let a = Align {
57289 data: [4, 3, 2, 5, 8, 9, 64, 50],
57290 };
57291 let p = (a.data).as_ptr();
57292 let r = _mm256_load_epi32(black_box(p));
57293 let e = _mm256_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50);
57294 assert_eq_m256i(r, e);
57295 }
57296
57297 #[simd_test(enable = "avx512f,avx512vl")]
57298 unsafe fn test_mm_load_epi32() {
57299 #[repr(align(64))]
57300 struct Align {
57301 data: [i32; 4],
57302 }
57303 let a = Align { data: [4, 3, 2, 5] };
57304 let p = (a.data).as_ptr();
57305 let r = _mm_load_epi32(black_box(p));
57306 let e = _mm_setr_epi32(4, 3, 2, 5);
57307 assert_eq_m128i(r, e);
57308 }
57309
57310 #[simd_test(enable = "avx512f")]
57311 unsafe fn test_mm512_store_epi32() {
57312 let a = _mm512_set1_epi32(9);
57313 let mut r = _mm512_undefined_epi32();
57314 _mm512_store_epi32(&mut r as *mut _ as *mut i32, a);
57315 assert_eq_m512i(r, a);
57316 }
57317
57318 #[simd_test(enable = "avx512f,avx512vl")]
57319 unsafe fn test_mm256_store_epi32() {
57320 let a = _mm256_set1_epi32(9);
57321 let mut r = _mm256_undefined_si256();
57322 _mm256_store_epi32(&mut r as *mut _ as *mut i32, a);
57323 assert_eq_m256i(r, a);
57324 }
57325
57326 #[simd_test(enable = "avx512f,avx512vl")]
57327 unsafe fn test_mm_store_epi32() {
57328 let a = _mm_set1_epi32(9);
57329 let mut r = _mm_undefined_si128();
57330 _mm_store_epi32(&mut r as *mut _ as *mut i32, a);
57331 assert_eq_m128i(r, a);
57332 }
57333
57334 #[simd_test(enable = "avx512f")]
57335 unsafe fn test_mm512_load_ps() {
57336 #[repr(align(64))]
57337 struct Align {
57338 data: [f32; 16], // 64 bytes
57339 }
57340 let a = Align {
57341 data: [
57342 4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
57343 ],
57344 };
57345 let p = (a.data).as_ptr();
57346 let r = _mm512_load_ps(black_box(p));
57347 let e = _mm512_setr_ps(
57348 4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
57349 );
57350 assert_eq_m512(r, e);
57351 }
57352
57353 #[simd_test(enable = "avx512f")]
57354 unsafe fn test_mm512_store_ps() {
57355 let a = _mm512_set1_ps(9.);
57356 let mut r = _mm512_undefined_ps();
57357 _mm512_store_ps(&mut r as *mut _ as *mut f32, a);
57358 assert_eq_m512(r, a);
57359 }
57360
57361 #[simd_test(enable = "avx512f")]
57362 unsafe fn test_mm512_mask_set1_epi32() {
57363 let src = _mm512_set1_epi32(2);
57364 let a: i32 = 11;
57365 let r = _mm512_mask_set1_epi32(src, 0, a);
57366 assert_eq_m512i(r, src);
57367 let r = _mm512_mask_set1_epi32(src, 0b11111111_11111111, a);
57368 let e = _mm512_set1_epi32(11);
57369 assert_eq_m512i(r, e);
57370 }
57371
57372 #[simd_test(enable = "avx512f")]
57373 unsafe fn test_mm512_maskz_set1_epi32() {
57374 let a: i32 = 11;
57375 let r = _mm512_maskz_set1_epi32(0, a);
57376 assert_eq_m512i(r, _mm512_setzero_si512());
57377 let r = _mm512_maskz_set1_epi32(0b11111111_11111111, a);
57378 let e = _mm512_set1_epi32(11);
57379 assert_eq_m512i(r, e);
57380 }
57381
57382 #[simd_test(enable = "avx512f,avx512vl")]
57383 unsafe fn test_mm256_mask_set1_epi32() {
57384 let src = _mm256_set1_epi32(2);
57385 let a: i32 = 11;
57386 let r = _mm256_mask_set1_epi32(src, 0, a);
57387 assert_eq_m256i(r, src);
57388 let r = _mm256_mask_set1_epi32(src, 0b11111111, a);
57389 let e = _mm256_set1_epi32(11);
57390 assert_eq_m256i(r, e);
57391 }
57392
57393 #[simd_test(enable = "avx512f")]
57394 unsafe fn test_mm256_maskz_set1_epi32() {
57395 let a: i32 = 11;
57396 let r = _mm256_maskz_set1_epi32(0, a);
57397 assert_eq_m256i(r, _mm256_setzero_si256());
57398 let r = _mm256_maskz_set1_epi32(0b11111111, a);
57399 let e = _mm256_set1_epi32(11);
57400 assert_eq_m256i(r, e);
57401 }
57402
57403 #[simd_test(enable = "avx512f,avx512vl")]
57404 unsafe fn test_mm_mask_set1_epi32() {
57405 let src = _mm_set1_epi32(2);
57406 let a: i32 = 11;
57407 let r = _mm_mask_set1_epi32(src, 0, a);
57408 assert_eq_m128i(r, src);
57409 let r = _mm_mask_set1_epi32(src, 0b00001111, a);
57410 let e = _mm_set1_epi32(11);
57411 assert_eq_m128i(r, e);
57412 }
57413
57414 #[simd_test(enable = "avx512f")]
57415 unsafe fn test_mm_maskz_set1_epi32() {
57416 let a: i32 = 11;
57417 let r = _mm_maskz_set1_epi32(0, a);
57418 assert_eq_m128i(r, _mm_setzero_si128());
57419 let r = _mm_maskz_set1_epi32(0b00001111, a);
57420 let e = _mm_set1_epi32(11);
57421 assert_eq_m128i(r, e);
57422 }
57423
57424 #[simd_test(enable = "avx512f")]
57425 unsafe fn test_mm_mask_move_ss() {
57426 let src = _mm_set_ps(10., 11., 100., 110.);
57427 let a = _mm_set_ps(1., 2., 10., 20.);
57428 let b = _mm_set_ps(3., 4., 30., 40.);
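// low lane comes from src when the mask bit is clear and from b when it is set; upper lanes always come from a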
57429 let r = _mm_mask_move_ss(src, 0, a, b);
57430 let e = _mm_set_ps(1., 2., 10., 110.);
57431 assert_eq_m128(r, e);
57432 let r = _mm_mask_move_ss(src, 0b11111111, a, b);
57433 let e = _mm_set_ps(1., 2., 10., 40.);
57434 assert_eq_m128(r, e);
57435 }
57436
57437 #[simd_test(enable = "avx512f")]
57438 unsafe fn test_mm_maskz_move_ss() {
57439 let a = _mm_set_ps(1., 2., 10., 20.);
57440 let b = _mm_set_ps(3., 4., 30., 40.);
57441 let r = _mm_maskz_move_ss(0, a, b);
57442 let e = _mm_set_ps(1., 2., 10., 0.);
57443 assert_eq_m128(r, e);
57444 let r = _mm_maskz_move_ss(0b11111111, a, b);
57445 let e = _mm_set_ps(1., 2., 10., 40.);
57446 assert_eq_m128(r, e);
57447 }
57448
57449 #[simd_test(enable = "avx512f")]
57450 unsafe fn test_mm_mask_move_sd() {
57451 let src = _mm_set_pd(10., 11.);
57452 let a = _mm_set_pd(1., 2.);
57453 let b = _mm_set_pd(3., 4.);
57454 let r = _mm_mask_move_sd(src, 0, a, b);
57455 let e = _mm_set_pd(1., 11.);
57456 assert_eq_m128d(r, e);
57457 let r = _mm_mask_move_sd(src, 0b11111111, a, b);
57458 let e = _mm_set_pd(1., 4.);
57459 assert_eq_m128d(r, e);
57460 }
57461
57462 #[simd_test(enable = "avx512f")]
57463 unsafe fn test_mm_maskz_move_sd() {
57464 let a = _mm_set_pd(1., 2.);
57465 let b = _mm_set_pd(3., 4.);
57466 let r = _mm_maskz_move_sd(0, a, b);
57467 let e = _mm_set_pd(1., 0.);
57468 assert_eq_m128d(r, e);
57469 let r = _mm_maskz_move_sd(0b11111111, a, b);
57470 let e = _mm_set_pd(1., 4.);
57471 assert_eq_m128d(r, e);
57472 }
57473
57474 #[simd_test(enable = "avx512f")]
57475 unsafe fn test_mm_mask_add_ss() {
57476 let src = _mm_set_ps(10., 11., 100., 110.);
57477 let a = _mm_set_ps(1., 2., 10., 20.);
57478 let b = _mm_set_ps(3., 4., 30., 40.);
57479 let r = _mm_mask_add_ss(src, 0, a, b);
57480 let e = _mm_set_ps(1., 2., 10., 110.);
57481 assert_eq_m128(r, e);
57482 let r = _mm_mask_add_ss(src, 0b11111111, a, b);
57483 let e = _mm_set_ps(1., 2., 10., 60.);
57484 assert_eq_m128(r, e);
57485 }
57486
57487 #[simd_test(enable = "avx512f")]
57488 unsafe fn test_mm_maskz_add_ss() {
57489 let a = _mm_set_ps(1., 2., 10., 20.);
57490 let b = _mm_set_ps(3., 4., 30., 40.);
57491 let r = _mm_maskz_add_ss(0, a, b);
57492 let e = _mm_set_ps(1., 2., 10., 0.);
57493 assert_eq_m128(r, e);
57494 let r = _mm_maskz_add_ss(0b11111111, a, b);
57495 let e = _mm_set_ps(1., 2., 10., 60.);
57496 assert_eq_m128(r, e);
57497 }
57498
57499 #[simd_test(enable = "avx512f")]
57500 unsafe fn test_mm_mask_add_sd() {
57501 let src = _mm_set_pd(10., 11.);
57502 let a = _mm_set_pd(1., 2.);
57503 let b = _mm_set_pd(3., 4.);
57504 let r = _mm_mask_add_sd(src, 0, a, b);
57505 let e = _mm_set_pd(1., 11.);
57506 assert_eq_m128d(r, e);
57507 let r = _mm_mask_add_sd(src, 0b11111111, a, b);
57508 let e = _mm_set_pd(1., 6.);
57509 assert_eq_m128d(r, e);
57510 }
57511
57512 #[simd_test(enable = "avx512f")]
57513 unsafe fn test_mm_maskz_add_sd() {
57514 let a = _mm_set_pd(1., 2.);
57515 let b = _mm_set_pd(3., 4.);
57516 let r = _mm_maskz_add_sd(0, a, b);
57517 let e = _mm_set_pd(1., 0.);
57518 assert_eq_m128d(r, e);
57519 let r = _mm_maskz_add_sd(0b11111111, a, b);
57520 let e = _mm_set_pd(1., 6.);
57521 assert_eq_m128d(r, e);
57522 }
57523
57524 #[simd_test(enable = "avx512f")]
57525 unsafe fn test_mm_mask_sub_ss() {
57526 let src = _mm_set_ps(10., 11., 100., 110.);
57527 let a = _mm_set_ps(1., 2., 10., 20.);
57528 let b = _mm_set_ps(3., 4., 30., 40.);
57529 let r = _mm_mask_sub_ss(src, 0, a, b);
57530 let e = _mm_set_ps(1., 2., 10., 110.);
57531 assert_eq_m128(r, e);
57532 let r = _mm_mask_sub_ss(src, 0b11111111, a, b);
57533 let e = _mm_set_ps(1., 2., 10., -20.);
57534 assert_eq_m128(r, e);
57535 }
57536
57537 #[simd_test(enable = "avx512f")]
57538 unsafe fn test_mm_maskz_sub_ss() {
57539 let a = _mm_set_ps(1., 2., 10., 20.);
57540 let b = _mm_set_ps(3., 4., 30., 40.);
57541 let r = _mm_maskz_sub_ss(0, a, b);
57542 let e = _mm_set_ps(1., 2., 10., 0.);
57543 assert_eq_m128(r, e);
57544 let r = _mm_maskz_sub_ss(0b11111111, a, b);
57545 let e = _mm_set_ps(1., 2., 10., -20.);
57546 assert_eq_m128(r, e);
57547 }
57548
57549 #[simd_test(enable = "avx512f")]
57550 unsafe fn test_mm_mask_sub_sd() {
57551 let src = _mm_set_pd(10., 11.);
57552 let a = _mm_set_pd(1., 2.);
57553 let b = _mm_set_pd(3., 4.);
57554 let r = _mm_mask_sub_sd(src, 0, a, b);
57555 let e = _mm_set_pd(1., 11.);
57556 assert_eq_m128d(r, e);
57557 let r = _mm_mask_sub_sd(src, 0b11111111, a, b);
57558 let e = _mm_set_pd(1., -2.);
57559 assert_eq_m128d(r, e);
57560 }
57561
57562 #[simd_test(enable = "avx512f")]
57563 unsafe fn test_mm_maskz_sub_sd() {
57564 let a = _mm_set_pd(1., 2.);
57565 let b = _mm_set_pd(3., 4.);
57566 let r = _mm_maskz_sub_sd(0, a, b);
57567 let e = _mm_set_pd(1., 0.);
57568 assert_eq_m128d(r, e);
57569 let r = _mm_maskz_sub_sd(0b11111111, a, b);
57570 let e = _mm_set_pd(1., -2.);
57571 assert_eq_m128d(r, e);
57572 }
57573
57574 #[simd_test(enable = "avx512f")]
57575 unsafe fn test_mm_mask_mul_ss() {
57576 let src = _mm_set_ps(10., 11., 100., 110.);
57577 let a = _mm_set_ps(1., 2., 10., 20.);
57578 let b = _mm_set_ps(3., 4., 30., 40.);
57579 let r = _mm_mask_mul_ss(src, 0, a, b);
57580 let e = _mm_set_ps(1., 2., 10., 110.);
57581 assert_eq_m128(r, e);
57582 let r = _mm_mask_mul_ss(src, 0b11111111, a, b);
57583 let e = _mm_set_ps(1., 2., 10., 800.);
57584 assert_eq_m128(r, e);
57585 }
57586
57587 #[simd_test(enable = "avx512f")]
57588 unsafe fn test_mm_maskz_mul_ss() {
57589 let a = _mm_set_ps(1., 2., 10., 20.);
57590 let b = _mm_set_ps(3., 4., 30., 40.);
57591 let r = _mm_maskz_mul_ss(0, a, b);
57592 let e = _mm_set_ps(1., 2., 10., 0.);
57593 assert_eq_m128(r, e);
57594 let r = _mm_maskz_mul_ss(0b11111111, a, b);
57595 let e = _mm_set_ps(1., 2., 10., 800.);
57596 assert_eq_m128(r, e);
57597 }
57598
57599 #[simd_test(enable = "avx512f")]
57600 unsafe fn test_mm_mask_mul_sd() {
57601 let src = _mm_set_pd(10., 11.);
57602 let a = _mm_set_pd(1., 2.);
57603 let b = _mm_set_pd(3., 4.);
57604 let r = _mm_mask_mul_sd(src, 0, a, b);
57605 let e = _mm_set_pd(1., 11.);
57606 assert_eq_m128d(r, e);
57607 let r = _mm_mask_mul_sd(src, 0b11111111, a, b);
57608 let e = _mm_set_pd(1., 8.);
57609 assert_eq_m128d(r, e);
57610 }
57611
57612 #[simd_test(enable = "avx512f")]
57613 unsafe fn test_mm_maskz_mul_sd() {
57614 let a = _mm_set_pd(1., 2.);
57615 let b = _mm_set_pd(3., 4.);
57616 let r = _mm_maskz_mul_sd(0, a, b);
57617 let e = _mm_set_pd(1., 0.);
57618 assert_eq_m128d(r, e);
57619 let r = _mm_maskz_mul_sd(0b11111111, a, b);
57620 let e = _mm_set_pd(1., 8.);
57621 assert_eq_m128d(r, e);
57622 }
57623
57624 #[simd_test(enable = "avx512f")]
57625 unsafe fn test_mm_mask_div_ss() {
57626 let src = _mm_set_ps(10., 11., 100., 110.);
57627 let a = _mm_set_ps(1., 2., 10., 20.);
57628 let b = _mm_set_ps(3., 4., 30., 40.);
57629 let r = _mm_mask_div_ss(src, 0, a, b);
57630 let e = _mm_set_ps(1., 2., 10., 110.);
57631 assert_eq_m128(r, e);
57632 let r = _mm_mask_div_ss(src, 0b11111111, a, b);
57633 let e = _mm_set_ps(1., 2., 10., 0.5);
57634 assert_eq_m128(r, e);
57635 }
57636
57637 #[simd_test(enable = "avx512f")]
57638 unsafe fn test_mm_maskz_div_ss() {
57639 let a = _mm_set_ps(1., 2., 10., 20.);
57640 let b = _mm_set_ps(3., 4., 30., 40.);
57641 let r = _mm_maskz_div_ss(0, a, b);
57642 let e = _mm_set_ps(1., 2., 10., 0.);
57643 assert_eq_m128(r, e);
57644 let r = _mm_maskz_div_ss(0b11111111, a, b);
57645 let e = _mm_set_ps(1., 2., 10., 0.5);
57646 assert_eq_m128(r, e);
57647 }
57648
57649 #[simd_test(enable = "avx512f")]
57650 unsafe fn test_mm_mask_div_sd() {
57651 let src = _mm_set_pd(10., 11.);
57652 let a = _mm_set_pd(1., 2.);
57653 let b = _mm_set_pd(3., 4.);
57654 let r = _mm_mask_div_sd(src, 0, a, b);
57655 let e = _mm_set_pd(1., 11.);
57656 assert_eq_m128d(r, e);
57657 let r = _mm_mask_div_sd(src, 0b11111111, a, b);
57658 let e = _mm_set_pd(1., 0.5);
57659 assert_eq_m128d(r, e);
57660 }
57661
57662 #[simd_test(enable = "avx512f")]
57663 unsafe fn test_mm_maskz_div_sd() {
57664 let a = _mm_set_pd(1., 2.);
57665 let b = _mm_set_pd(3., 4.);
57666 let r = _mm_maskz_div_sd(0, a, b);
57667 let e = _mm_set_pd(1., 0.);
57668 assert_eq_m128d(r, e);
57669 let r = _mm_maskz_div_sd(0b11111111, a, b);
57670 let e = _mm_set_pd(1., 0.5);
57671 assert_eq_m128d(r, e);
57672 }
57673
57674 #[simd_test(enable = "avx512f")]
57675 unsafe fn test_mm_mask_max_ss() {
57676 let a = _mm_set_ps(0., 1., 2., 3.);
57677 let b = _mm_set_ps(4., 5., 6., 7.);
57678 let r = _mm_mask_max_ss(a, 0, a, b);
57679 let e = _mm_set_ps(0., 1., 2., 3.);
57680 assert_eq_m128(r, e);
57681 let r = _mm_mask_max_ss(a, 0b11111111, a, b);
57682 let e = _mm_set_ps(0., 1., 2., 7.);
57683 assert_eq_m128(r, e);
57684 }
57685
57686 #[simd_test(enable = "avx512f")]
57687 unsafe fn test_mm_maskz_max_ss() {
57688 let a = _mm_set_ps(0., 1., 2., 3.);
57689 let b = _mm_set_ps(4., 5., 6., 7.);
57690 let r = _mm_maskz_max_ss(0, a, b);
57691 let e = _mm_set_ps(0., 1., 2., 0.);
57692 assert_eq_m128(r, e);
57693 let r = _mm_maskz_max_ss(0b11111111, a, b);
57694 let e = _mm_set_ps(0., 1., 2., 7.);
57695 assert_eq_m128(r, e);
57696 }
57697
57698 #[simd_test(enable = "avx512f")]
57699 unsafe fn test_mm_mask_max_sd() {
57700 let a = _mm_set_pd(0., 1.);
57701 let b = _mm_set_pd(2., 3.);
57702 let r = _mm_mask_max_sd(a, 0, a, b);
57703 let e = _mm_set_pd(0., 1.);
57704 assert_eq_m128d(r, e);
57705 let r = _mm_mask_max_sd(a, 0b11111111, a, b);
57706 let e = _mm_set_pd(0., 3.);
57707 assert_eq_m128d(r, e);
57708 }
57709
57710 #[simd_test(enable = "avx512f")]
57711 unsafe fn test_mm_maskz_max_sd() {
57712 let a = _mm_set_pd(0., 1.);
57713 let b = _mm_set_pd(2., 3.);
57714 let r = _mm_maskz_max_sd(0, a, b);
57715 let e = _mm_set_pd(0., 0.);
57716 assert_eq_m128d(r, e);
57717 let r = _mm_maskz_max_sd(0b11111111, a, b);
57718 let e = _mm_set_pd(0., 3.);
57719 assert_eq_m128d(r, e);
57720 }
57721
57722 #[simd_test(enable = "avx512f")]
57723 unsafe fn test_mm_mask_min_ss() {
57724 let a = _mm_set_ps(0., 1., 2., 3.);
57725 let b = _mm_set_ps(4., 5., 6., 7.);
57726 let r = _mm_mask_min_ss(a, 0, a, b);
57727 let e = _mm_set_ps(0., 1., 2., 3.);
57728 assert_eq_m128(r, e);
57729 let r = _mm_mask_min_ss(a, 0b11111111, a, b);
57730 let e = _mm_set_ps(0., 1., 2., 3.);
57731 assert_eq_m128(r, e);
57732 }
57733
57734 #[simd_test(enable = "avx512f")]
57735 unsafe fn test_mm_maskz_min_ss() {
57736 let a = _mm_set_ps(0., 1., 2., 3.);
57737 let b = _mm_set_ps(4., 5., 6., 7.);
57738 let r = _mm_maskz_min_ss(0, a, b);
57739 let e = _mm_set_ps(0., 1., 2., 0.);
57740 assert_eq_m128(r, e);
57741 let r = _mm_maskz_min_ss(0b11111111, a, b);
57742 let e = _mm_set_ps(0., 1., 2., 3.);
57743 assert_eq_m128(r, e);
57744 }
57745
57746 #[simd_test(enable = "avx512f")]
57747 unsafe fn test_mm_mask_min_sd() {
57748 let a = _mm_set_pd(0., 1.);
57749 let b = _mm_set_pd(2., 3.);
57750 let r = _mm_mask_min_sd(a, 0, a, b);
57751 let e = _mm_set_pd(0., 1.);
57752 assert_eq_m128d(r, e);
57753 let r = _mm_mask_min_sd(a, 0b11111111, a, b);
57754 let e = _mm_set_pd(0., 1.);
57755 assert_eq_m128d(r, e);
57756 }
57757
57758 #[simd_test(enable = "avx512f")]
57759 unsafe fn test_mm_maskz_min_sd() {
57760 let a = _mm_set_pd(0., 1.);
57761 let b = _mm_set_pd(2., 3.);
57762 let r = _mm_maskz_min_sd(0, a, b);
57763 let e = _mm_set_pd(0., 0.);
57764 assert_eq_m128d(r, e);
57765 let r = _mm_maskz_min_sd(0b11111111, a, b);
57766 let e = _mm_set_pd(0., 1.);
57767 assert_eq_m128d(r, e);
57768 }
57769
57770 #[simd_test(enable = "avx512f")]
57771 unsafe fn test_mm_mask_sqrt_ss() {
57772 let src = _mm_set_ps(10., 11., 100., 110.);
57773 let a = _mm_set_ps(1., 2., 10., 20.);
57774 let b = _mm_set_ps(3., 4., 30., 4.);
57775 let r = _mm_mask_sqrt_ss(src, 0, a, b);
57776 let e = _mm_set_ps(1., 2., 10., 110.);
57777 assert_eq_m128(r, e);
57778 let r = _mm_mask_sqrt_ss(src, 0b11111111, a, b);
57779 let e = _mm_set_ps(1., 2., 10., 2.);
57780 assert_eq_m128(r, e);
57781 }
57782
57783 #[simd_test(enable = "avx512f")]
57784 unsafe fn test_mm_maskz_sqrt_ss() {
57785 let a = _mm_set_ps(1., 2., 10., 20.);
57786 let b = _mm_set_ps(3., 4., 30., 4.);
57787 let r = _mm_maskz_sqrt_ss(0, a, b);
57788 let e = _mm_set_ps(1., 2., 10., 0.);
57789 assert_eq_m128(r, e);
57790 let r = _mm_maskz_sqrt_ss(0b11111111, a, b);
57791 let e = _mm_set_ps(1., 2., 10., 2.);
57792 assert_eq_m128(r, e);
57793 }
57794
57795 #[simd_test(enable = "avx512f")]
57796 unsafe fn test_mm_mask_sqrt_sd() {
57797 let src = _mm_set_pd(10., 11.);
57798 let a = _mm_set_pd(1., 2.);
57799 let b = _mm_set_pd(3., 4.);
57800 let r = _mm_mask_sqrt_sd(src, 0, a, b);
57801 let e = _mm_set_pd(1., 11.);
57802 assert_eq_m128d(r, e);
57803 let r = _mm_mask_sqrt_sd(src, 0b11111111, a, b);
57804 let e = _mm_set_pd(1., 2.);
57805 assert_eq_m128d(r, e);
57806 }
57807
57808 #[simd_test(enable = "avx512f")]
57809 unsafe fn test_mm_maskz_sqrt_sd() {
57810 let a = _mm_set_pd(1., 2.);
57811 let b = _mm_set_pd(3., 4.);
57812 let r = _mm_maskz_sqrt_sd(0, a, b);
57813 let e = _mm_set_pd(1., 0.);
57814 assert_eq_m128d(r, e);
57815 let r = _mm_maskz_sqrt_sd(0b11111111, a, b);
57816 let e = _mm_set_pd(1., 2.);
57817 assert_eq_m128d(r, e);
57818 }
57819
57820 #[simd_test(enable = "avx512f")]
57821 unsafe fn test_mm_rsqrt14_ss() {
57822 let a = _mm_set_ps(1., 2., 10., 20.);
57823 let b = _mm_set_ps(3., 4., 30., 4.);
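// approximate reciprocal square root of b's low lane (max relative error 2^-14): 1/sqrt(4.) = 0.5 exactly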
57824 let r = _mm_rsqrt14_ss(a, b);
57825 let e = _mm_set_ps(1., 2., 10., 0.5);
57826 assert_eq_m128(r, e);
57827 }
57828
57829 #[simd_test(enable = "avx512f")]
57830 unsafe fn test_mm_mask_rsqrt14_ss() {
57831 let src = _mm_set_ps(10., 11., 100., 110.);
57832 let a = _mm_set_ps(1., 2., 10., 20.);
57833 let b = _mm_set_ps(3., 4., 30., 4.);
57834 let r = _mm_mask_rsqrt14_ss(src, 0, a, b);
57835 let e = _mm_set_ps(1., 2., 10., 110.);
57836 assert_eq_m128(r, e);
57837 let r = _mm_mask_rsqrt14_ss(src, 0b11111111, a, b);
57838 let e = _mm_set_ps(1., 2., 10., 0.5);
57839 assert_eq_m128(r, e);
57840 }
57841
57842 #[simd_test(enable = "avx512f")]
57843 unsafe fn test_mm_maskz_rsqrt14_ss() {
57844 let a = _mm_set_ps(1., 2., 10., 20.);
57845 let b = _mm_set_ps(3., 4., 30., 4.);
57846 let r = _mm_maskz_rsqrt14_ss(0, a, b);
57847 let e = _mm_set_ps(1., 2., 10., 0.);
57848 assert_eq_m128(r, e);
57849 let r = _mm_maskz_rsqrt14_ss(0b11111111, a, b);
57850 let e = _mm_set_ps(1., 2., 10., 0.5);
57851 assert_eq_m128(r, e);
57852 }
57853
57854 #[simd_test(enable = "avx512f")]
57855 unsafe fn test_mm_rsqrt14_sd() {
57856 let a = _mm_set_pd(1., 2.);
57857 let b = _mm_set_pd(3., 4.);
57858 let r = _mm_rsqrt14_sd(a, b);
57859 let e = _mm_set_pd(1., 0.5);
57860 assert_eq_m128d(r, e);
57861 }
57862
57863 #[simd_test(enable = "avx512f")]
57864 unsafe fn test_mm_mask_rsqrt14_sd() {
57865 let src = _mm_set_pd(10., 11.);
57866 let a = _mm_set_pd(1., 2.);
57867 let b = _mm_set_pd(3., 4.);
57868 let r = _mm_mask_rsqrt14_sd(src, 0, a, b);
57869 let e = _mm_set_pd(1., 11.);
57870 assert_eq_m128d(r, e);
57871 let r = _mm_mask_rsqrt14_sd(src, 0b11111111, a, b);
57872 let e = _mm_set_pd(1., 0.5);
57873 assert_eq_m128d(r, e);
57874 }
57875
57876 #[simd_test(enable = "avx512f")]
57877 unsafe fn test_mm_maskz_rsqrt14_sd() {
57878 let a = _mm_set_pd(1., 2.);
57879 let b = _mm_set_pd(3., 4.);
57880 let r = _mm_maskz_rsqrt14_sd(0, a, b);
57881 let e = _mm_set_pd(1., 0.);
57882 assert_eq_m128d(r, e);
57883 let r = _mm_maskz_rsqrt14_sd(0b11111111, a, b);
57884 let e = _mm_set_pd(1., 0.5);
57885 assert_eq_m128d(r, e);
57886 }
57887
57888 #[simd_test(enable = "avx512f")]
57889 unsafe fn test_mm_rcp14_ss() {
57890 let a = _mm_set_ps(1., 2., 10., 20.);
57891 let b = _mm_set_ps(3., 4., 30., 4.);
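// approximate reciprocal of b's low lane (max relative error 2^-14): 1/4. = 0.25 exactly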
57892 let r = _mm_rcp14_ss(a, b);
57893 let e = _mm_set_ps(1., 2., 10., 0.25);
57894 assert_eq_m128(r, e);
57895 }
57896
57897 #[simd_test(enable = "avx512f")]
57898 unsafe fn test_mm_mask_rcp14_ss() {
57899 let src = _mm_set_ps(10., 11., 100., 110.);
57900 let a = _mm_set_ps(1., 2., 10., 20.);
57901 let b = _mm_set_ps(3., 4., 30., 4.);
57902 let r = _mm_mask_rcp14_ss(src, 0, a, b);
57903 let e = _mm_set_ps(1., 2., 10., 110.);
57904 assert_eq_m128(r, e);
57905 let r = _mm_mask_rcp14_ss(src, 0b11111111, a, b);
57906 let e = _mm_set_ps(1., 2., 10., 0.25);
57907 assert_eq_m128(r, e);
57908 }
57909
57910 #[simd_test(enable = "avx512f")]
57911 unsafe fn test_mm_maskz_rcp14_ss() {
57912 let a = _mm_set_ps(1., 2., 10., 20.);
57913 let b = _mm_set_ps(3., 4., 30., 4.);
57914 let r = _mm_maskz_rcp14_ss(0, a, b);
57915 let e = _mm_set_ps(1., 2., 10., 0.);
57916 assert_eq_m128(r, e);
57917 let r = _mm_maskz_rcp14_ss(0b11111111, a, b);
57918 let e = _mm_set_ps(1., 2., 10., 0.25);
57919 assert_eq_m128(r, e);
57920 }
57921
57922 #[simd_test(enable = "avx512f")]
57923 unsafe fn test_mm_rcp14_sd() {
57924 let a = _mm_set_pd(1., 2.);
57925 let b = _mm_set_pd(3., 4.);
57926 let r = _mm_rcp14_sd(a, b);
57927 let e = _mm_set_pd(1., 0.25);
57928 assert_eq_m128d(r, e);
57929 }
57930
57931 #[simd_test(enable = "avx512f")]
57932 unsafe fn test_mm_mask_rcp14_sd() {
57933 let src = _mm_set_pd(10., 11.);
57934 let a = _mm_set_pd(1., 2.);
57935 let b = _mm_set_pd(3., 4.);
57936 let r = _mm_mask_rcp14_sd(src, 0, a, b);
57937 let e = _mm_set_pd(1., 11.);
57938 assert_eq_m128d(r, e);
57939 let r = _mm_mask_rcp14_sd(src, 0b11111111, a, b);
57940 let e = _mm_set_pd(1., 0.25);
57941 assert_eq_m128d(r, e);
57942 }
57943
57944 #[simd_test(enable = "avx512f")]
57945 unsafe fn test_mm_maskz_rcp14_sd() {
57946 let a = _mm_set_pd(1., 2.);
57947 let b = _mm_set_pd(3., 4.);
57948 let r = _mm_maskz_rcp14_sd(0, a, b);
57949 let e = _mm_set_pd(1., 0.);
57950 assert_eq_m128d(r, e);
57951 let r = _mm_maskz_rcp14_sd(0b11111111, a, b);
57952 let e = _mm_set_pd(1., 0.25);
57953 assert_eq_m128d(r, e);
57954 }
57955
57956 #[simd_test(enable = "avx512f")]
57957 unsafe fn test_mm_getexp_ss() {
57958 let a = _mm_set1_ps(2.);
57959 let b = _mm_set1_ps(3.);
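// getexp returns floor(log2(|b[0]|)) in the low lane: floor(log2(3.)) = 1.; upper lanes come from a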
57960 let r = _mm_getexp_ss(a, b);
57961 let e = _mm_set_ps(2., 2., 2., 1.);
57962 assert_eq_m128(r, e);
57963 }
57964
57965 #[simd_test(enable = "avx512f")]
57966 unsafe fn test_mm_mask_getexp_ss() {
57967 let a = _mm_set1_ps(2.);
57968 let b = _mm_set1_ps(3.);
57969 let r = _mm_mask_getexp_ss(a, 0, a, b);
57970 let e = _mm_set_ps(2., 2., 2., 2.);
57971 assert_eq_m128(r, e);
57972 let r = _mm_mask_getexp_ss(a, 0b11111111, a, b);
57973 let e = _mm_set_ps(2., 2., 2., 1.);
57974 assert_eq_m128(r, e);
57975 }
57976
57977 #[simd_test(enable = "avx512f")]
57978 unsafe fn test_mm_maskz_getexp_ss() {
57979 let a = _mm_set1_ps(2.);
57980 let b = _mm_set1_ps(3.);
57981 let r = _mm_maskz_getexp_ss(0, a, b);
57982 let e = _mm_set_ps(2., 2., 2., 0.);
57983 assert_eq_m128(r, e);
57984 let r = _mm_maskz_getexp_ss(0b11111111, a, b);
57985 let e = _mm_set_ps(2., 2., 2., 1.);
57986 assert_eq_m128(r, e);
57987 }
57988
57989 #[simd_test(enable = "avx512f")]
57990 unsafe fn test_mm_getexp_sd() {
57991 let a = _mm_set1_pd(2.);
57992 let b = _mm_set1_pd(3.);
57993 let r = _mm_getexp_sd(a, b);
57994 let e = _mm_set_pd(2., 1.);
57995 assert_eq_m128d(r, e);
57996 }
57997
57998 #[simd_test(enable = "avx512f")]
57999 unsafe fn test_mm_mask_getexp_sd() {
58000 let a = _mm_set1_pd(2.);
58001 let b = _mm_set1_pd(3.);
58002 let r = _mm_mask_getexp_sd(a, 0, a, b);
58003 let e = _mm_set_pd(2., 2.);
58004 assert_eq_m128d(r, e);
58005 let r = _mm_mask_getexp_sd(a, 0b11111111, a, b);
58006 let e = _mm_set_pd(2., 1.);
58007 assert_eq_m128d(r, e);
58008 }
58009
58010 #[simd_test(enable = "avx512f")]
58011 unsafe fn test_mm_maskz_getexp_sd() {
58012 let a = _mm_set1_pd(2.);
58013 let b = _mm_set1_pd(3.);
58014 let r = _mm_maskz_getexp_sd(0, a, b);
58015 let e = _mm_set_pd(2., 0.);
58016 assert_eq_m128d(r, e);
58017 let r = _mm_maskz_getexp_sd(0b11111111, a, b);
58018 let e = _mm_set_pd(2., 1.);
58019 assert_eq_m128d(r, e);
58020 }
58021
58022 #[simd_test(enable = "avx512f")]
58023 unsafe fn test_mm_getmant_ss() {
58024 let a = _mm_set1_ps(20.);
58025 let b = _mm_set1_ps(10.);
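// with _MM_MANT_NORM_1_2 the mantissa is normalized to [1, 2): 10. = 1.25 * 2^3, so the low lane is 1.25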
58026 let r = _mm_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, b);
58027 let e = _mm_set_ps(20., 20., 20., 1.25);
58028 assert_eq_m128(r, e);
58029 }
58030
58031 #[simd_test(enable = "avx512f")]
58032 unsafe fn test_mm_mask_getmant_ss() {
58033 let a = _mm_set1_ps(20.);
58034 let b = _mm_set1_ps(10.);
58035 let r = _mm_mask_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a, b);
58036 let e = _mm_set_ps(20., 20., 20., 20.);
58037 assert_eq_m128(r, e);
58038 let r = _mm_mask_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b11111111, a, b);
58039 let e = _mm_set_ps(20., 20., 20., 1.25);
58040 assert_eq_m128(r, e);
58041 }
58042
58043 #[simd_test(enable = "avx512f")]
58044 unsafe fn test_mm_maskz_getmant_ss() {
58045 let a = _mm_set1_ps(20.);
58046 let b = _mm_set1_ps(10.);
58047 let r = _mm_maskz_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a, b);
58048 let e = _mm_set_ps(20., 20., 20., 0.);
58049 assert_eq_m128(r, e);
58050 let r = _mm_maskz_getmant_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b11111111, a, b);
58051 let e = _mm_set_ps(20., 20., 20., 1.25);
58052 assert_eq_m128(r, e);
58053 }
58054
58055 #[simd_test(enable = "avx512f")]
58056 unsafe fn test_mm_getmant_sd() {
58057 let a = _mm_set1_pd(20.);
58058 let b = _mm_set1_pd(10.);
58059 let r = _mm_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, b);
58060 let e = _mm_set_pd(20., 1.25);
58061 assert_eq_m128d(r, e);
58062 }
58063
58064 #[simd_test(enable = "avx512f")]
58065 unsafe fn test_mm_mask_getmant_sd() {
58066 let a = _mm_set1_pd(20.);
58067 let b = _mm_set1_pd(10.);
58068 let r = _mm_mask_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a, b);
58069 let e = _mm_set_pd(20., 20.);
58070 assert_eq_m128d(r, e);
58071 let r = _mm_mask_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b11111111, a, b);
58072 let e = _mm_set_pd(20., 1.25);
58073 assert_eq_m128d(r, e);
58074 }
58075
58076 #[simd_test(enable = "avx512f")]
58077 unsafe fn test_mm_maskz_getmant_sd() {
58078 let a = _mm_set1_pd(20.);
58079 let b = _mm_set1_pd(10.);
58080 let r = _mm_maskz_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a, b);
58081 let e = _mm_set_pd(20., 0.);
58082 assert_eq_m128d(r, e);
58083 let r = _mm_maskz_getmant_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b11111111, a, b);
58084 let e = _mm_set_pd(20., 1.25);
58085 assert_eq_m128d(r, e);
58086 }
58087
58088 #[simd_test(enable = "avx512f")]
58089 unsafe fn test_mm_roundscale_ss() {
58090 let a = _mm_set1_ps(2.2);
58091 let b = _mm_set1_ps(1.1);
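// IMM8 = 0 keeps no fraction bits, i.e. rounds to the nearest integer: 1.1 -> 1.0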
58092 let r = _mm_roundscale_ss::<0>(a, b);
58093 let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
58094 assert_eq_m128(r, e);
58095 }
58096
58097 #[simd_test(enable = "avx512f")]
58098 unsafe fn test_mm_mask_roundscale_ss() {
58099 let a = _mm_set1_ps(2.2);
58100 let b = _mm_set1_ps(1.1);
58101 let r = _mm_mask_roundscale_ss::<0>(a, 0, a, b);
58102 let e = _mm_set_ps(2.2, 2.2, 2.2, 2.2);
58103 assert_eq_m128(r, e);
58104 let r = _mm_mask_roundscale_ss::<0>(a, 0b11111111, a, b);
58105 let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
58106 assert_eq_m128(r, e);
58107 }
58108
58109 #[simd_test(enable = "avx512f")]
58110 unsafe fn test_mm_maskz_roundscale_ss() {
58111 let a = _mm_set1_ps(2.2);
58112 let b = _mm_set1_ps(1.1);
58113 let r = _mm_maskz_roundscale_ss::<0>(0, a, b);
58114 let e = _mm_set_ps(2.2, 2.2, 2.2, 0.0);
58115 assert_eq_m128(r, e);
58116 let r = _mm_maskz_roundscale_ss::<0>(0b11111111, a, b);
58117 let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
58118 assert_eq_m128(r, e);
58119 }
58120
58121 #[simd_test(enable = "avx512f")]
58122 unsafe fn test_mm_roundscale_sd() {
58123 let a = _mm_set1_pd(2.2);
58124 let b = _mm_set1_pd(1.1);
58125 let r = _mm_roundscale_sd::<0>(a, b);
58126 let e = _mm_set_pd(2.2, 1.0);
58127 assert_eq_m128d(r, e);
58128 }
58129
58130 #[simd_test(enable = "avx512f")]
58131 unsafe fn test_mm_mask_roundscale_sd() {
58132 let a = _mm_set1_pd(2.2);
58133 let b = _mm_set1_pd(1.1);
58134 let r = _mm_mask_roundscale_sd::<0>(a, 0, a, b);
58135 let e = _mm_set_pd(2.2, 2.2);
58136 assert_eq_m128d(r, e);
58137 let r = _mm_mask_roundscale_sd::<0>(a, 0b11111111, a, b);
58138 let e = _mm_set_pd(2.2, 1.0);
58139 assert_eq_m128d(r, e);
58140 }
58141
58142 #[simd_test(enable = "avx512f")]
58143 unsafe fn test_mm_maskz_roundscale_sd() {
58144 let a = _mm_set1_pd(2.2);
58145 let b = _mm_set1_pd(1.1);
58146 let r = _mm_maskz_roundscale_sd::<0>(0, a, b);
58147 let e = _mm_set_pd(2.2, 0.0);
58148 assert_eq_m128d(r, e);
58149 let r = _mm_maskz_roundscale_sd::<0>(0b11111111, a, b);
58150 let e = _mm_set_pd(2.2, 1.0);
58151 assert_eq_m128d(r, e);
58152 }
58153
58154 #[simd_test(enable = "avx512f")]
58155 unsafe fn test_mm_scalef_ss() {
58156 let a = _mm_set1_ps(1.);
58157 let b = _mm_set1_ps(3.);
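// scalef computes a[0] * 2^floor(b[0]) in the low lane: 1. * 2^3 = 8.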
58158 let r = _mm_scalef_ss(a, b);
58159 let e = _mm_set_ps(1., 1., 1., 8.);
58160 assert_eq_m128(r, e);
58161 }
58162
58163 #[simd_test(enable = "avx512f")]
58164 unsafe fn test_mm_mask_scalef_ss() {
58165 let a = _mm_set1_ps(1.);
58166 let b = _mm_set1_ps(3.);
58167 let r = _mm_mask_scalef_ss(a, 0, a, b);
58168 let e = _mm_set_ps(1., 1., 1., 1.);
58169 assert_eq_m128(r, e);
58170 let r = _mm_mask_scalef_ss(a, 0b11111111, a, b);
58171 let e = _mm_set_ps(1., 1., 1., 8.);
58172 assert_eq_m128(r, e);
58173 }
58174
58175 #[simd_test(enable = "avx512f")]
58176 unsafe fn test_mm_maskz_scalef_ss() {
58177 let a = _mm_set1_ps(1.);
58178 let b = _mm_set1_ps(3.);
58179 let r = _mm_maskz_scalef_ss(0, a, b);
58180 let e = _mm_set_ps(1., 1., 1., 0.);
58181 assert_eq_m128(r, e);
58182 let r = _mm_maskz_scalef_ss(0b11111111, a, b);
58183 let e = _mm_set_ps(1., 1., 1., 8.);
58184 assert_eq_m128(r, e);
58185 }
58186
58187 #[simd_test(enable = "avx512f")]
58188 unsafe fn test_mm_scalef_sd() {
58189 let a = _mm_set1_pd(1.);
58190 let b = _mm_set1_pd(3.);
58191 let r = _mm_scalef_sd(a, b);
58192 let e = _mm_set_pd(1., 8.);
58193 assert_eq_m128d(r, e);
58194 }
58195
58196 #[simd_test(enable = "avx512f")]
58197 unsafe fn test_mm_mask_scalef_sd() {
58198 let a = _mm_set1_pd(1.);
58199 let b = _mm_set1_pd(3.);
58200 let r = _mm_mask_scalef_sd(a, 0, a, b);
58201 let e = _mm_set_pd(1., 1.);
58202 assert_eq_m128d(r, e);
58203 let r = _mm_mask_scalef_sd(a, 0b11111111, a, b);
58204 let e = _mm_set_pd(1., 8.);
58205 assert_eq_m128d(r, e);
58206 }
58207
58208 #[simd_test(enable = "avx512f")]
58209 unsafe fn test_mm_maskz_scalef_sd() {
58210 let a = _mm_set1_pd(1.);
58211 let b = _mm_set1_pd(3.);
58212 let r = _mm_maskz_scalef_sd(0, a, b);
58213 let e = _mm_set_pd(1., 0.);
58214 assert_eq_m128d(r, e);
58215 let r = _mm_maskz_scalef_sd(0b11111111, a, b);
58216 let e = _mm_set_pd(1., 8.);
58217 assert_eq_m128d(r, e);
58218 }
58219
58220 #[simd_test(enable = "avx512f")]
58221 unsafe fn test_mm_mask_fmadd_ss() {
58222 let a = _mm_set1_ps(1.);
58223 let b = _mm_set1_ps(2.);
58224 let c = _mm_set1_ps(3.);
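// low lane: a * b + c = 1. * 2. + 3. = 5. when the mask bit is set, otherwise a passes through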
58225 let r = _mm_mask_fmadd_ss(a, 0, b, c);
58226 assert_eq_m128(r, a);
58227 let r = _mm_mask_fmadd_ss(a, 0b11111111, b, c);
58228 let e = _mm_set_ps(1., 1., 1., 5.);
58229 assert_eq_m128(r, e);
58230 }
58231
58232 #[simd_test(enable = "avx512f")]
58233 unsafe fn test_mm_maskz_fmadd_ss() {
58234 let a = _mm_set1_ps(1.);
58235 let b = _mm_set1_ps(2.);
58236 let c = _mm_set1_ps(3.);
58237 let r = _mm_maskz_fmadd_ss(0, a, b, c);
58238 let e = _mm_set_ps(1., 1., 1., 0.);
58239 assert_eq_m128(r, e);
58240 let r = _mm_maskz_fmadd_ss(0b11111111, a, b, c);
58241 let e = _mm_set_ps(1., 1., 1., 5.);
58242 assert_eq_m128(r, e);
58243 }
58244
58245 #[simd_test(enable = "avx512f")]
58246 unsafe fn test_mm_mask3_fmadd_ss() {
58247 let a = _mm_set1_ps(1.);
58248 let b = _mm_set1_ps(2.);
58249 let c = _mm_set1_ps(3.);
58250 let r = _mm_mask3_fmadd_ss(a, b, c, 0);
58251 assert_eq_m128(r, c);
58252 let r = _mm_mask3_fmadd_ss(a, b, c, 0b11111111);
58253 let e = _mm_set_ps(3., 3., 3., 5.);
58254 assert_eq_m128(r, e);
58255 }
58256
58257 #[simd_test(enable = "avx512f")]
58258 unsafe fn test_mm_mask_fmadd_sd() {
58259 let a = _mm_set1_pd(1.);
58260 let b = _mm_set1_pd(2.);
58261 let c = _mm_set1_pd(3.);
58262 let r = _mm_mask_fmadd_sd(a, 0, b, c);
58263 assert_eq_m128d(r, a);
58264 let r = _mm_mask_fmadd_sd(a, 0b11111111, b, c);
58265 let e = _mm_set_pd(1., 5.);
58266 assert_eq_m128d(r, e);
58267 }
58268
58269 #[simd_test(enable = "avx512f")]
58270 unsafe fn test_mm_maskz_fmadd_sd() {
58271 let a = _mm_set1_pd(1.);
58272 let b = _mm_set1_pd(2.);
58273 let c = _mm_set1_pd(3.);
58274 let r = _mm_maskz_fmadd_sd(0, a, b, c);
58275 let e = _mm_set_pd(1., 0.);
58276 assert_eq_m128d(r, e);
58277 let r = _mm_maskz_fmadd_sd(0b11111111, a, b, c);
58278 let e = _mm_set_pd(1., 5.);
58279 assert_eq_m128d(r, e);
58280 }
58281
58282 #[simd_test(enable = "avx512f")]
58283 unsafe fn test_mm_mask3_fmadd_sd() {
58284 let a = _mm_set1_pd(1.);
58285 let b = _mm_set1_pd(2.);
58286 let c = _mm_set1_pd(3.);
58287 let r = _mm_mask3_fmadd_sd(a, b, c, 0);
58288 assert_eq_m128d(r, c);
58289 let r = _mm_mask3_fmadd_sd(a, b, c, 0b11111111);
58290 let e = _mm_set_pd(3., 5.);
58291 assert_eq_m128d(r, e);
58292 }
58293
58294 #[simd_test(enable = "avx512f")]
58295 unsafe fn test_mm_mask_fmsub_ss() {
58296 let a = _mm_set1_ps(1.);
58297 let b = _mm_set1_ps(2.);
58298 let c = _mm_set1_ps(3.);
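// low lane: a * b - c = 1. * 2. - 3. = -1. when the mask bit is set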
58299 let r = _mm_mask_fmsub_ss(a, 0, b, c);
58300 assert_eq_m128(r, a);
58301 let r = _mm_mask_fmsub_ss(a, 0b11111111, b, c);
58302 let e = _mm_set_ps(1., 1., 1., -1.);
58303 assert_eq_m128(r, e);
58304 }
58305
58306 #[simd_test(enable = "avx512f")]
58307 unsafe fn test_mm_maskz_fmsub_ss() {
58308 let a = _mm_set1_ps(1.);
58309 let b = _mm_set1_ps(2.);
58310 let c = _mm_set1_ps(3.);
58311 let r = _mm_maskz_fmsub_ss(0, a, b, c);
58312 let e = _mm_set_ps(1., 1., 1., 0.);
58313 assert_eq_m128(r, e);
58314 let r = _mm_maskz_fmsub_ss(0b11111111, a, b, c);
58315 let e = _mm_set_ps(1., 1., 1., -1.);
58316 assert_eq_m128(r, e);
58317 }
58318
58319 #[simd_test(enable = "avx512f")]
58320 unsafe fn test_mm_mask3_fmsub_ss() {
58321 let a = _mm_set1_ps(1.);
58322 let b = _mm_set1_ps(2.);
58323 let c = _mm_set1_ps(3.);
58324 let r = _mm_mask3_fmsub_ss(a, b, c, 0);
58325 assert_eq_m128(r, c);
58326 let r = _mm_mask3_fmsub_ss(a, b, c, 0b11111111);
58327 let e = _mm_set_ps(3., 3., 3., -1.);
58328 assert_eq_m128(r, e);
58329 }
58330
58331 #[simd_test(enable = "avx512f")]
58332 unsafe fn test_mm_mask_fmsub_sd() {
58333 let a = _mm_set1_pd(1.);
58334 let b = _mm_set1_pd(2.);
58335 let c = _mm_set1_pd(3.);
58336 let r = _mm_mask_fmsub_sd(a, 0, b, c);
58337 assert_eq_m128d(r, a);
58338 let r = _mm_mask_fmsub_sd(a, 0b11111111, b, c);
58339 let e = _mm_set_pd(1., -1.);
58340 assert_eq_m128d(r, e);
58341 }
58342
58343 #[simd_test(enable = "avx512f")]
58344 unsafe fn test_mm_maskz_fmsub_sd() {
58345 let a = _mm_set1_pd(1.);
58346 let b = _mm_set1_pd(2.);
58347 let c = _mm_set1_pd(3.);
58348 let r = _mm_maskz_fmsub_sd(0, a, b, c);
58349 let e = _mm_set_pd(1., 0.);
58350 assert_eq_m128d(r, e);
58351 let r = _mm_maskz_fmsub_sd(0b11111111, a, b, c);
58352 let e = _mm_set_pd(1., -1.);
58353 assert_eq_m128d(r, e);
58354 }
58355
58356 #[simd_test(enable = "avx512f")]
58357 unsafe fn test_mm_mask3_fmsub_sd() {
58358 let a = _mm_set1_pd(1.);
58359 let b = _mm_set1_pd(2.);
58360 let c = _mm_set1_pd(3.);
58361 let r = _mm_mask3_fmsub_sd(a, b, c, 0);
58362 assert_eq_m128d(r, c);
58363 let r = _mm_mask3_fmsub_sd(a, b, c, 0b11111111);
58364 let e = _mm_set_pd(3., -1.);
58365 assert_eq_m128d(r, e);
58366 }
58367
58368 #[simd_test(enable = "avx512f")]
58369 unsafe fn test_mm_mask_fnmadd_ss() {
58370 let a = _mm_set1_ps(1.);
58371 let b = _mm_set1_ps(2.);
58372 let c = _mm_set1_ps(3.);
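// low lane: -(a * b) + c = -2. + 3. = 1. when the mask bit is set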
58373 let r = _mm_mask_fnmadd_ss(a, 0, b, c);
58374 assert_eq_m128(r, a);
58375 let r = _mm_mask_fnmadd_ss(a, 0b11111111, b, c);
58376 let e = _mm_set_ps(1., 1., 1., 1.);
58377 assert_eq_m128(r, e);
58378 }
58379
58380 #[simd_test(enable = "avx512f")]
58381 unsafe fn test_mm_maskz_fnmadd_ss() {
58382 let a = _mm_set1_ps(1.);
58383 let b = _mm_set1_ps(2.);
58384 let c = _mm_set1_ps(3.);
58385 let r = _mm_maskz_fnmadd_ss(0, a, b, c);
58386 let e = _mm_set_ps(1., 1., 1., 0.);
58387 assert_eq_m128(r, e);
58388 let r = _mm_maskz_fnmadd_ss(0b11111111, a, b, c);
58389 let e = _mm_set_ps(1., 1., 1., 1.);
58390 assert_eq_m128(r, e);
58391 }
58392
58393 #[simd_test(enable = "avx512f")]
58394 unsafe fn test_mm_mask3_fnmadd_ss() {
58395 let a = _mm_set1_ps(1.);
58396 let b = _mm_set1_ps(2.);
58397 let c = _mm_set1_ps(3.);
58398 let r = _mm_mask3_fnmadd_ss(a, b, c, 0);
58399 assert_eq_m128(r, c);
58400 let r = _mm_mask3_fnmadd_ss(a, b, c, 0b11111111);
58401 let e = _mm_set_ps(3., 3., 3., 1.);
58402 assert_eq_m128(r, e);
58403 }
58404
58405 #[simd_test(enable = "avx512f")]
58406 unsafe fn test_mm_mask_fnmadd_sd() {
58407 let a = _mm_set1_pd(1.);
58408 let b = _mm_set1_pd(2.);
58409 let c = _mm_set1_pd(3.);
58410 let r = _mm_mask_fnmadd_sd(a, 0, b, c);
58411 assert_eq_m128d(r, a);
58412 let r = _mm_mask_fnmadd_sd(a, 0b11111111, b, c);
58413 let e = _mm_set_pd(1., 1.);
58414 assert_eq_m128d(r, e);
58415 }
58416
58417 #[simd_test(enable = "avx512f")]
58418 unsafe fn test_mm_maskz_fnmadd_sd() {
58419 let a = _mm_set1_pd(1.);
58420 let b = _mm_set1_pd(2.);
58421 let c = _mm_set1_pd(3.);
58422 let r = _mm_maskz_fnmadd_sd(0, a, b, c);
58423 let e = _mm_set_pd(1., 0.);
58424 assert_eq_m128d(r, e);
58425 let r = _mm_maskz_fnmadd_sd(0b11111111, a, b, c);
58426 let e = _mm_set_pd(1., 1.);
58427 assert_eq_m128d(r, e);
58428 }
58429
58430 #[simd_test(enable = "avx512f")]
58431 unsafe fn test_mm_mask3_fnmadd_sd() {
58432 let a = _mm_set1_pd(1.);
58433 let b = _mm_set1_pd(2.);
58434 let c = _mm_set1_pd(3.);
58435 let r = _mm_mask3_fnmadd_sd(a, b, c, 0);
58436 assert_eq_m128d(r, c);
58437 let r = _mm_mask3_fnmadd_sd(a, b, c, 0b11111111);
58438 let e = _mm_set_pd(3., 1.);
58439 assert_eq_m128d(r, e);
58440 }
58441
58442 #[simd_test(enable = "avx512f")]
58443 unsafe fn test_mm_mask_fnmsub_ss() {
58444 let a = _mm_set1_ps(1.);
58445 let b = _mm_set1_ps(2.);
58446 let c = _mm_set1_ps(3.);
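// low lane: -(a * b) - c = -2. - 3. = -5. when the mask bit is set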
58447 let r = _mm_mask_fnmsub_ss(a, 0, b, c);
58448 assert_eq_m128(r, a);
58449 let r = _mm_mask_fnmsub_ss(a, 0b11111111, b, c);
58450 let e = _mm_set_ps(1., 1., 1., -5.);
58451 assert_eq_m128(r, e);
58452 }
58453
58454 #[simd_test(enable = "avx512f")]
58455 unsafe fn test_mm_maskz_fnmsub_ss() {
58456 let a = _mm_set1_ps(1.);
58457 let b = _mm_set1_ps(2.);
58458 let c = _mm_set1_ps(3.);
58459 let r = _mm_maskz_fnmsub_ss(0, a, b, c);
58460 let e = _mm_set_ps(1., 1., 1., 0.);
58461 assert_eq_m128(r, e);
58462 let r = _mm_maskz_fnmsub_ss(0b11111111, a, b, c);
58463 let e = _mm_set_ps(1., 1., 1., -5.);
58464 assert_eq_m128(r, e);
58465 }
58466
58467 #[simd_test(enable = "avx512f")]
58468 unsafe fn test_mm_mask3_fnmsub_ss() {
58469 let a = _mm_set1_ps(1.);
58470 let b = _mm_set1_ps(2.);
58471 let c = _mm_set1_ps(3.);
58472 let r = _mm_mask3_fnmsub_ss(a, b, c, 0);
58473 assert_eq_m128(r, c);
58474 let r = _mm_mask3_fnmsub_ss(a, b, c, 0b11111111);
58475 let e = _mm_set_ps(3., 3., 3., -5.);
58476 assert_eq_m128(r, e);
58477 }
58478
58479 #[simd_test(enable = "avx512f")]
58480 unsafe fn test_mm_mask_fnmsub_sd() {
58481 let a = _mm_set1_pd(1.);
58482 let b = _mm_set1_pd(2.);
58483 let c = _mm_set1_pd(3.);
58484 let r = _mm_mask_fnmsub_sd(a, 0, b, c);
58485 assert_eq_m128d(r, a);
58486 let r = _mm_mask_fnmsub_sd(a, 0b11111111, b, c);
58487 let e = _mm_set_pd(1., -5.);
58488 assert_eq_m128d(r, e);
58489 }
58490
58491 #[simd_test(enable = "avx512f")]
58492 unsafe fn test_mm_maskz_fnmsub_sd() {
58493 let a = _mm_set1_pd(1.);
58494 let b = _mm_set1_pd(2.);
58495 let c = _mm_set1_pd(3.);
58496 let r = _mm_maskz_fnmsub_sd(0, a, b, c);
58497 let e = _mm_set_pd(1., 0.);
58498 assert_eq_m128d(r, e);
58499 let r = _mm_maskz_fnmsub_sd(0b11111111, a, b, c);
58500 let e = _mm_set_pd(1., -5.);
58501 assert_eq_m128d(r, e);
58502 }
58503
58504 #[simd_test(enable = "avx512f")]
58505 unsafe fn test_mm_mask3_fnmsub_sd() {
58506 let a = _mm_set1_pd(1.);
58507 let b = _mm_set1_pd(2.);
58508 let c = _mm_set1_pd(3.);
58509 let r = _mm_mask3_fnmsub_sd(a, b, c, 0);
58510 assert_eq_m128d(r, c);
58511 let r = _mm_mask3_fnmsub_sd(a, b, c, 0b11111111);
58512 let e = _mm_set_pd(3., -5.);
58513 assert_eq_m128d(r, e);
58514 }
58515
58516 #[simd_test(enable = "avx512f")]
58517 unsafe fn test_mm_add_round_ss() {
58518 let a = _mm_set_ps(1., 2., 10., 20.);
58519 let b = _mm_set_ps(3., 4., 30., 40.);
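// _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC: truncate toward zero with exceptions suppressed (SAE); 20. + 40. = 60. is exact either way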
58520 let r = _mm_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58521 let e = _mm_set_ps(1., 2., 10., 60.);
58522 assert_eq_m128(r, e);
58523 }
58524
58525 #[simd_test(enable = "avx512f")]
58526 unsafe fn test_mm_mask_add_round_ss() {
58527 let src = _mm_set_ps(10., 11., 100., 110.);
58528 let a = _mm_set_ps(1., 2., 10., 20.);
58529 let b = _mm_set_ps(3., 4., 30., 40.);
58530 let r = _mm_mask_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58531 let e = _mm_set_ps(1., 2., 10., 110.);
58532 assert_eq_m128(r, e);
58533 let r = _mm_mask_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58534 src, 0b11111111, a, b,
58535 );
58536 let e = _mm_set_ps(1., 2., 10., 60.);
58537 assert_eq_m128(r, e);
58538 }
58539
58540 #[simd_test(enable = "avx512f")]
58541 unsafe fn test_mm_maskz_add_round_ss() {
58542 let a = _mm_set_ps(1., 2., 10., 20.);
58543 let b = _mm_set_ps(3., 4., 30., 40.);
58544 let r = _mm_maskz_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58545 let e = _mm_set_ps(1., 2., 10., 0.);
58546 assert_eq_m128(r, e);
58547 let r =
58548 _mm_maskz_add_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
58549 let e = _mm_set_ps(1., 2., 10., 60.);
58550 assert_eq_m128(r, e);
58551 }
58552
58553 #[simd_test(enable = "avx512f")]
58554 unsafe fn test_mm_add_round_sd() {
58555 let a = _mm_set_pd(1., 2.);
58556 let b = _mm_set_pd(3., 4.);
58557 let r = _mm_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58558 let e = _mm_set_pd(1., 6.);
58559 assert_eq_m128d(r, e);
58560 }
58561
58562 #[simd_test(enable = "avx512f")]
58563 unsafe fn test_mm_mask_add_round_sd() {
58564 let src = _mm_set_pd(10., 11.);
58565 let a = _mm_set_pd(1., 2.);
58566 let b = _mm_set_pd(3., 4.);
58567 let r = _mm_mask_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58568 let e = _mm_set_pd(1., 11.);
58569 assert_eq_m128d(r, e);
58570 let r = _mm_mask_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58571 src, 0b11111111, a, b,
58572 );
58573 let e = _mm_set_pd(1., 6.);
58574 assert_eq_m128d(r, e);
58575 }
58576
58577 #[simd_test(enable = "avx512f")]
58578 unsafe fn test_mm_maskz_add_round_sd() {
58579 let a = _mm_set_pd(1., 2.);
58580 let b = _mm_set_pd(3., 4.);
58581 let r = _mm_maskz_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58582 let e = _mm_set_pd(1., 0.);
58583 assert_eq_m128d(r, e);
58584 let r =
58585 _mm_maskz_add_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
58586 let e = _mm_set_pd(1., 6.);
58587 assert_eq_m128d(r, e);
58588 }
58589
58590 #[simd_test(enable = "avx512f")]
58591 unsafe fn test_mm_sub_round_ss() {
58592 let a = _mm_set_ps(1., 2., 10., 20.);
58593 let b = _mm_set_ps(3., 4., 30., 40.);
58594 let r = _mm_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58595 let e = _mm_set_ps(1., 2., 10., -20.);
58596 assert_eq_m128(r, e);
58597 }
58598
58599 #[simd_test(enable = "avx512f")]
58600 unsafe fn test_mm_mask_sub_round_ss() {
58601 let src = _mm_set_ps(10., 11., 100., 110.);
58602 let a = _mm_set_ps(1., 2., 10., 20.);
58603 let b = _mm_set_ps(3., 4., 30., 40.);
58604 let r = _mm_mask_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58605 let e = _mm_set_ps(1., 2., 10., 110.);
58606 assert_eq_m128(r, e);
58607 let r = _mm_mask_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58608 src, 0b11111111, a, b,
58609 );
58610 let e = _mm_set_ps(1., 2., 10., -20.);
58611 assert_eq_m128(r, e);
58612 }
58613
58614 #[simd_test(enable = "avx512f")]
58615 unsafe fn test_mm_maskz_sub_round_ss() {
58616 let a = _mm_set_ps(1., 2., 10., 20.);
58617 let b = _mm_set_ps(3., 4., 30., 40.);
58618 let r = _mm_maskz_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58619 let e = _mm_set_ps(1., 2., 10., 0.);
58620 assert_eq_m128(r, e);
58621 let r =
58622 _mm_maskz_sub_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
58623 let e = _mm_set_ps(1., 2., 10., -20.);
58624 assert_eq_m128(r, e);
58625 }
58626
58627 #[simd_test(enable = "avx512f")]
58628 unsafe fn test_mm_sub_round_sd() {
58629 let a = _mm_set_pd(1., 2.);
58630 let b = _mm_set_pd(3., 4.);
58631 let r = _mm_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58632 let e = _mm_set_pd(1., -2.);
58633 assert_eq_m128d(r, e);
58634 }
58635
58636 #[simd_test(enable = "avx512f")]
58637 unsafe fn test_mm_mask_sub_round_sd() {
58638 let src = _mm_set_pd(10., 11.);
58639 let a = _mm_set_pd(1., 2.);
58640 let b = _mm_set_pd(3., 4.);
58641 let r = _mm_mask_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58642 let e = _mm_set_pd(1., 11.);
58643 assert_eq_m128d(r, e);
58644 let r = _mm_mask_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58645 src, 0b11111111, a, b,
58646 );
58647 let e = _mm_set_pd(1., -2.);
58648 assert_eq_m128d(r, e);
58649 }
58650
58651 #[simd_test(enable = "avx512f")]
58652 unsafe fn test_mm_maskz_sub_round_sd() {
58653 let a = _mm_set_pd(1., 2.);
58654 let b = _mm_set_pd(3., 4.);
58655 let r = _mm_maskz_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58656 let e = _mm_set_pd(1., 0.);
58657 assert_eq_m128d(r, e);
58658 let r =
58659 _mm_maskz_sub_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
58660 let e = _mm_set_pd(1., -2.);
58661 assert_eq_m128d(r, e);
58662 }
58663
58664 #[simd_test(enable = "avx512f")]
58665 unsafe fn test_mm_mul_round_ss() {
58666 let a = _mm_set_ps(1., 2., 10., 20.);
58667 let b = _mm_set_ps(3., 4., 30., 40.);
58668 let r = _mm_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58669 let e = _mm_set_ps(1., 2., 10., 800.);
58670 assert_eq_m128(r, e);
58671 }
58672
58673 #[simd_test(enable = "avx512f")]
58674 unsafe fn test_mm_mask_mul_round_ss() {
58675 let src = _mm_set_ps(10., 11., 100., 110.);
58676 let a = _mm_set_ps(1., 2., 10., 20.);
58677 let b = _mm_set_ps(3., 4., 30., 40.);
58678 let r = _mm_mask_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58679 let e = _mm_set_ps(1., 2., 10., 110.);
58680 assert_eq_m128(r, e);
58681 let r = _mm_mask_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58682 src, 0b11111111, a, b,
58683 );
58684 let e = _mm_set_ps(1., 2., 10., 800.);
58685 assert_eq_m128(r, e);
58686 }
58687
58688 #[simd_test(enable = "avx512f")]
58689 unsafe fn test_mm_maskz_mul_round_ss() {
58690 let a = _mm_set_ps(1., 2., 10., 20.);
58691 let b = _mm_set_ps(3., 4., 30., 40.);
58692 let r = _mm_maskz_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58693 let e = _mm_set_ps(1., 2., 10., 0.);
58694 assert_eq_m128(r, e);
58695 let r =
58696 _mm_maskz_mul_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
58697 let e = _mm_set_ps(1., 2., 10., 800.);
58698 assert_eq_m128(r, e);
58699 }
58700
58701 #[simd_test(enable = "avx512f")]
58702 unsafe fn test_mm_mul_round_sd() {
58703 let a = _mm_set_pd(1., 2.);
58704 let b = _mm_set_pd(3., 4.);
58705 let r = _mm_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58706 let e = _mm_set_pd(1., 8.);
58707 assert_eq_m128d(r, e);
58708 }
58709
58710 #[simd_test(enable = "avx512f")]
58711 unsafe fn test_mm_mask_mul_round_sd() {
58712 let src = _mm_set_pd(10., 11.);
58713 let a = _mm_set_pd(1., 2.);
58714 let b = _mm_set_pd(3., 4.);
58715 let r = _mm_mask_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58716 let e = _mm_set_pd(1., 11.);
58717 assert_eq_m128d(r, e);
58718 let r = _mm_mask_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58719 src, 0b11111111, a, b,
58720 );
58721 let e = _mm_set_pd(1., 8.);
58722 assert_eq_m128d(r, e);
58723 }
58724
58725 #[simd_test(enable = "avx512f")]
58726 unsafe fn test_mm_maskz_mul_round_sd() {
58727 let a = _mm_set_pd(1., 2.);
58728 let b = _mm_set_pd(3., 4.);
58729 let r = _mm_maskz_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58730 let e = _mm_set_pd(1., 0.);
58731 assert_eq_m128d(r, e);
58732 let r =
58733 _mm_maskz_mul_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
58734 let e = _mm_set_pd(1., 8.);
58735 assert_eq_m128d(r, e);
58736 }
58737
58738 #[simd_test(enable = "avx512f")]
58739 unsafe fn test_mm_div_round_ss() {
58740 let a = _mm_set_ps(1., 2., 10., 20.);
58741 let b = _mm_set_ps(3., 4., 30., 40.);
58742 let r = _mm_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58743 let e = _mm_set_ps(1., 2., 10., 0.5);
58744 assert_eq_m128(r, e);
58745 }
58746
58747 #[simd_test(enable = "avx512f")]
58748 unsafe fn test_mm_mask_div_round_ss() {
58749 let src = _mm_set_ps(10., 11., 100., 110.);
58750 let a = _mm_set_ps(1., 2., 10., 20.);
58751 let b = _mm_set_ps(3., 4., 30., 40.);
58752 let r = _mm_mask_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58753 let e = _mm_set_ps(1., 2., 10., 110.);
58754 assert_eq_m128(r, e);
58755 let r = _mm_mask_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58756 src, 0b11111111, a, b,
58757 );
58758 let e = _mm_set_ps(1., 2., 10., 0.5);
58759 assert_eq_m128(r, e);
58760 }
58761
58762 #[simd_test(enable = "avx512f")]
58763 unsafe fn test_mm_maskz_div_round_ss() {
58764 let a = _mm_set_ps(1., 2., 10., 20.);
58765 let b = _mm_set_ps(3., 4., 30., 40.);
58766 let r = _mm_maskz_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58767 let e = _mm_set_ps(1., 2., 10., 0.);
58768 assert_eq_m128(r, e);
58769 let r =
58770 _mm_maskz_div_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
58771 let e = _mm_set_ps(1., 2., 10., 0.5);
58772 assert_eq_m128(r, e);
58773 }
58774
58775 #[simd_test(enable = "avx512f")]
58776 unsafe fn test_mm_div_round_sd() {
58777 let a = _mm_set_pd(1., 2.);
58778 let b = _mm_set_pd(3., 4.);
58779 let r = _mm_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58780 let e = _mm_set_pd(1., 0.5);
58781 assert_eq_m128d(r, e);
58782 }
58783
58784 #[simd_test(enable = "avx512f")]
58785 unsafe fn test_mm_mask_div_round_sd() {
58786 let src = _mm_set_pd(10., 11.);
58787 let a = _mm_set_pd(1., 2.);
58788 let b = _mm_set_pd(3., 4.);
58789 let r = _mm_mask_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58790 let e = _mm_set_pd(1., 11.);
58791 assert_eq_m128d(r, e);
58792 let r = _mm_mask_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58793 src, 0b11111111, a, b,
58794 );
58795 let e = _mm_set_pd(1., 0.5);
58796 assert_eq_m128d(r, e);
58797 }
58798
58799 #[simd_test(enable = "avx512f")]
58800 unsafe fn test_mm_maskz_div_round_sd() {
58801 let a = _mm_set_pd(1., 2.);
58802 let b = _mm_set_pd(3., 4.);
58803 let r = _mm_maskz_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58804 let e = _mm_set_pd(1., 0.);
58805 assert_eq_m128d(r, e);
58806 let r =
58807 _mm_maskz_div_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
58808 let e = _mm_set_pd(1., 0.5);
58809 assert_eq_m128d(r, e);
58810 }
58811
58812 #[simd_test(enable = "avx512f")]
58813 unsafe fn test_mm_max_round_ss() {
58814 let a = _mm_set_ps(0., 1., 2., 3.);
58815 let b = _mm_set_ps(4., 5., 6., 7.);
58816 let r = _mm_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b);
58817 let e = _mm_set_ps(0., 1., 2., 7.);
58818 assert_eq_m128(r, e);
58819 }
58820
58821 #[simd_test(enable = "avx512f")]
58822 unsafe fn test_mm_mask_max_round_ss() {
58823 let a = _mm_set_ps(0., 1., 2., 3.);
58824 let b = _mm_set_ps(4., 5., 6., 7.);
58825 let r = _mm_mask_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
58826 let e = _mm_set_ps(0., 1., 2., 3.);
58827 assert_eq_m128(r, e);
58828 let r = _mm_mask_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
58829 let e = _mm_set_ps(0., 1., 2., 7.);
58830 assert_eq_m128(r, e);
58831 }
58832
58833 #[simd_test(enable = "avx512f")]
58834 unsafe fn test_mm_maskz_max_round_ss() {
58835 let a = _mm_set_ps(0., 1., 2., 3.);
58836 let b = _mm_set_ps(4., 5., 6., 7.);
58837 let r = _mm_maskz_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
58838 let e = _mm_set_ps(0., 1., 2., 0.);
58839 assert_eq_m128(r, e);
58840 let r = _mm_maskz_max_round_ss::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
58841 let e = _mm_set_ps(0., 1., 2., 7.);
58842 assert_eq_m128(r, e);
58843 }
58844
58845 #[simd_test(enable = "avx512f")]
58846 unsafe fn test_mm_max_round_sd() {
58847 let a = _mm_set_pd(0., 1.);
58848 let b = _mm_set_pd(2., 3.);
58849 let r = _mm_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
58850 let e = _mm_set_pd(0., 3.);
58851 assert_eq_m128d(r, e);
58852 }
58853
58854 #[simd_test(enable = "avx512f")]
58855 unsafe fn test_mm_mask_max_round_sd() {
58856 let a = _mm_set_pd(0., 1.);
58857 let b = _mm_set_pd(2., 3.);
58858 let r = _mm_mask_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
58859 let e = _mm_set_pd(0., 1.);
58860 assert_eq_m128d(r, e);
58861 let r = _mm_mask_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
58862 let e = _mm_set_pd(0., 3.);
58863 assert_eq_m128d(r, e);
58864 }
58865
58866 #[simd_test(enable = "avx512f")]
58867 unsafe fn test_mm_maskz_max_round_sd() {
58868 let a = _mm_set_pd(0., 1.);
58869 let b = _mm_set_pd(2., 3.);
58870 let r = _mm_maskz_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
58871 let e = _mm_set_pd(0., 0.);
58872 assert_eq_m128d(r, e);
58873 let r = _mm_maskz_max_round_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
58874 let e = _mm_set_pd(0., 3.);
58875 assert_eq_m128d(r, e);
58876 }
58877
58878 #[simd_test(enable = "avx512f")]
58879 unsafe fn test_mm_min_round_ss() {
58880 let a = _mm_set_ps(0., 1., 2., 3.);
58881 let b = _mm_set_ps(4., 5., 6., 7.);
58882 let r = _mm_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b);
58883 let e = _mm_set_ps(0., 1., 2., 3.);
58884 assert_eq_m128(r, e);
58885 }
58886
58887 #[simd_test(enable = "avx512f")]
58888 unsafe fn test_mm_mask_min_round_ss() {
58889 let a = _mm_set_ps(0., 1., 2., 3.);
58890 let b = _mm_set_ps(4., 5., 6., 7.);
58891 let r = _mm_mask_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
58892 let e = _mm_set_ps(0., 1., 2., 3.);
58893 assert_eq_m128(r, e);
58894 let r = _mm_mask_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
58895 let e = _mm_set_ps(0., 1., 2., 3.);
58896 assert_eq_m128(r, e);
58897 }
58898
58899 #[simd_test(enable = "avx512f")]
58900 unsafe fn test_mm_maskz_min_round_ss() {
58901 let a = _mm_set_ps(0., 1., 2., 3.);
58902 let b = _mm_set_ps(4., 5., 6., 7.);
58903 let r = _mm_maskz_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
58904 let e = _mm_set_ps(0., 1., 2., 0.);
58905 assert_eq_m128(r, e);
58906 let r = _mm_maskz_min_round_ss::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
58907 let e = _mm_set_ps(0., 1., 2., 3.);
58908 assert_eq_m128(r, e);
58909 }
58910
58911 #[simd_test(enable = "avx512f")]
58912 unsafe fn test_mm_min_round_sd() {
58913 let a = _mm_set_pd(0., 1.);
58914 let b = _mm_set_pd(2., 3.);
58915 let r = _mm_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
58916 let e = _mm_set_pd(0., 1.);
58917 assert_eq_m128d(r, e);
58918 }
58919
58920 #[simd_test(enable = "avx512f")]
58921 unsafe fn test_mm_mask_min_round_sd() {
58922 let a = _mm_set_pd(0., 1.);
58923 let b = _mm_set_pd(2., 3.);
58924 let r = _mm_mask_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
58925 let e = _mm_set_pd(0., 1.);
58926 assert_eq_m128d(r, e);
58927 let r = _mm_mask_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
58928 let e = _mm_set_pd(0., 1.);
58929 assert_eq_m128d(r, e);
58930 }
58931
58932 #[simd_test(enable = "avx512f")]
58933 unsafe fn test_mm_maskz_min_round_sd() {
58934 let a = _mm_set_pd(0., 1.);
58935 let b = _mm_set_pd(2., 3.);
58936 let r = _mm_maskz_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
58937 let e = _mm_set_pd(0., 0.);
58938 assert_eq_m128d(r, e);
58939 let r = _mm_maskz_min_round_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
58940 let e = _mm_set_pd(0., 1.);
58941 assert_eq_m128d(r, e);
58942 }
58943
58944 #[simd_test(enable = "avx512f")]
58945 unsafe fn test_mm_sqrt_round_ss() {
58946 let a = _mm_set_ps(1., 2., 10., 20.);
58947 let b = _mm_set_ps(3., 4., 30., 4.);
58948 let r = _mm_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58949 let e = _mm_set_ps(1., 2., 10., 2.);
58950 assert_eq_m128(r, e);
58951 }
58952
58953 #[simd_test(enable = "avx512f")]
58954 unsafe fn test_mm_mask_sqrt_round_ss() {
58955 let src = _mm_set_ps(10., 11., 100., 110.);
58956 let a = _mm_set_ps(1., 2., 10., 20.);
58957 let b = _mm_set_ps(3., 4., 30., 4.);
58958 let r = _mm_mask_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58959 let e = _mm_set_ps(1., 2., 10., 110.);
58960 assert_eq_m128(r, e);
58961 let r = _mm_mask_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58962 src, 0b11111111, a, b,
58963 );
58964 let e = _mm_set_ps(1., 2., 10., 2.);
58965 assert_eq_m128(r, e);
58966 }
58967
58968 #[simd_test(enable = "avx512f")]
58969 unsafe fn test_mm_maskz_sqrt_round_ss() {
58970 let a = _mm_set_ps(1., 2., 10., 20.);
58971 let b = _mm_set_ps(3., 4., 30., 4.);
58972 let r = _mm_maskz_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
58973 let e = _mm_set_ps(1., 2., 10., 0.);
58974 assert_eq_m128(r, e);
58975 let r =
58976 _mm_maskz_sqrt_round_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
58977 let e = _mm_set_ps(1., 2., 10., 2.);
58978 assert_eq_m128(r, e);
58979 }
58980
58981 #[simd_test(enable = "avx512f")]
58982 unsafe fn test_mm_sqrt_round_sd() {
58983 let a = _mm_set_pd(1., 2.);
58984 let b = _mm_set_pd(3., 4.);
58985 let r = _mm_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
58986 let e = _mm_set_pd(1., 2.);
58987 assert_eq_m128d(r, e);
58988 }
58989
58990 #[simd_test(enable = "avx512f")]
58991 unsafe fn test_mm_mask_sqrt_round_sd() {
58992 let src = _mm_set_pd(10., 11.);
58993 let a = _mm_set_pd(1., 2.);
58994 let b = _mm_set_pd(3., 4.);
58995 let r = _mm_mask_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(src, 0, a, b);
58996 let e = _mm_set_pd(1., 11.);
58997 assert_eq_m128d(r, e);
58998 let r = _mm_mask_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
58999 src, 0b11111111, a, b,
59000 );
59001 let e = _mm_set_pd(1., 2.);
59002 assert_eq_m128d(r, e);
59003 }
59004
59005 #[simd_test(enable = "avx512f")]
59006 unsafe fn test_mm_maskz_sqrt_round_sd() {
59007 let a = _mm_set_pd(1., 2.);
59008 let b = _mm_set_pd(3., 4.);
59009 let r = _mm_maskz_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
59010 let e = _mm_set_pd(1., 0.);
59011 assert_eq_m128d(r, e);
59012 let r =
59013 _mm_maskz_sqrt_round_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0b11111111, a, b);
59014 let e = _mm_set_pd(1., 2.);
59015 assert_eq_m128d(r, e);
59016 }
59017
59018 #[simd_test(enable = "avx512f")]
59019 unsafe fn test_mm_getexp_round_ss() {
59020 let a = _mm_set1_ps(2.);
59021 let b = _mm_set1_ps(3.);
59022 let r = _mm_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, b);
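// getexp extracts floor(log2(|x|)) from the low element of b: floor(log2(3.0)) = 1, and the upper elements are copied from a.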
59023 let e = _mm_set_ps(2., 2., 2., 1.);
59024 assert_eq_m128(r, e);
59025 }
59026
59027 #[simd_test(enable = "avx512f")]
59028 unsafe fn test_mm_mask_getexp_round_ss() {
59029 let a = _mm_set1_ps(2.);
59030 let b = _mm_set1_ps(3.);
59031 let r = _mm_mask_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
59032 let e = _mm_set_ps(2., 2., 2., 2.);
59033 assert_eq_m128(r, e);
59034 let r = _mm_mask_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
59035 let e = _mm_set_ps(2., 2., 2., 1.);
59036 assert_eq_m128(r, e);
59037 }
59038
59039 #[simd_test(enable = "avx512f")]
59040 unsafe fn test_mm_maskz_getexp_round_ss() {
59041 let a = _mm_set1_ps(2.);
59042 let b = _mm_set1_ps(3.);
59043 let r = _mm_maskz_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
59044 let e = _mm_set_ps(2., 2., 2., 0.);
59045 assert_eq_m128(r, e);
59046 let r = _mm_maskz_getexp_round_ss::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
59047 let e = _mm_set_ps(2., 2., 2., 1.);
59048 assert_eq_m128(r, e);
59049 }
59050
59051 #[simd_test(enable = "avx512f")]
59052 unsafe fn test_mm_getexp_round_sd() {
59053 let a = _mm_set1_pd(2.);
59054 let b = _mm_set1_pd(3.);
59055 let r = _mm_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
59056 let e = _mm_set_pd(2., 1.);
59057 assert_eq_m128d(r, e);
59058 }
59059
59060 #[simd_test(enable = "avx512f")]
59061 unsafe fn test_mm_mask_getexp_round_sd() {
59062 let a = _mm_set1_pd(2.);
59063 let b = _mm_set1_pd(3.);
59064 let r = _mm_mask_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
59065 let e = _mm_set_pd(2., 2.);
59066 assert_eq_m128d(r, e);
59067 let r = _mm_mask_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
59068 let e = _mm_set_pd(2., 1.);
59069 assert_eq_m128d(r, e);
59070 }
59071
59072 #[simd_test(enable = "avx512f")]
59073 unsafe fn test_mm_maskz_getexp_round_sd() {
59074 let a = _mm_set1_pd(2.);
59075 let b = _mm_set1_pd(3.);
59076 let r = _mm_maskz_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
59077 let e = _mm_set_pd(2., 0.);
59078 assert_eq_m128d(r, e);
59079 let r = _mm_maskz_getexp_round_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
59080 let e = _mm_set_pd(2., 1.);
59081 assert_eq_m128d(r, e);
59082 }
59083
59084 #[simd_test(enable = "avx512f")]
59085 unsafe fn test_mm_getmant_round_ss() {
59086 let a = _mm_set1_ps(20.);
59087 let b = _mm_set1_ps(10.);
59088 let r =
59089 _mm_getmant_round_ss::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC, _MM_FROUND_CUR_DIRECTION>(
59090 a, b,
59091 );
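// With _MM_MANT_NORM_1_2 the mantissa is normalized into [1, 2): 10.0 = 1.25 * 2^3, so the low element becomes 1.25.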
59092 let e = _mm_set_ps(20., 20., 20., 1.25);
59093 assert_eq_m128(r, e);
59094 }
59095
59096 #[simd_test(enable = "avx512f")]
59097 unsafe fn test_mm_mask_getmant_round_ss() {
59098 let a = _mm_set1_ps(20.);
59099 let b = _mm_set1_ps(10.);
59100 let r = _mm_mask_getmant_round_ss::<
59101 _MM_MANT_NORM_1_2,
59102 _MM_MANT_SIGN_SRC,
59103 _MM_FROUND_CUR_DIRECTION,
59104 >(a, 0, a, b);
59105 let e = _mm_set_ps(20., 20., 20., 20.);
59106 assert_eq_m128(r, e);
59107 let r = _mm_mask_getmant_round_ss::<
59108 _MM_MANT_NORM_1_2,
59109 _MM_MANT_SIGN_SRC,
59110 _MM_FROUND_CUR_DIRECTION,
59111 >(a, 0b11111111, a, b);
59112 let e = _mm_set_ps(20., 20., 20., 1.25);
59113 assert_eq_m128(r, e);
59114 }
59115
59116 #[simd_test(enable = "avx512f")]
59117 unsafe fn test_mm_maskz_getmant_round_ss() {
59118 let a = _mm_set1_ps(20.);
59119 let b = _mm_set1_ps(10.);
59120 let r = _mm_maskz_getmant_round_ss::<
59121 _MM_MANT_NORM_1_2,
59122 _MM_MANT_SIGN_SRC,
59123 _MM_FROUND_CUR_DIRECTION,
59124 >(0, a, b);
59125 let e = _mm_set_ps(20., 20., 20., 0.);
59126 assert_eq_m128(r, e);
59127 let r = _mm_maskz_getmant_round_ss::<
59128 _MM_MANT_NORM_1_2,
59129 _MM_MANT_SIGN_SRC,
59130 _MM_FROUND_CUR_DIRECTION,
59131 >(0b11111111, a, b);
59132 let e = _mm_set_ps(20., 20., 20., 1.25);
59133 assert_eq_m128(r, e);
59134 }
59135
59136 #[simd_test(enable = "avx512f")]
59137 unsafe fn test_mm_getmant_round_sd() {
59138 let a = _mm_set1_pd(20.);
59139 let b = _mm_set1_pd(10.);
59140 let r =
59141 _mm_getmant_round_sd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC, _MM_FROUND_CUR_DIRECTION>(
59142 a, b,
59143 );
59144 let e = _mm_set_pd(20., 1.25);
59145 assert_eq_m128d(r, e);
59146 }
59147
59148 #[simd_test(enable = "avx512f")]
59149 unsafe fn test_mm_mask_getmant_round_sd() {
59150 let a = _mm_set1_pd(20.);
59151 let b = _mm_set1_pd(10.);
59152 let r = _mm_mask_getmant_round_sd::<
59153 _MM_MANT_NORM_1_2,
59154 _MM_MANT_SIGN_SRC,
59155 _MM_FROUND_CUR_DIRECTION,
59156 >(a, 0, a, b);
59157 let e = _mm_set_pd(20., 20.);
59158 assert_eq_m128d(r, e);
59159 let r = _mm_mask_getmant_round_sd::<
59160 _MM_MANT_NORM_1_2,
59161 _MM_MANT_SIGN_SRC,
59162 _MM_FROUND_CUR_DIRECTION,
59163 >(a, 0b11111111, a, b);
59164 let e = _mm_set_pd(20., 1.25);
59165 assert_eq_m128d(r, e);
59166 }
59167
59168 #[simd_test(enable = "avx512f")]
59169 unsafe fn test_mm_maskz_getmant_round_sd() {
59170 let a = _mm_set1_pd(20.);
59171 let b = _mm_set1_pd(10.);
59172 let r = _mm_maskz_getmant_round_sd::<
59173 _MM_MANT_NORM_1_2,
59174 _MM_MANT_SIGN_SRC,
59175 _MM_FROUND_CUR_DIRECTION,
59176 >(0, a, b);
59177 let e = _mm_set_pd(20., 0.);
59178 assert_eq_m128d(r, e);
59179 let r = _mm_maskz_getmant_round_sd::<
59180 _MM_MANT_NORM_1_2,
59181 _MM_MANT_SIGN_SRC,
59182 _MM_FROUND_CUR_DIRECTION,
59183 >(0b11111111, a, b);
59184 let e = _mm_set_pd(20., 1.25);
59185 assert_eq_m128d(r, e);
59186 }
59187
59188 #[simd_test(enable = "avx512f")]
59189 unsafe fn test_mm_roundscale_round_ss() {
59190 let a = _mm_set1_ps(2.2);
59191 let b = _mm_set1_ps(1.1);
59192 let r = _mm_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, b);
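// Rounding-control imm8 = 0 rounds to the nearest integer with scale 2^0, so the low element 1.1 becomes 1.0.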
59193 let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
59194 assert_eq_m128(r, e);
59195 }
59196
59197 #[simd_test(enable = "avx512f")]
59198 unsafe fn test_mm_mask_roundscale_round_ss() {
59199 let a = _mm_set1_ps(2.2);
59200 let b = _mm_set1_ps(1.1);
59201 let r = _mm_mask_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
59202 let e = _mm_set_ps(2.2, 2.2, 2.2, 2.2);
59203 assert_eq_m128(r, e);
59204 let r = _mm_mask_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
59205 let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
59206 assert_eq_m128(r, e);
59207 }
59208
59209 #[simd_test(enable = "avx512f")]
59210 unsafe fn test_mm_maskz_roundscale_round_ss() {
59211 let a = _mm_set1_ps(2.2);
59212 let b = _mm_set1_ps(1.1);
59213 let r = _mm_maskz_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(0, a, b);
59214 let e = _mm_set_ps(2.2, 2.2, 2.2, 0.0);
59215 assert_eq_m128(r, e);
59216 let r = _mm_maskz_roundscale_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
59217 let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
59218 assert_eq_m128(r, e);
59219 }
59220
59221 #[simd_test(enable = "avx512f")]
59222 unsafe fn test_mm_roundscale_round_sd() {
59223 let a = _mm_set1_pd(2.2);
59224 let b = _mm_set1_pd(1.1);
59225 let r = _mm_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(a, b);
59226 let e = _mm_set_pd(2.2, 1.0);
59227 assert_eq_m128d(r, e);
59228 }
59229
59230 #[simd_test(enable = "avx512f")]
59231 unsafe fn test_mm_mask_roundscale_round_sd() {
59232 let a = _mm_set1_pd(2.2);
59233 let b = _mm_set1_pd(1.1);
59234 let r = _mm_mask_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
59235 let e = _mm_set_pd(2.2, 2.2);
59236 assert_eq_m128d(r, e);
59237 let r = _mm_mask_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
59238 let e = _mm_set_pd(2.2, 1.0);
59239 assert_eq_m128d(r, e);
59240 }
59241
59242 #[simd_test(enable = "avx512f")]
59243 unsafe fn test_mm_maskz_roundscale_round_sd() {
59244 let a = _mm_set1_pd(2.2);
59245 let b = _mm_set1_pd(1.1);
59246 let r = _mm_maskz_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(0, a, b);
59247 let e = _mm_set_pd(2.2, 0.0);
59248 assert_eq_m128d(r, e);
59249 let r = _mm_maskz_roundscale_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
59250 let e = _mm_set_pd(2.2, 1.0);
59251 assert_eq_m128d(r, e);
59252 }
59253
59254 #[simd_test(enable = "avx512f")]
59255 unsafe fn test_mm_scalef_round_ss() {
59256 let a = _mm_set1_ps(1.);
59257 let b = _mm_set1_ps(3.);
59258 let r = _mm_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
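// scalef computes a * 2^floor(b) per element: 1.0 * 2^3 = 8.0 in the low lane.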
59259 let e = _mm_set_ps(1., 1., 1., 8.);
59260 assert_eq_m128(r, e);
59261 }
59262
59263 #[simd_test(enable = "avx512f")]
59264 unsafe fn test_mm_mask_scalef_round_ss() {
59265 let a = _mm_set1_ps(1.);
59266 let b = _mm_set1_ps(3.);
59267 let r = _mm_mask_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59268 a, 0, a, b,
59269 );
59270 let e = _mm_set_ps(1., 1., 1., 1.);
59271 assert_eq_m128(r, e);
59272 let r = _mm_mask_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59273 a, 0b11111111, a, b,
59274 );
59275 let e = _mm_set_ps(1., 1., 1., 8.);
59276 assert_eq_m128(r, e);
59277 }
59278
59279 #[simd_test(enable = "avx512f")]
59280 unsafe fn test_mm_maskz_scalef_round_ss() {
59281 let a = _mm_set1_ps(1.);
59282 let b = _mm_set1_ps(3.);
59283 let r =
59284 _mm_maskz_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
59285 let e = _mm_set_ps(1., 1., 1., 0.);
59286 assert_eq_m128(r, e);
59287 let r = _mm_maskz_scalef_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59288 0b11111111, a, b,
59289 );
59290 let e = _mm_set_ps(1., 1., 1., 8.);
59291 assert_eq_m128(r, e);
59292 }
59293
59294 #[simd_test(enable = "avx512f")]
59295 unsafe fn test_mm_scalef_round_sd() {
59296 let a = _mm_set1_pd(1.);
59297 let b = _mm_set1_pd(3.);
59298 let r = _mm_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
59299 let e = _mm_set_pd(1., 8.);
59300 assert_eq_m128d(r, e);
59301 }
59302
59303 #[simd_test(enable = "avx512f")]
59304 unsafe fn test_mm_mask_scalef_round_sd() {
59305 let a = _mm_set1_pd(1.);
59306 let b = _mm_set1_pd(3.);
59307 let r = _mm_mask_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59308 a, 0, a, b,
59309 );
59310 let e = _mm_set_pd(1., 1.);
59311 assert_eq_m128d(r, e);
59312 let r = _mm_mask_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59313 a, 0b11111111, a, b,
59314 );
59315 let e = _mm_set_pd(1., 8.);
59316 assert_eq_m128d(r, e);
59317 }
59318
59319 #[simd_test(enable = "avx512f")]
59320 unsafe fn test_mm_maskz_scalef_round_sd() {
59321 let a = _mm_set1_pd(1.);
59322 let b = _mm_set1_pd(3.);
59323 let r =
59324 _mm_maskz_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
59325 let e = _mm_set_pd(1., 0.);
59326 assert_eq_m128d(r, e);
59327 let r = _mm_maskz_scalef_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59328 0b11111111, a, b,
59329 );
59330 let e = _mm_set_pd(1., 8.);
59331 assert_eq_m128d(r, e);
59332 }
59333
59334 #[simd_test(enable = "avx512f")]
59335 unsafe fn test_mm_fmadd_round_ss() {
59336 let a = _mm_set1_ps(1.);
59337 let b = _mm_set1_ps(2.);
59338 let c = _mm_set1_ps(3.);
59339 let r = _mm_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
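// fmadd computes (a * b) + c in the low lane: (1. * 2.) + 3. = 5.; the upper lanes are copied from a.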
59340 let e = _mm_set_ps(1., 1., 1., 5.);
59341 assert_eq_m128(r, e);
59342 }
59343
59344 #[simd_test(enable = "avx512f")]
59345 unsafe fn test_mm_mask_fmadd_round_ss() {
59346 let a = _mm_set1_ps(1.);
59347 let b = _mm_set1_ps(2.);
59348 let c = _mm_set1_ps(3.);
59349 let r = _mm_mask_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59350 a, 0, b, c,
59351 );
59352 assert_eq_m128(r, a);
59353 let r = _mm_mask_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59354 a, 0b11111111, b, c,
59355 );
59356 let e = _mm_set_ps(1., 1., 1., 5.);
59357 assert_eq_m128(r, e);
59358 }
59359
59360 #[simd_test(enable = "avx512f")]
59361 unsafe fn test_mm_maskz_fmadd_round_ss() {
59362 let a = _mm_set1_ps(1.);
59363 let b = _mm_set1_ps(2.);
59364 let c = _mm_set1_ps(3.);
59365 let r = _mm_maskz_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59366 0, a, b, c,
59367 );
59368 let e = _mm_set_ps(1., 1., 1., 0.);
59369 assert_eq_m128(r, e);
59370 let r = _mm_maskz_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59371 0b11111111, a, b, c,
59372 );
59373 let e = _mm_set_ps(1., 1., 1., 5.);
59374 assert_eq_m128(r, e);
59375 }
59376
59377 #[simd_test(enable = "avx512f")]
59378 unsafe fn test_mm_mask3_fmadd_round_ss() {
59379 let a = _mm_set1_ps(1.);
59380 let b = _mm_set1_ps(2.);
59381 let c = _mm_set1_ps(3.);
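// The mask3 variants use c as the pass-through operand, so a zero mask returns c unchanged.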
59382 let r = _mm_mask3_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59383 a, b, c, 0,
59384 );
59385 assert_eq_m128(r, c);
59386 let r = _mm_mask3_fmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59387 a, b, c, 0b11111111,
59388 );
59389 let e = _mm_set_ps(3., 3., 3., 5.);
59390 assert_eq_m128(r, e);
59391 }
59392
59393 #[simd_test(enable = "avx512f")]
59394 unsafe fn test_mm_fmadd_round_sd() {
59395 let a = _mm_set1_pd(1.);
59396 let b = _mm_set1_pd(2.);
59397 let c = _mm_set1_pd(3.);
59398 let r = _mm_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
59399 let e = _mm_set_pd(1., 5.);
59400 assert_eq_m128d(r, e);
59401 }
59402
59403 #[simd_test(enable = "avx512f")]
59404 unsafe fn test_mm_mask_fmadd_round_sd() {
59405 let a = _mm_set1_pd(1.);
59406 let b = _mm_set1_pd(2.);
59407 let c = _mm_set1_pd(3.);
59408 let r = _mm_mask_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59409 a, 0, b, c,
59410 );
59411 assert_eq_m128d(r, a);
59412 let r = _mm_mask_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59413 a, 0b11111111, b, c,
59414 );
59415 let e = _mm_set_pd(1., 5.);
59416 assert_eq_m128d(r, e);
59417 }
59418
59419 #[simd_test(enable = "avx512f")]
59420 unsafe fn test_mm_maskz_fmadd_round_sd() {
59421 let a = _mm_set1_pd(1.);
59422 let b = _mm_set1_pd(2.);
59423 let c = _mm_set1_pd(3.);
59424 let r = _mm_maskz_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59425 0, a, b, c,
59426 );
59427 let e = _mm_set_pd(1., 0.);
59428 assert_eq_m128d(r, e);
59429 let r = _mm_maskz_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59430 0b11111111, a, b, c,
59431 );
59432 let e = _mm_set_pd(1., 5.);
59433 assert_eq_m128d(r, e);
59434 }
59435
59436 #[simd_test(enable = "avx512f")]
59437 unsafe fn test_mm_mask3_fmadd_round_sd() {
59438 let a = _mm_set1_pd(1.);
59439 let b = _mm_set1_pd(2.);
59440 let c = _mm_set1_pd(3.);
59441 let r = _mm_mask3_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59442 a, b, c, 0,
59443 );
59444 assert_eq_m128d(r, c);
59445 let r = _mm_mask3_fmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59446 a, b, c, 0b11111111,
59447 );
59448 let e = _mm_set_pd(3., 5.);
59449 assert_eq_m128d(r, e);
59450 }
59451
59452 #[simd_test(enable = "avx512f")]
59453 unsafe fn test_mm_fmsub_round_ss() {
59454 let a = _mm_set1_ps(1.);
59455 let b = _mm_set1_ps(2.);
59456 let c = _mm_set1_ps(3.);
59457 let r = _mm_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
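// fmsub computes (a * b) - c in the low lane: (1. * 2.) - 3. = -1.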
59458 let e = _mm_set_ps(1., 1., 1., -1.);
59459 assert_eq_m128(r, e);
59460 }
59461
59462 #[simd_test(enable = "avx512f")]
59463 unsafe fn test_mm_mask_fmsub_round_ss() {
59464 let a = _mm_set1_ps(1.);
59465 let b = _mm_set1_ps(2.);
59466 let c = _mm_set1_ps(3.);
59467 let r = _mm_mask_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59468 a, 0, b, c,
59469 );
59470 assert_eq_m128(r, a);
59471 let r = _mm_mask_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59472 a, 0b11111111, b, c,
59473 );
59474 let e = _mm_set_ps(1., 1., 1., -1.);
59475 assert_eq_m128(r, e);
59476 }
59477
59478 #[simd_test(enable = "avx512f")]
59479 unsafe fn test_mm_maskz_fmsub_round_ss() {
59480 let a = _mm_set1_ps(1.);
59481 let b = _mm_set1_ps(2.);
59482 let c = _mm_set1_ps(3.);
59483 let r = _mm_maskz_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59484 0, a, b, c,
59485 );
59486 let e = _mm_set_ps(1., 1., 1., 0.);
59487 assert_eq_m128(r, e);
59488 let r = _mm_maskz_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59489 0b11111111, a, b, c,
59490 );
59491 let e = _mm_set_ps(1., 1., 1., -1.);
59492 assert_eq_m128(r, e);
59493 }
59494
59495 #[simd_test(enable = "avx512f")]
59496 unsafe fn test_mm_mask3_fmsub_round_ss() {
59497 let a = _mm_set1_ps(1.);
59498 let b = _mm_set1_ps(2.);
59499 let c = _mm_set1_ps(3.);
59500 let r = _mm_mask3_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59501 a, b, c, 0,
59502 );
59503 assert_eq_m128(r, c);
59504 let r = _mm_mask3_fmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59505 a, b, c, 0b11111111,
59506 );
59507 let e = _mm_set_ps(3., 3., 3., -1.);
59508 assert_eq_m128(r, e);
59509 }
59510
59511 #[simd_test(enable = "avx512f")]
59512 unsafe fn test_mm_fmsub_round_sd() {
59513 let a = _mm_set1_pd(1.);
59514 let b = _mm_set1_pd(2.);
59515 let c = _mm_set1_pd(3.);
59516 let r = _mm_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
59517 let e = _mm_set_pd(1., -1.);
59518 assert_eq_m128d(r, e);
59519 }
59520
59521 #[simd_test(enable = "avx512f")]
59522 unsafe fn test_mm_mask_fmsub_round_sd() {
59523 let a = _mm_set1_pd(1.);
59524 let b = _mm_set1_pd(2.);
59525 let c = _mm_set1_pd(3.);
59526 let r = _mm_mask_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59527 a, 0, b, c,
59528 );
59529 assert_eq_m128d(r, a);
59530 let r = _mm_mask_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59531 a, 0b11111111, b, c,
59532 );
59533 let e = _mm_set_pd(1., -1.);
59534 assert_eq_m128d(r, e);
59535 }
59536
59537 #[simd_test(enable = "avx512f")]
59538 unsafe fn test_mm_maskz_fmsub_round_sd() {
59539 let a = _mm_set1_pd(1.);
59540 let b = _mm_set1_pd(2.);
59541 let c = _mm_set1_pd(3.);
59542 let r = _mm_maskz_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59543 0, a, b, c,
59544 );
59545 let e = _mm_set_pd(1., 0.);
59546 assert_eq_m128d(r, e);
59547 let r = _mm_maskz_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59548 0b11111111, a, b, c,
59549 );
59550 let e = _mm_set_pd(1., -1.);
59551 assert_eq_m128d(r, e);
59552 }
59553
59554 #[simd_test(enable = "avx512f")]
59555 unsafe fn test_mm_mask3_fmsub_round_sd() {
59556 let a = _mm_set1_pd(1.);
59557 let b = _mm_set1_pd(2.);
59558 let c = _mm_set1_pd(3.);
59559 let r = _mm_mask3_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59560 a, b, c, 0,
59561 );
59562 assert_eq_m128d(r, c);
59563 let r = _mm_mask3_fmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59564 a, b, c, 0b11111111,
59565 );
59566 let e = _mm_set_pd(3., -1.);
59567 assert_eq_m128d(r, e);
59568 }
59569
59570 #[simd_test(enable = "avx512f")]
59571 unsafe fn test_mm_fnmadd_round_ss() {
59572 let a = _mm_set1_ps(1.);
59573 let b = _mm_set1_ps(2.);
59574 let c = _mm_set1_ps(3.);
59575 let r = _mm_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
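// fnmadd computes -(a * b) + c in the low lane: -(1. * 2.) + 3. = 1.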
59576 let e = _mm_set_ps(1., 1., 1., 1.);
59577 assert_eq_m128(r, e);
59578 }
59579
59580 #[simd_test(enable = "avx512f")]
59581 unsafe fn test_mm_mask_fnmadd_round_ss() {
59582 let a = _mm_set1_ps(1.);
59583 let b = _mm_set1_ps(2.);
59584 let c = _mm_set1_ps(3.);
59585 let r = _mm_mask_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59586 a, 0, b, c,
59587 );
59588 assert_eq_m128(r, a);
59589 let r = _mm_mask_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59590 a, 0b11111111, b, c,
59591 );
59592 let e = _mm_set_ps(1., 1., 1., 1.);
59593 assert_eq_m128(r, e);
59594 }
59595
59596 #[simd_test(enable = "avx512f")]
59597 unsafe fn test_mm_maskz_fnmadd_round_ss() {
59598 let a = _mm_set1_ps(1.);
59599 let b = _mm_set1_ps(2.);
59600 let c = _mm_set1_ps(3.);
59601 let r = _mm_maskz_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59602 0, a, b, c,
59603 );
59604 let e = _mm_set_ps(1., 1., 1., 0.);
59605 assert_eq_m128(r, e);
59606 let r = _mm_maskz_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59607 0b11111111, a, b, c,
59608 );
59609 let e = _mm_set_ps(1., 1., 1., 1.);
59610 assert_eq_m128(r, e);
59611 }
59612
59613 #[simd_test(enable = "avx512f")]
59614 unsafe fn test_mm_mask3_fnmadd_round_ss() {
59615 let a = _mm_set1_ps(1.);
59616 let b = _mm_set1_ps(2.);
59617 let c = _mm_set1_ps(3.);
59618 let r = _mm_mask3_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59619 a, b, c, 0,
59620 );
59621 assert_eq_m128(r, c);
59622 let r = _mm_mask3_fnmadd_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59623 a, b, c, 0b11111111,
59624 );
59625 let e = _mm_set_ps(3., 3., 3., 1.);
59626 assert_eq_m128(r, e);
59627 }
59628
59629 #[simd_test(enable = "avx512f")]
59630 unsafe fn test_mm_fnmadd_round_sd() {
59631 let a = _mm_set1_pd(1.);
59632 let b = _mm_set1_pd(2.);
59633 let c = _mm_set1_pd(3.);
59634 let r = _mm_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
59635 let e = _mm_set_pd(1., 1.);
59636 assert_eq_m128d(r, e);
59637 }
59638
59639 #[simd_test(enable = "avx512f")]
59640 unsafe fn test_mm_mask_fnmadd_round_sd() {
59641 let a = _mm_set1_pd(1.);
59642 let b = _mm_set1_pd(2.);
59643 let c = _mm_set1_pd(3.);
59644 let r = _mm_mask_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59645 a, 0, b, c,
59646 );
59647 assert_eq_m128d(r, a);
59648 let r = _mm_mask_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59649 a, 0b11111111, b, c,
59650 );
59651 let e = _mm_set_pd(1., 1.);
59652 assert_eq_m128d(r, e);
59653 }
59654
59655 #[simd_test(enable = "avx512f")]
59656 unsafe fn test_mm_maskz_fnmadd_round_sd() {
59657 let a = _mm_set1_pd(1.);
59658 let b = _mm_set1_pd(2.);
59659 let c = _mm_set1_pd(3.);
59660 let r = _mm_maskz_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59661 0, a, b, c,
59662 );
59663 let e = _mm_set_pd(1., 0.);
59664 assert_eq_m128d(r, e);
59665 let r = _mm_maskz_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59666 0b11111111, a, b, c,
59667 );
59668 let e = _mm_set_pd(1., 1.);
59669 assert_eq_m128d(r, e);
59670 }
59671
59672 #[simd_test(enable = "avx512f")]
59673 unsafe fn test_mm_mask3_fnmadd_round_sd() {
59674 let a = _mm_set1_pd(1.);
59675 let b = _mm_set1_pd(2.);
59676 let c = _mm_set1_pd(3.);
59677 let r = _mm_mask3_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59678 a, b, c, 0,
59679 );
59680 assert_eq_m128d(r, c);
59681 let r = _mm_mask3_fnmadd_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59682 a, b, c, 0b11111111,
59683 );
59684 let e = _mm_set_pd(3., 1.);
59685 assert_eq_m128d(r, e);
59686 }
59687
59688 #[simd_test(enable = "avx512f")]
59689 unsafe fn test_mm_fnmsub_round_ss() {
59690 let a = _mm_set1_ps(1.);
59691 let b = _mm_set1_ps(2.);
59692 let c = _mm_set1_ps(3.);
59693 let r = _mm_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
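// fnmsub computes -(a * b) - c in the low lane: -(1. * 2.) - 3. = -5.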
59694 let e = _mm_set_ps(1., 1., 1., -5.);
59695 assert_eq_m128(r, e);
59696 }
59697
59698 #[simd_test(enable = "avx512f")]
59699 unsafe fn test_mm_mask_fnmsub_round_ss() {
59700 let a = _mm_set1_ps(1.);
59701 let b = _mm_set1_ps(2.);
59702 let c = _mm_set1_ps(3.);
59703 let r = _mm_mask_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59704 a, 0, b, c,
59705 );
59706 assert_eq_m128(r, a);
59707 let r = _mm_mask_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59708 a, 0b11111111, b, c,
59709 );
59710 let e = _mm_set_ps(1., 1., 1., -5.);
59711 assert_eq_m128(r, e);
59712 }
59713
59714 #[simd_test(enable = "avx512f")]
59715 unsafe fn test_mm_maskz_fnmsub_round_ss() {
59716 let a = _mm_set1_ps(1.);
59717 let b = _mm_set1_ps(2.);
59718 let c = _mm_set1_ps(3.);
59719 let r = _mm_maskz_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59720 0, a, b, c,
59721 );
59722 let e = _mm_set_ps(1., 1., 1., 0.);
59723 assert_eq_m128(r, e);
59724 let r = _mm_maskz_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59725 0b11111111, a, b, c,
59726 );
59727 let e = _mm_set_ps(1., 1., 1., -5.);
59728 assert_eq_m128(r, e);
59729 }
59730
59731 #[simd_test(enable = "avx512f")]
59732 unsafe fn test_mm_mask3_fnmsub_round_ss() {
59733 let a = _mm_set1_ps(1.);
59734 let b = _mm_set1_ps(2.);
59735 let c = _mm_set1_ps(3.);
59736 let r = _mm_mask3_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59737 a, b, c, 0,
59738 );
59739 assert_eq_m128(r, c);
59740 let r = _mm_mask3_fnmsub_round_ss::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59741 a, b, c, 0b11111111,
59742 );
59743 let e = _mm_set_ps(3., 3., 3., -5.);
59744 assert_eq_m128(r, e);
59745 }
59746
59747 #[simd_test(enable = "avx512f")]
59748 unsafe fn test_mm_fnmsub_round_sd() {
59749 let a = _mm_set1_pd(1.);
59750 let b = _mm_set1_pd(2.);
59751 let c = _mm_set1_pd(3.);
59752 let r = _mm_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
59753 let e = _mm_set_pd(1., -5.);
59754 assert_eq_m128d(r, e);
59755 }
59756
59757 #[simd_test(enable = "avx512f")]
59758 unsafe fn test_mm_mask_fnmsub_round_sd() {
59759 let a = _mm_set1_pd(1.);
59760 let b = _mm_set1_pd(2.);
59761 let c = _mm_set1_pd(3.);
59762 let r = _mm_mask_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59763 a, 0, b, c,
59764 );
59765 assert_eq_m128d(r, a);
59766 let r = _mm_mask_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59767 a, 0b11111111, b, c,
59768 );
59769 let e = _mm_set_pd(1., -5.);
59770 assert_eq_m128d(r, e);
59771 }
59772
59773 #[simd_test(enable = "avx512f")]
59774 unsafe fn test_mm_maskz_fnmsub_round_sd() {
59775 let a = _mm_set1_pd(1.);
59776 let b = _mm_set1_pd(2.);
59777 let c = _mm_set1_pd(3.);
59778 let r = _mm_maskz_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59779 0, a, b, c,
59780 );
59781 let e = _mm_set_pd(1., 0.);
59782 assert_eq_m128d(r, e);
59783 let r = _mm_maskz_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59784 0b11111111, a, b, c,
59785 );
59786 let e = _mm_set_pd(1., -5.);
59787 assert_eq_m128d(r, e);
59788 }
59789
59790 #[simd_test(enable = "avx512f")]
59791 unsafe fn test_mm_mask3_fnmsub_round_sd() {
59792 let a = _mm_set1_pd(1.);
59793 let b = _mm_set1_pd(2.);
59794 let c = _mm_set1_pd(3.);
59795 let r = _mm_mask3_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59796 a, b, c, 0,
59797 );
59798 assert_eq_m128d(r, c);
59799 let r = _mm_mask3_fnmsub_round_sd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
59800 a, b, c, 0b11111111,
59801 );
59802 let e = _mm_set_pd(3., -5.);
59803 assert_eq_m128d(r, e);
59804 }
59805
59806 #[simd_test(enable = "avx512f")]
59807 unsafe fn test_mm_fixupimm_ss() {
59808 let a = _mm_set_ps(0., 0., 0., f32::NAN);
59809 let b = _mm_set1_ps(f32::MAX);
59810 let c = _mm_set1_epi32(i32::MAX);
59811 let r = _mm_fixupimm_ss::<5>(a, b, c);
59812 let e = _mm_set_ps(0., 0., 0., -0.0);
59813 assert_eq_m128(r, e);
59814 }
59815
59816 #[simd_test(enable = "avx512f")]
59817 unsafe fn test_mm_mask_fixupimm_ss() {
59818 let a = _mm_set_ps(0., 0., 0., f32::NAN);
59819 let b = _mm_set1_ps(f32::MAX);
59820 let c = _mm_set1_epi32(i32::MAX);
59821 let r = _mm_mask_fixupimm_ss::<5>(a, 0b11111111, b, c);
59822 let e = _mm_set_ps(0., 0., 0., -0.0);
59823 assert_eq_m128(r, e);
59824 }
59825
59826 #[simd_test(enable = "avx512f")]
59827 unsafe fn test_mm_maskz_fixupimm_ss() {
59828 let a = _mm_set_ps(0., 0., 0., f32::NAN);
59829 let b = _mm_set1_ps(f32::MAX);
59830 let c = _mm_set1_epi32(i32::MAX);
59831 let r = _mm_maskz_fixupimm_ss::<5>(0b00000000, a, b, c);
59832 let e = _mm_set_ps(0., 0., 0., 0.0);
59833 assert_eq_m128(r, e);
59834 let r = _mm_maskz_fixupimm_ss::<5>(0b11111111, a, b, c);
59835 let e = _mm_set_ps(0., 0., 0., -0.0);
59836 assert_eq_m128(r, e);
59837 }
59838
59839 #[simd_test(enable = "avx512f")]
59840 unsafe fn test_mm_fixupimm_sd() {
59841 let a = _mm_set_pd(0., f64::NAN);
59842 let b = _mm_set1_pd(f64::MAX);
59843 let c = _mm_set1_epi64x(i32::MAX as i64);
59844 let r = _mm_fixupimm_sd::<5>(a, b, c);
59845 let e = _mm_set_pd(0., -0.0);
59846 assert_eq_m128d(r, e);
59847 }
59848
59849 #[simd_test(enable = "avx512f")]
59850 unsafe fn test_mm_mask_fixupimm_sd() {
59851 let a = _mm_set_pd(0., f64::NAN);
59852 let b = _mm_set1_pd(f64::MAX);
59853 let c = _mm_set1_epi64x(i32::MAX as i64);
59854 let r = _mm_mask_fixupimm_sd::<5>(a, 0b11111111, b, c);
59855 let e = _mm_set_pd(0., -0.0);
59856 assert_eq_m128d(r, e);
59857 }
59858
59859 #[simd_test(enable = "avx512f")]
59860 unsafe fn test_mm_maskz_fixupimm_sd() {
59861 let a = _mm_set_pd(0., f64::NAN);
59862 let b = _mm_set1_pd(f64::MAX);
59863 let c = _mm_set1_epi64x(i32::MAX as i64);
59864 let r = _mm_maskz_fixupimm_sd::<5>(0b00000000, a, b, c);
59865 let e = _mm_set_pd(0., 0.0);
59866 assert_eq_m128d(r, e);
59867 let r = _mm_maskz_fixupimm_sd::<5>(0b11111111, a, b, c);
59868 let e = _mm_set_pd(0., -0.0);
59869 assert_eq_m128d(r, e);
59870 }
59871
59872 #[simd_test(enable = "avx512f")]
59873 unsafe fn test_mm_fixupimm_round_ss() {
59874 let a = _mm_set_ps(1., 0., 0., f32::NAN);
59875 let b = _mm_set1_ps(f32::MAX);
59876 let c = _mm_set1_epi32(i32::MAX);
59877 let r = _mm_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(a, b, c);
59878 let e = _mm_set_ps(1., 0., 0., -0.0);
59879 assert_eq_m128(r, e);
59880 }
59881
59882 #[simd_test(enable = "avx512f")]
59883 unsafe fn test_mm_mask_fixupimm_round_ss() {
59884 let a = _mm_set_ps(0., 0., 0., f32::NAN);
59885 let b = _mm_set1_ps(f32::MAX);
59886 let c = _mm_set1_epi32(i32::MAX);
59887 let r = _mm_mask_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, b, c);
59888 let e = _mm_set_ps(0., 0., 0., -0.0);
59889 assert_eq_m128(r, e);
59890 }
59891
59892 #[simd_test(enable = "avx512f")]
59893 unsafe fn test_mm_maskz_fixupimm_round_ss() {
59894 let a = _mm_set_ps(0., 0., 0., f32::NAN);
59895 let b = _mm_set1_ps(f32::MAX);
59896 let c = _mm_set1_epi32(i32::MAX);
59897 let r = _mm_maskz_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(0b00000000, a, b, c);
59898 let e = _mm_set_ps(0., 0., 0., 0.0);
59899 assert_eq_m128(r, e);
59900 let r = _mm_maskz_fixupimm_round_ss::<5, _MM_FROUND_CUR_DIRECTION>(0b11111111, a, b, c);
59901 let e = _mm_set_ps(0., 0., 0., -0.0);
59902 assert_eq_m128(r, e);
59903 }
59904
59905 #[simd_test(enable = "avx512f")]
59906 unsafe fn test_mm_fixupimm_round_sd() {
59907 let a = _mm_set_pd(0., f64::NAN);
59908 let b = _mm_set1_pd(f64::MAX);
59909 let c = _mm_set1_epi64x(i32::MAX as i64);
59910 let r = _mm_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(a, b, c);
59911 let e = _mm_set_pd(0., -0.0);
59912 assert_eq_m128d(r, e);
59913 }
59914
59915 #[simd_test(enable = "avx512f")]
59916 unsafe fn test_mm_mask_fixupimm_round_sd() {
59917 let a = _mm_set_pd(0., f64::NAN);
59918 let b = _mm_set1_pd(f64::MAX);
59919 let c = _mm_set1_epi64x(i32::MAX as i64);
59920 let r = _mm_mask_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, b, c);
59921 let e = _mm_set_pd(0., -0.0);
59922 assert_eq_m128d(r, e);
59923 }
59924
59925 #[simd_test(enable = "avx512f")]
59926 unsafe fn test_mm_maskz_fixupimm_round_sd() {
59927 let a = _mm_set_pd(0., f64::NAN);
59928 let b = _mm_set1_pd(f64::MAX);
59929 let c = _mm_set1_epi64x(i32::MAX as i64);
59930 let r = _mm_maskz_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(0b00000000, a, b, c);
59931 let e = _mm_set_pd(0., 0.0);
59932 assert_eq_m128d(r, e);
59933 let r = _mm_maskz_fixupimm_round_sd::<5, _MM_FROUND_CUR_DIRECTION>(0b11111111, a, b, c);
59934 let e = _mm_set_pd(0., -0.0);
59935 assert_eq_m128d(r, e);
59936 }
59937
59938 #[simd_test(enable = "avx512f")]
59939 unsafe fn test_mm_mask_cvtss_sd() {
59940 let a = _mm_set_pd(6., -7.5);
59941 let b = _mm_set_ps(0., -0.5, 1., -1.5);
59942 let r = _mm_mask_cvtss_sd(a, 0, a, b);
59943 assert_eq_m128d(r, a);
59944 let r = _mm_mask_cvtss_sd(a, 0b11111111, a, b);
59945 let e = _mm_set_pd(6., -1.5);
59946 assert_eq_m128d(r, e);
59947 }
59948
59949 #[simd_test(enable = "avx512f")]
59950 unsafe fn test_mm_maskz_cvtss_sd() {
59951 let a = _mm_set_pd(6., -7.5);
59952 let b = _mm_set_ps(0., -0.5, 1., -1.5);
59953 let r = _mm_maskz_cvtss_sd(0, a, b);
59954 let e = _mm_set_pd(6., 0.);
59955 assert_eq_m128d(r, e);
59956 let r = _mm_maskz_cvtss_sd(0b11111111, a, b);
59957 let e = _mm_set_pd(6., -1.5);
59958 assert_eq_m128d(r, e);
59959 }
59960
59961 #[simd_test(enable = "avx512f")]
59962 unsafe fn test_mm_mask_cvtsd_ss() {
59963 let a = _mm_set_ps(0., -0.5, 1., -1.5);
59964 let b = _mm_set_pd(6., -7.5);
59965 let r = _mm_mask_cvtsd_ss(a, 0, a, b);
59966 assert_eq_m128(r, a);
59967 let r = _mm_mask_cvtsd_ss(a, 0b11111111, a, b);
59968 let e = _mm_set_ps(0., -0.5, 1., -7.5);
59969 assert_eq_m128(r, e);
59970 }
59971
59972 #[simd_test(enable = "avx512f")]
59973 unsafe fn test_mm_maskz_cvtsd_ss() {
59974 let a = _mm_set_ps(0., -0.5, 1., -1.5);
59975 let b = _mm_set_pd(6., -7.5);
59976 let r = _mm_maskz_cvtsd_ss(0, a, b);
59977 let e = _mm_set_ps(0., -0.5, 1., 0.);
59978 assert_eq_m128(r, e);
59979 let r = _mm_maskz_cvtsd_ss(0b11111111, a, b);
59980 let e = _mm_set_ps(0., -0.5, 1., -7.5);
59981 assert_eq_m128(r, e);
59982 }
59983
59984 #[simd_test(enable = "avx512f")]
59985 unsafe fn test_mm_cvt_roundss_sd() {
59986 let a = _mm_set_pd(6., -7.5);
59987 let b = _mm_set_ps(0., -0.5, 1., -1.5);
59988 let r = _mm_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(a, b);
59989 let e = _mm_set_pd(6., -1.5);
59990 assert_eq_m128d(r, e);
59991 }
59992
59993 #[simd_test(enable = "avx512f")]
59994 unsafe fn test_mm_mask_cvt_roundss_sd() {
59995 let a = _mm_set_pd(6., -7.5);
59996 let b = _mm_set_ps(0., -0.5, 1., -1.5);
59997 let r = _mm_mask_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
59998 assert_eq_m128d(r, a);
59999 let r = _mm_mask_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a, b);
60000 let e = _mm_set_pd(6., -1.5);
60001 assert_eq_m128d(r, e);
60002 }
60003
60004 #[simd_test(enable = "avx512f")]
60005 unsafe fn test_mm_maskz_cvt_roundss_sd() {
60006 let a = _mm_set_pd(6., -7.5);
60007 let b = _mm_set_ps(0., -0.5, 1., -1.5);
60008 let r = _mm_maskz_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
60009 let e = _mm_set_pd(6., 0.);
60010 assert_eq_m128d(r, e);
60011 let r = _mm_maskz_cvt_roundss_sd::<_MM_FROUND_CUR_DIRECTION>(0b11111111, a, b);
60012 let e = _mm_set_pd(6., -1.5);
60013 assert_eq_m128d(r, e);
60014 }
60015
60016 #[simd_test(enable = "avx512f")]
60017 unsafe fn test_mm_cvt_roundsd_ss() {
60018 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60019 let b = _mm_set_pd(6., -7.5);
60020 let r = _mm_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
60021 let e = _mm_set_ps(0., -0.5, 1., -7.5);
60022 assert_eq_m128(r, e);
60023 }
60024
60025 #[simd_test(enable = "avx512f")]
60026 unsafe fn test_mm_mask_cvt_roundsd_ss() {
60027 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60028 let b = _mm_set_pd(6., -7.5);
60029 let r = _mm_mask_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, 0, a, b);
60030 assert_eq_m128(r, a);
60031 let r = _mm_mask_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
60032 a, 0b11111111, a, b,
60033 );
60034 let e = _mm_set_ps(0., -0.5, 1., -7.5);
60035 assert_eq_m128(r, e);
60036 }
60037
60038 #[simd_test(enable = "avx512f")]
60039 unsafe fn test_mm_maskz_cvt_roundsd_ss() {
60040 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60041 let b = _mm_set_pd(6., -7.5);
60042 let r = _mm_maskz_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(0, a, b);
60043 let e = _mm_set_ps(0., -0.5, 1., 0.);
60044 assert_eq_m128(r, e);
60045 let r = _mm_maskz_cvt_roundsd_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(
60046 0b11111111, a, b,
60047 );
60048 let e = _mm_set_ps(0., -0.5, 1., -7.5);
60049 assert_eq_m128(r, e);
60050 }
60051
60052 #[simd_test(enable = "avx512f")]
60053 unsafe fn test_mm_cvt_roundss_si32() {
60054 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60055 let r = _mm_cvt_roundss_si32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
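// _MM_FROUND_TO_ZERO truncates toward zero, so -1.5 converts to -1.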
60056 let e: i32 = -1;
60057 assert_eq!(r, e);
60058 }
60059
60060 #[simd_test(enable = "avx512f")]
60061 unsafe fn test_mm_cvt_roundss_i32() {
60062 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60063 let r = _mm_cvt_roundss_i32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
60064 let e: i32 = -1;
60065 assert_eq!(r, e);
60066 }
60067
60068 #[simd_test(enable = "avx512f")]
60069 unsafe fn test_mm_cvt_roundss_u32() {
60070 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60071 let r = _mm_cvt_roundss_u32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
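// A negative source is out of range for an unsigned conversion; the AVX-512 conversion returns the unsigned integer indefinite value (all bits set, i.e. u32::MAX) in that case.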
60072 let e: u32 = u32::MAX;
60073 assert_eq!(r, e);
60074 }
60075
60076 #[simd_test(enable = "avx512f")]
60077 unsafe fn test_mm_cvtss_i32() {
60078 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60079 let r = _mm_cvtss_i32(a);
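// Without an explicit rounding override the default round-to-nearest-even mode applies, so -1.5 converts to -2.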
60080 let e: i32 = -2;
60081 assert_eq!(r, e);
60082 }
60083
60084 #[simd_test(enable = "avx512f")]
60085 unsafe fn test_mm_cvtss_u32() {
60086 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60087 let r = _mm_cvtss_u32(a);
60088 let e: u32 = u32::MAX;
60089 assert_eq!(r, e);
60090 }
60091
60092 #[simd_test(enable = "avx512f")]
60093 unsafe fn test_mm_cvt_roundsd_si32() {
60094 let a = _mm_set_pd(1., -1.5);
60095 let r = _mm_cvt_roundsd_si32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
60096 let e: i32 = -1;
60097 assert_eq!(r, e);
60098 }
60099
60100 #[simd_test(enable = "avx512f")]
60101 unsafe fn test_mm_cvt_roundsd_i32() {
60102 let a = _mm_set_pd(1., -1.5);
60103 let r = _mm_cvt_roundsd_i32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
60104 let e: i32 = -1;
60105 assert_eq!(r, e);
60106 }
60107
60108 #[simd_test(enable = "avx512f")]
60109 unsafe fn test_mm_cvt_roundsd_u32() {
60110 let a = _mm_set_pd(1., -1.5);
60111 let r = _mm_cvt_roundsd_u32::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
60112 let e: u32 = u32::MAX;
60113 assert_eq!(r, e);
60114 }
60115
60116 #[simd_test(enable = "avx512f")]
60117 unsafe fn test_mm_cvtsd_i32() {
60118 let a = _mm_set_pd(1., -1.5);
60119 let r = _mm_cvtsd_i32(a);
60120 let e: i32 = -2;
60121 assert_eq!(r, e);
60122 }
60123
60124 #[simd_test(enable = "avx512f")]
60125 unsafe fn test_mm_cvtsd_u32() {
60126 let a = _mm_set_pd(1., -1.5);
60127 let r = _mm_cvtsd_u32(a);
60128 let e: u32 = u32::MAX;
60129 assert_eq!(r, e);
60130 }
60131
60132 #[simd_test(enable = "avx512f")]
60133 unsafe fn test_mm_cvt_roundi32_ss() {
60134 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60135 let b: i32 = 9;
60136 let r = _mm_cvt_roundi32_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
60137 let e = _mm_set_ps(0., -0.5, 1., 9.);
60138 assert_eq_m128(r, e);
60139 }
60140
60141 #[simd_test(enable = "avx512f")]
60142 unsafe fn test_mm_cvt_roundsi32_ss() {
60143 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60144 let b: i32 = 9;
60145 let r = _mm_cvt_roundsi32_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
60146 let e = _mm_set_ps(0., -0.5, 1., 9.);
60147 assert_eq_m128(r, e);
60148 }
60149
60150 #[simd_test(enable = "avx512f")]
60151 unsafe fn test_mm_cvt_roundu32_ss() {
60152 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60153 let b: u32 = 9;
60154 let r = _mm_cvt_roundu32_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
60155 let e = _mm_set_ps(0., -0.5, 1., 9.);
60156 assert_eq_m128(r, e);
60157 }
60158
60159 #[simd_test(enable = "avx512f")]
60160 unsafe fn test_mm_cvti32_ss() {
60161 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60162 let b: i32 = 9;
60163 let r = _mm_cvti32_ss(a, b);
60164 let e = _mm_set_ps(0., -0.5, 1., 9.);
60165 assert_eq_m128(r, e);
60166 }
60167
60168 #[simd_test(enable = "avx512f")]
60169 unsafe fn test_mm_cvti32_sd() {
60170 let a = _mm_set_pd(1., -1.5);
60171 let b: i32 = 9;
60172 let r = _mm_cvti32_sd(a, b);
60173 let e = _mm_set_pd(1., 9.);
60174 assert_eq_m128d(r, e);
60175 }
60176
60177 #[simd_test(enable = "avx512f")]
60178 unsafe fn test_mm_cvtt_roundss_si32() {
60179 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60180 let r = _mm_cvtt_roundss_si32::<_MM_FROUND_NO_EXC>(a);
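// The cvtt* conversions always truncate toward zero, so -1.5 converts to -1 regardless of the rounding mode.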
60181 let e: i32 = -1;
60182 assert_eq!(r, e);
60183 }
60184
60185 #[simd_test(enable = "avx512f")]
60186 unsafe fn test_mm_cvtt_roundss_i32() {
60187 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60188 let r = _mm_cvtt_roundss_i32::<_MM_FROUND_NO_EXC>(a);
60189 let e: i32 = -1;
60190 assert_eq!(r, e);
60191 }
60192
60193 #[simd_test(enable = "avx512f")]
60194 unsafe fn test_mm_cvtt_roundss_u32() {
60195 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60196 let r = _mm_cvtt_roundss_u32::<_MM_FROUND_NO_EXC>(a);
60197 let e: u32 = u32::MAX;
60198 assert_eq!(r, e);
60199 }
60200
60201 #[simd_test(enable = "avx512f")]
60202 unsafe fn test_mm_cvttss_i32() {
60203 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60204 let r = _mm_cvttss_i32(a);
60205 let e: i32 = -1;
60206 assert_eq!(r, e);
60207 }
60208
60209 #[simd_test(enable = "avx512f")]
60210 unsafe fn test_mm_cvttss_u32() {
60211 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60212 let r = _mm_cvttss_u32(a);
60213 let e: u32 = u32::MAX;
60214 assert_eq!(r, e);
60215 }
60216
60217 #[simd_test(enable = "avx512f")]
60218 unsafe fn test_mm_cvtt_roundsd_si32() {
60219 let a = _mm_set_pd(1., -1.5);
60220 let r = _mm_cvtt_roundsd_si32::<_MM_FROUND_NO_EXC>(a);
60221 let e: i32 = -1;
60222 assert_eq!(r, e);
60223 }
60224
60225 #[simd_test(enable = "avx512f")]
60226 unsafe fn test_mm_cvtt_roundsd_i32() {
60227 let a = _mm_set_pd(1., -1.5);
60228 let r = _mm_cvtt_roundsd_i32::<_MM_FROUND_NO_EXC>(a);
60229 let e: i32 = -1;
60230 assert_eq!(r, e);
60231 }
60232
60233 #[simd_test(enable = "avx512f")]
60234 unsafe fn test_mm_cvtt_roundsd_u32() {
60235 let a = _mm_set_pd(1., -1.5);
60236 let r = _mm_cvtt_roundsd_u32::<_MM_FROUND_NO_EXC>(a);
60237 let e: u32 = u32::MAX;
60238 assert_eq!(r, e);
60239 }
60240
60241 #[simd_test(enable = "avx512f")]
60242 unsafe fn test_mm_cvttsd_i32() {
60243 let a = _mm_set_pd(1., -1.5);
60244 let r = _mm_cvttsd_i32(a);
60245 let e: i32 = -1;
60246 assert_eq!(r, e);
60247 }
60248
60249 #[simd_test(enable = "avx512f")]
60250 unsafe fn test_mm_cvttsd_u32() {
60251 let a = _mm_set_pd(1., -1.5);
60252 let r = _mm_cvttsd_u32(a);
60253 let e: u32 = u32::MAX;
60254 assert_eq!(r, e);
60255 }
60256
60257 #[simd_test(enable = "avx512f")]
60258 unsafe fn test_mm_cvtu32_ss() {
60259 let a = _mm_set_ps(0., -0.5, 1., -1.5);
60260 let b: u32 = 9;
60261 let r = _mm_cvtu32_ss(a, b);
60262 let e = _mm_set_ps(0., -0.5, 1., 9.);
60263 assert_eq_m128(r, e);
60264 }
60265
60266 #[simd_test(enable = "avx512f")]
60267 unsafe fn test_mm_cvtu32_sd() {
60268 let a = _mm_set_pd(1., -1.5);
60269 let b: u32 = 9;
60270 let r = _mm_cvtu32_sd(a, b);
60271 let e = _mm_set_pd(1., 9.);
60272 assert_eq_m128d(r, e);
60273 }
60274
60275 #[simd_test(enable = "avx512f")]
60276 unsafe fn test_mm_comi_round_ss() {
60277 let a = _mm_set1_ps(2.2);
60278 let b = _mm_set1_ps(1.1);
60279 let r = _mm_comi_round_ss::<0, _MM_FROUND_CUR_DIRECTION>(a, b);
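// Predicate imm8 = 0 is the ordered, non-signaling equality test; 2.2 == 1.1 is false, so the comparison returns 0.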
60280 let e: i32 = 0;
60281 assert_eq!(r, e);
60282 }
60283
60284 #[simd_test(enable = "avx512f")]
60285 unsafe fn test_mm_comi_round_sd() {
60286 let a = _mm_set1_pd(2.2);
60287 let b = _mm_set1_pd(1.1);
60288 let r = _mm_comi_round_sd::<0, _MM_FROUND_CUR_DIRECTION>(a, b);
60289 let e: i32 = 0;
60290 assert_eq!(r, e);
60291 }
60292
60293 #[simd_test(enable = "avx512f")]
60294 unsafe fn test_mm512_cvtsi512_si32() {
60295 let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
60296 let r = _mm512_cvtsi512_si32(a);
60297 let e: i32 = 1;
60298 assert_eq!(r, e);
60299 }
60300
60301 #[simd_test(enable = "avx512f")]
60302 unsafe fn test_mm512_cvtss_f32() {
60303 let a = _mm512_setr_ps(
60304 312.0134, 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
60305 );
60306 assert_eq!(_mm512_cvtss_f32(a), 312.0134);
60307 }
60308
60309 #[simd_test(enable = "avx512f")]
60310 unsafe fn test_mm512_cvtsd_f64() {
60311 let r = _mm512_cvtsd_f64(_mm512_setr_pd(-1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8));
60312 assert_eq!(r, -1.1);
60313 }
60314
60315 #[simd_test(enable = "avx512f")]
60316 unsafe fn test_mm512_shuffle_pd() {
60317 let a = _mm512_setr_pd(1., 4., 5., 8., 1., 4., 5., 8.);
60318 let b = _mm512_setr_pd(2., 3., 6., 7., 2., 3., 6., 7.);
60319 let r = _mm512_shuffle_pd::<0b11_11_11_11>(a, b);
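// Within each 128-bit lane the control bits pick element 0 or 1 of a (even result lanes) and of b (odd result lanes); 0b11_11_11_11 selects the high element of every pair, interleaving a and b.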
60320 let e = _mm512_setr_pd(4., 3., 8., 7., 4., 3., 8., 7.);
60321 assert_eq_m512d(r, e);
60322 }
60323
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_shuffle_pd() {
        let a = _mm512_setr_pd(1., 4., 5., 8., 1., 4., 5., 8.);
        let b = _mm512_setr_pd(2., 3., 6., 7., 2., 3., 6., 7.);
        let r = _mm512_mask_shuffle_pd::<0b11_11_11_11>(a, 0, a, b);
        assert_eq_m512d(r, a);
        let r = _mm512_mask_shuffle_pd::<0b11_11_11_11>(a, 0b11111111, a, b);
        let e = _mm512_setr_pd(4., 3., 8., 7., 4., 3., 8., 7.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_shuffle_pd() {
        let a = _mm512_setr_pd(1., 4., 5., 8., 1., 4., 5., 8.);
        let b = _mm512_setr_pd(2., 3., 6., 7., 2., 3., 6., 7.);
        let r = _mm512_maskz_shuffle_pd::<0b11_11_11_11>(0, a, b);
        assert_eq_m512d(r, _mm512_setzero_pd());
        let r = _mm512_maskz_shuffle_pd::<0b11_11_11_11>(0b00001111, a, b);
        let e = _mm512_setr_pd(4., 3., 8., 7., 0., 0., 0., 0.);
        assert_eq_m512d(r, e);
    }

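    // Expand-load tests: contiguous elements are read from memory and expanded
    // into the destination lanes whose mask bit is set (lowest bit -> lowest lane);
    // lanes with a clear bit take the corresponding `src` element (mask variants)
    // or zero (maskz variants).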
    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_expandloadu_epi32() {
        let src = _mm512_set1_epi32(42);
        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
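        // 8 of the 16 mask bits are set, so the first 8 array elements (1..=8) are consumed.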
        let r = _mm512_mask_expandloadu_epi32(src, m, black_box(p));
        let e = _mm512_set_epi32(8, 7, 6, 42, 5, 42, 42, 42, 4, 3, 42, 42, 2, 42, 1, 42);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_expandloadu_epi32() {
        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm512_maskz_expandloadu_epi32(m, black_box(p));
        let e = _mm512_set_epi32(8, 7, 6, 0, 5, 0, 0, 0, 4, 3, 0, 0, 2, 0, 1, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_expandloadu_epi32() {
        let src = _mm256_set1_epi32(42);
        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm256_mask_expandloadu_epi32(src, m, black_box(p));
        let e = _mm256_set_epi32(4, 3, 2, 42, 1, 42, 42, 42);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_expandloadu_epi32() {
        let a = &[1_i32, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm256_maskz_expandloadu_epi32(m, black_box(p));
        let e = _mm256_set_epi32(4, 3, 2, 0, 1, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_expandloadu_epi32() {
        let src = _mm_set1_epi32(42);
        let a = &[1_i32, 2, 3, 4];
        let p = a.as_ptr();
        let m = 0b11111000;
        let r = _mm_mask_expandloadu_epi32(src, m, black_box(p));
        let e = _mm_set_epi32(1, 42, 42, 42);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_expandloadu_epi32() {
        let a = &[1_i32, 2, 3, 4];
        let p = a.as_ptr();
        let m = 0b11111000;
        let r = _mm_maskz_expandloadu_epi32(m, black_box(p));
        let e = _mm_set_epi32(1, 0, 0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_expandloadu_epi64() {
        let src = _mm512_set1_epi64(42);
        let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm512_mask_expandloadu_epi64(src, m, black_box(p));
        let e = _mm512_set_epi64(4, 3, 2, 42, 1, 42, 42, 42);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_expandloadu_epi64() {
        let a = &[1_i64, 2, 3, 4, 5, 6, 7, 8];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm512_maskz_expandloadu_epi64(m, black_box(p));
        let e = _mm512_set_epi64(4, 3, 2, 0, 1, 0, 0, 0);
        assert_eq_m512i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_expandloadu_epi64() {
        let src = _mm256_set1_epi64x(42);
        let a = &[1_i64, 2, 3, 4];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm256_mask_expandloadu_epi64(src, m, black_box(p));
        let e = _mm256_set_epi64x(1, 42, 42, 42);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_expandloadu_epi64() {
        let a = &[1_i64, 2, 3, 4];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm256_maskz_expandloadu_epi64(m, black_box(p));
        let e = _mm256_set_epi64x(1, 0, 0, 0);
        assert_eq_m256i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_expandloadu_epi64() {
        let src = _mm_set1_epi64x(42);
        let a = &[1_i64, 2];
        let p = a.as_ptr();
        let m = 0b11101000;
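        // Only the low two mask bits matter for a 2-lane vector; both are clear here,
        // so nothing is loaded and both lanes keep `src`.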
        let r = _mm_mask_expandloadu_epi64(src, m, black_box(p));
        let e = _mm_set_epi64x(42, 42);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_expandloadu_epi64() {
        let a = &[1_i64, 2];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm_maskz_expandloadu_epi64(m, black_box(p));
        let e = _mm_set_epi64x(0, 0);
        assert_eq_m128i(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_expandloadu_ps() {
        let src = _mm512_set1_ps(42.);
        let a = &[
            1.0f32, 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        ];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm512_mask_expandloadu_ps(src, m, black_box(p));
        let e = _mm512_set_ps(
            8., 7., 6., 42., 5., 42., 42., 42., 4., 3., 42., 42., 2., 42., 1., 42.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_expandloadu_ps() {
        let a = &[
            1.0f32, 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
        ];
        let p = a.as_ptr();
        let m = 0b11101000_11001010;
        let r = _mm512_maskz_expandloadu_ps(m, black_box(p));
        let e = _mm512_set_ps(
            8., 7., 6., 0., 5., 0., 0., 0., 4., 3., 0., 0., 2., 0., 1., 0.,
        );
        assert_eq_m512(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_expandloadu_ps() {
        let src = _mm256_set1_ps(42.);
        let a = &[1.0f32, 2., 3., 4., 5., 6., 7., 8.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm256_mask_expandloadu_ps(src, m, black_box(p));
        let e = _mm256_set_ps(4., 3., 2., 42., 1., 42., 42., 42.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_expandloadu_ps() {
        let a = &[1.0f32, 2., 3., 4., 5., 6., 7., 8.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm256_maskz_expandloadu_ps(m, black_box(p));
        let e = _mm256_set_ps(4., 3., 2., 0., 1., 0., 0., 0.);
        assert_eq_m256(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_expandloadu_ps() {
        let src = _mm_set1_ps(42.);
        let a = &[1.0f32, 2., 3., 4.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm_mask_expandloadu_ps(src, m, black_box(p));
        let e = _mm_set_ps(1., 42., 42., 42.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_expandloadu_ps() {
        let a = &[1.0f32, 2., 3., 4.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm_maskz_expandloadu_ps(m, black_box(p));
        let e = _mm_set_ps(1., 0., 0., 0.);
        assert_eq_m128(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_mask_expandloadu_pd() {
        let src = _mm512_set1_pd(42.);
        let a = &[1.0f64, 2., 3., 4., 5., 6., 7., 8.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm512_mask_expandloadu_pd(src, m, black_box(p));
        let e = _mm512_set_pd(4., 3., 2., 42., 1., 42., 42., 42.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512f")]
    unsafe fn test_mm512_maskz_expandloadu_pd() {
        let a = &[1.0f64, 2., 3., 4., 5., 6., 7., 8.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm512_maskz_expandloadu_pd(m, black_box(p));
        let e = _mm512_set_pd(4., 3., 2., 0., 1., 0., 0., 0.);
        assert_eq_m512d(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_mask_expandloadu_pd() {
        let src = _mm256_set1_pd(42.);
        let a = &[1.0f64, 2., 3., 4.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm256_mask_expandloadu_pd(src, m, black_box(p));
        let e = _mm256_set_pd(1., 42., 42., 42.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm256_maskz_expandloadu_pd() {
        let a = &[1.0f64, 2., 3., 4.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm256_maskz_expandloadu_pd(m, black_box(p));
        let e = _mm256_set_pd(1., 0., 0., 0.);
        assert_eq_m256d(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_mask_expandloadu_pd() {
        let src = _mm_set1_pd(42.);
        let a = &[1.0f64, 2.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm_mask_expandloadu_pd(src, m, black_box(p));
        let e = _mm_set_pd(42., 42.);
        assert_eq_m128d(r, e);
    }

    #[simd_test(enable = "avx512f,avx512vl")]
    unsafe fn test_mm_maskz_expandloadu_pd() {
        let a = &[1.0f64, 2.];
        let p = a.as_ptr();
        let m = 0b11101000;
        let r = _mm_maskz_expandloadu_pd(m, black_box(p));
        let e = _mm_set_pd(0., 0.);
        assert_eq_m128d(r, e);
    }
}